protobuf_c_message_unpack(): revert hash-based required field detection

Originally, someone complained about protobuf_c_message_unpack() using alloca() for the allocation of the temporary bitmap used to detect that all required fields were present in the unpacked message (Issue #60). Commit 248eae1d eliminated the use of alloca(), replacing the variable-length alloca()'d bitmap with a 16 byte stack-allocated bitmap, treating field numbers mod 128. Andrei Nigmatulin noted in PR #137 problems with this approach: Apparently 248eae1d has introduced a serious problem to protobuf-c decoder. Originally the function of required_fields_bitmap was to prevent decoder from returning incomplete messages. That means, each required field in the message either must have a default_value or be present in the protobuf stream. The purpose of this behaviour was to provide user with 100% complete ProtobufCMessage struct on return from protobuf_c_message_unpack(), which does not need to be checked for completeness just after. This is exactly how original protobuf C++ decoder behaves. The patch 248eae1d broke this functionality by hashing bits of required fields instead of storing them separately. Consider a protobuf message with 129 fields where the first and the last fields set as 'required'. In this case it is possible to trick decoder to return incomplete ProtobufCMessage struct with missing required fields by providing only one of the two fields in the source byte stream. This can be considered as a security issue as well because user's code do not expect incomplete messages with missing required fields out from protobuf_c_message_unpack(). Such a change could introduce undefined behaviour to user programs. This patch is based on Andrei's fix and restores the exact detection of missing required fields, but avoids doing a separate allocation for the required fields bitmap except for messages whose descriptors define a large number of fields. In the "typical" case where the message descriptor has <= 128 fields we can just use a 16 byte array allocated on the stack. (Note that the hash-based approach also used a 16 byte stack allocated array.)

protobuf_c_message_unpack(): revert hash-based required field detection
Originally, someone complained about protobuf_c_message_unpack() using alloca() for the allocation of the temporary bitmap used to detect that all required fields were present in the unpacked message (Issue #60). Commit 248eae1d eliminated the use of alloca(), replacing the variable-length alloca()'d bitmap with a 16 byte stack-allocated bitmap, treating field numbers mod 128. Andrei Nigmatulin noted in PR #137 problems with this approach: Apparently 248eae1d has introduced a serious problem to protobuf-c decoder. Originally the function of required_fields_bitmap was to prevent decoder from returning incomplete messages. That means, each required field in the message either must have a default_value or be present in the protobuf stream. The purpose of this behaviour was to provide user with 100% complete ProtobufCMessage struct on return from protobuf_c_message_unpack(), which does not need to be checked for completeness just after. This is exactly how original protobuf C++ decoder behaves. The patch 248eae1d broke this functionality by hashing bits of required fields instead of storing them separately. Consider a protobuf message with 129 fields where the first and the last fields set as 'required'. In this case it is possible to trick decoder to return incomplete ProtobufCMessage struct with missing required fields by providing only one of the two fields in the source byte stream. This can be considered as a security issue as well because user's code do not expect incomplete messages with missing required fields out from protobuf_c_message_unpack(). Such a change could introduce undefined behaviour to user programs. This patch is based on Andrei's fix and restores the exact detection of missing required fields, but avoids doing a separate allocation for the required fields bitmap except for messages whose descriptors define a large number of fields. In the "typical" case where the message descriptor has <= 128 fields we can just use a 16 byte array allocated on the stack. (Note that the hash-based approach also used a 16 byte stack allocated array.)
d9a2f847 · Robert Edmonds · ec961cb0 · d9a2f847 · d9a2f847
Commit d9a2f847 authored Mar 31, 2014 by Robert Edmonds
Hide whitespace changes
Inline Side-by-side

Showing with 27 additions and 23 deletions

TODO TODO +0 -3

protobuf-c/protobuf-c.c protobuf-c/protobuf-c.c +27 -20

No files found.
--- a/TODO
+++ b/TODO
@@ -8,9 +8,6 @@
  be static??
 - dispatch-within-dispatch protection needed (ie assert that you can't
  recursively invoke the main-loop)
- uh not sure i like the hash-based situation for required values.
-  perhaps we need a message-descriptor flag to help out. (Could always use
-  different hashes and sizes)
 --------------------
 --- NEEDED TESTS ---

--- a/protobuf-c/protobuf-c.c
+++ b/protobuf-c/protobuf-c.c
@@ -57,17 +57,6 @@
 /* The maximum length of a 64 bit integer in varint encoding. */
 #define MAX_UINT64_ENCODED_SIZE		10
-/*
- * This should be roughly the biggest message you think you'll encounter.
- * However, the only point of the hashing is to detect uninitialized required
- * members.  I doubt many messages have 128 REQUIRED fields, so hopefully
- * this'll be fine.
- *
- * TODO: A better solution is to separately in the descriptor index the required
- * fields, and use the allocator if the required field count is too big.
- */
-#define MAX_MEMBERS_FOR_HASH_SIZE	128
 #ifndef PROTOBUF_C_UNPACK_ERROR
 # define PROTOBUF_C_UNPACK_ERROR(...)
 #endif
@@ -2274,6 +2263,12 @@ message_init_generic(const ProtobufCMessageDescriptor *desc,
   - BOUND_SIZEOF_SCANNED_MEMBER_LOG2		\
   - FIRST_SCANNED_MEMBER_SLAB_SIZE_LOG2)
+#define REQUIRED_FIELD_BITMAP_SET(index)	\
+	(required_fields_bitmap[(index)/8] |= (1<<((index)%8)))
+#define REQUIRED_FIELD_BITMAP_IS_SET(index)	\
+	(required_fields_bitmap[(index)/8] & (1<<((index)%8)))
 ProtobufCMessage *
 protobuf_c_message_unpack(const ProtobufCMessageDescriptor *desc,
 			  ProtobufCAllocator *allocator,
@@ -2300,25 +2295,32 @@ protobuf_c_message_unpack(const ProtobufCMessageDescriptor *desc,
 	unsigned j;
 	unsigned i_slab;
 	unsigned last_field_index = 0;
-	unsigned char required_fields_bitmap[MAX_MEMBERS_FOR_HASH_SIZE / 8] =
+	unsigned required_fields_bitmap_len;
-		{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, };
+	unsigned char required_fields_bitmap_stack[16];
+	unsigned char *required_fields_bitmap = required_fields_bitmap_stack;
+	protobuf_c_boolean required_fields_bitmap_alloced = 0;
 	ASSERT_IS_MESSAGE_DESCRIPTOR(desc);
 	if (allocator == NULL)
 		allocator = &protobuf_c_default_allocator;
-	/* We treat all fields % (16*8), which should be good enough. */
-#define REQUIRED_FIELD_BITMAP_SET(index) \
-  (required_fields_bitmap[(index/8)%sizeof(required_fields_bitmap)] |= (1<<((index)%8)))
-#define REQUIRED_FIELD_BITMAP_IS_SET(index) \
-  (required_fields_bitmap[(index/8)%sizeof(required_fields_bitmap)] & (1<<((index)%8)))
 	rv = do_alloc(allocator, desc->sizeof_message);
 	if (!rv)
 		return (NULL);
 	scanned_member_slabs[0] = first_member_slab;
+	required_fields_bitmap_len = (desc->n_fields + 7) / 8;
+	if (required_fields_bitmap_len > sizeof(required_fields_bitmap_stack)) {
+		required_fields_bitmap = do_alloc(allocator, required_fields_bitmap_len);
+		if (!required_fields_bitmap) {
+			do_free(allocator, rv);
+			return (NULL);
+		}
+		required_fields_bitmap_alloced = 1;
+	}
+	memset(required_fields_bitmap, 0, required_fields_bitmap_len);
 	/*
 	 * Generated code always defines "message_init". However, we provide a
 	 * fallback for (1) users of old protobuf-c generated-code that do not
@@ -2535,19 +2537,24 @@ protobuf_c_message_unpack(const ProtobufCMessageDescriptor *desc,
 	/* cleanup */
 	for (j = 1; j <= which_slab; j++)
 		do_free(allocator, scanned_member_slabs[j]);
+	if (required_fields_bitmap_alloced)
+		do_free(allocator, required_fields_bitmap);
 	return rv;
 error_cleanup:
 	protobuf_c_message_free_unpacked(rv, allocator);
 	for (j = 1; j <= which_slab; j++)
 		do_free(allocator, scanned_member_slabs[j]);
+	if (required_fields_bitmap_alloced)
+		do_free(allocator, required_fields_bitmap);
 	return NULL;
 error_cleanup_during_scan:
 	do_free(allocator, rv);
 	for (j = 1; j <= which_slab; j++)
 		do_free(allocator, scanned_member_slabs[j]);
+	if (required_fields_bitmap_alloced)
+		do_free(allocator, required_fields_bitmap);
 	return NULL;
 }