Commit f8a446fb authored by Tatsuhiro Tsujikawa's avatar Tatsuhiro Tsujikawa

nghttp2_hd: Rewrite huffman decoding

parent a8e4da80
......@@ -371,19 +371,6 @@ ssize_t nghttp2_hd_huff_encode(uint8_t *dest, size_t destlen,
const uint8_t *src, size_t srclen,
nghttp2_hd_side side);
/*
* Counts the number of required bytes to decode |src| with length
* |srclen|. The given input must be padded with the prefix of
* terminal code. If |side| is NGHTTP2_HD_SIDE_REQUEST, the request
* huffman code table is used. Otherwise, the response code table is
* used.
*
* This function returns the number of required bytes to decode given
* data if it succeeds, or -1.
*/
ssize_t nghttp2_hd_huff_decode_count(const uint8_t *src, size_t srclen,
nghttp2_hd_side side);
/*
* Decodes the given data |src| with length |srclen|. This function
* allocates memory to store the result and assigns its pointer to
......
......@@ -31,64 +31,11 @@
#include "nghttp2_hd.h"
extern const nghttp2_huff_sym req_huff_sym_table[];
extern const int16_t req_huff_decode_table[][256];
extern const nghttp2_huff_decode req_huff_decode_table[][16];
extern const nghttp2_huff_sym res_huff_sym_table[];
extern const int16_t res_huff_decode_table[][256];
extern const nghttp2_huff_decode res_huff_decode_table[][16];
/*
 * Extracts the 8 bits of |in| that begin |bitoff| bits into its first
 * byte.  |len| is the number of bytes readable at |in| and must be at
 * least 1.  When only one byte is readable, the low-order bits that
 * would have come from the following byte are left as 0.  The
 * |bitoff| must be strictly less than 8.
 */
static uint8_t get_prefix_byte(const uint8_t *in, size_t len, size_t bitoff)
{
  uint8_t high;
  uint8_t low;

  if(bitoff == 0) {
    /* Byte aligned: the first byte is the answer. */
    return in[0];
  }
  /* Upper part: what is left of the first byte after skipping
     |bitoff| bits (bits shifted past bit 7 are dropped). */
  high = (uint8_t)(in[0] << bitoff);
  /* Lower part: the leading |bitoff| bits of the next byte, if any. */
  low = (len > 1) ? (uint8_t)(in[1] >> (8 - bitoff)) : 0;
  return (uint8_t)(high | low);
}
/*
 * Decodes a single symbol from the |len| bytes at |in|, where the
 * code starts |bitoff| bits into the first byte.  Lookup begins in
 * row 0 of |huff_decode_table|; |huff_sym_table| supplies the bit
 * length of the resolved symbol.
 *
 * This function returns the decoded symbol number (0-255 and 256 for
 * special terminal symbol) if it succeeds.  It returns -1 if |len| is
 * 0, if the input is exhausted before a symbol is resolved, or if the
 * resolved code is longer than the bits available.
 */
static int huff_decode(const uint8_t *in, size_t len, size_t bitoff,
                       const nghttp2_huff_sym *huff_sym_table,
                       const huff_decode_table_type *huff_decode_table)
{
  const size_t avail_bits = len * 8;
  int entry;

  if(len == 0) {
    return -1;
  }
  entry = huff_decode_table[0][get_prefix_byte(in, len, bitoff)];
  while(entry < 0) {
    /* A negative entry is the negated index of the table row to
       consult with the next input byte. */
    ++in;
    --len;
    if(len == 0) {
      return -1;
    }
    entry = huff_decode_table[-entry][get_prefix_byte(in, len, bitoff)];
  }
  /* Reject a match whose code would extend past the end of the
     original input. */
  if(bitoff + huff_sym_table[entry].nbits > avail_bits) {
    return -1;
  }
  return entry;
}
/*
* Encodes huffman code |sym| into |*dest_ptr|, whose least |rembits|
* bits are not filled yet. The |rembits| must be in range [1, 8],
......@@ -167,89 +114,36 @@ ssize_t nghttp2_hd_huff_encode(uint8_t *dest, size_t destlen,
return dest - dest_first;
}
/*
 * Tells whether decoding stopped inside the final byte of |src| with
 * nothing but 1-bits remaining.
 *
 * Returns 1 only if |idx| is the last index of |src| (of length
 * |srclen|), |bitoff| is greater than 0, and every bit of src[idx]
 * from |bitoff| onwards is set.  Returns 0 otherwise.  The |bitoff|
 * must be strictly less than 8.
 */
static int check_last_byte(const uint8_t *src, size_t srclen, size_t idx,
                           size_t bitoff)
{
  uint8_t tail_mask;

  if(idx + 1 != srclen || bitoff == 0) {
    return 0;
  }
  /* Mask covering the (8 - bitoff) low-order bits that have not been
     consumed yet. */
  tail_mask = (uint8_t)((1 << (8 - bitoff)) - 1);
  return (src[idx] & tail_mask) == tail_mask;
}
/*
 * Counts the symbols huffman-encoded in |src| with length |srclen|.
 * If |side| is NGHTTP2_HD_SIDE_REQUEST, the request tables are used.
 * Otherwise, the response tables are used.
 *
 * Returns the number of decoded symbols, or -1 if a symbol cannot be
 * decoded (other than an all-ones remainder in the last byte) or if
 * the special terminal symbol 256 appears in the input.
 */
ssize_t nghttp2_hd_huff_decode_count(const uint8_t *src, size_t srclen,
                                     nghttp2_hd_side side)
{
  const int use_request = (side == NGHTTP2_HD_SIDE_REQUEST);
  const nghttp2_huff_sym *huff_sym_table =
    use_request ? req_huff_sym_table : res_huff_sym_table;
  const huff_decode_table_type *huff_decode_table =
    use_request ? req_huff_decode_table : res_huff_decode_table;
  size_t pos = 0;
  size_t bitoff = 0;
  size_t count = 0;

  while(pos < srclen) {
    int sym = huff_decode(src + pos, srclen - pos, bitoff,
                          huff_sym_table, huff_decode_table);
    if(sym == -1) {
      /* A failure is tolerated only when what remains is the all-ones
         tail of the last byte. */
      if(!check_last_byte(src, srclen, pos, bitoff)) {
        return -1;
      }
      break;
    }
    if(sym == 256) {
      /* 256 is the special terminal symbol and it should not be
         encoded in the byte string. */
      return -1;
    }
    ++count;
    /* Advance by the code length: whole bytes go to |pos|, the rest
       stays in |bitoff|. */
    bitoff += huff_sym_table[sym].nbits;
    pos += bitoff / 8;
    bitoff %= 8;
  }
  return count;
}
ssize_t nghttp2_hd_huff_decode(uint8_t **dest_ptr,
const uint8_t *src, size_t srclen,
nghttp2_hd_side side)
{
size_t bitoff = 0;
size_t i, j;
const nghttp2_huff_sym *huff_sym_table;
size_t i, j, k;
const huff_decode_table_type *huff_decode_table;
uint8_t *dest = NULL;
size_t destlen = 0;
int rv;
int16_t state = 0;
const nghttp2_huff_decode *t = NULL;
/* We use the decoding algorithm described in
http://graphics.ics.uci.edu/pub/Prefix.pdf */
if(side == NGHTTP2_HD_SIDE_REQUEST) {
huff_sym_table = req_huff_sym_table;
huff_decode_table = req_huff_decode_table;
} else {
huff_sym_table = res_huff_sym_table;
huff_decode_table = res_huff_decode_table;
}
j = 0;
for(i = 0; i < srclen;) {
rv = huff_decode(src + i, srclen - i, bitoff,
huff_sym_table, huff_decode_table);
if(rv == -1) {
if(check_last_byte(src, srclen, i, bitoff)) {
break;
}
rv = NGHTTP2_ERR_HEADER_COMP;
goto fail;
}
if(rv == 256) {
/* 256 is special terminal symbol and it should not encoded in
byte string. */
for(i = 0; i < srclen; ++i) {
uint8_t in = src[i] >> 4;
for(k = 0; k < 2; ++k) {
t = &huff_decode_table[state][in];
if(t->state == -1) {
rv = NGHTTP2_ERR_HEADER_COMP;
goto fail;
}
if(j == destlen) {
if(t->flags & NGHTTP2_HUFF_SYM) {
if(destlen == j) {
size_t new_len = j == 0 ? 32 : j * 2;
uint8_t *new_dest = realloc(dest, new_len);
if(new_dest == NULL) {
......@@ -259,10 +153,15 @@ ssize_t nghttp2_hd_huff_decode(uint8_t **dest_ptr,
dest = new_dest;
destlen = new_len;
}
dest[j++] = rv;
bitoff += huff_sym_table[rv].nbits;
i += bitoff / 8;
bitoff &= 0x7;
dest[j++] = t->sym;
}
state = t->state;
in = src[i] & 0xf;
}
}
if(srclen && (t->flags & NGHTTP2_HUFF_ACCEPTED) == 0) {
rv = NGHTTP2_ERR_HEADER_COMP;
goto fail;
}
*dest_ptr = dest;
return j;
......
......@@ -31,7 +31,21 @@
#include <nghttp2/nghttp2.h>
typedef int16_t huff_decode_table_type[256];
/*
 * Flag bits stored in the |flags| field of a huffman decode table
 * entry.  Declared with typedef so that nghttp2_huff_decode_flag
 * names a type; without it this declaration would define a variable
 * of that name in every file including this header.
 */
typedef enum {
  /* FSA accepts this state as the end of huffman encoding
     sequence. */
  NGHTTP2_HUFF_ACCEPTED = 1,
  /* This state emits symbol */
  NGHTTP2_HUFF_SYM = (1 << 1)
} nghttp2_huff_decode_flag;
/*
 * One entry of the huffman decode table.  The decoder looks an entry
 * up by the current state and the next 4 bits of input.
 */
typedef struct {
/* State to continue from after this entry; used as the row index of
   the next lookup.  The decoder fails with NGHTTP2_ERR_HEADER_COMP
   when this is -1. */
int16_t state;
/* Bitwise OR of NGHTTP2_HUFF_ACCEPTED and NGHTTP2_HUFF_SYM. */
uint8_t flags;
/* Byte appended to the output when NGHTTP2_HUFF_SYM is set in
   |flags|. */
uint8_t sym;
} nghttp2_huff_decode;
typedef nghttp2_huff_decode huff_decode_table_type[16];
typedef struct {
/* The number of bits in this code */
......
This diff is collapsed.
......@@ -3,9 +3,13 @@ import re
import sys
class Node:
def __init__(self, depth):
self.depth = depth
self.children = {}
def __init__(self, term = None):
self.term = term
self.left = None
self.right = None
self.trans = []
self.id = None
self.accept = False
def to_bin(s):
res = []
......@@ -21,27 +25,99 @@ def to_bin(s):
nodes = []
def insert(node, sym, binpat, nbits, pidx):
if pidx == len(binpat) - 1:
#assert(binpat[pidx] not in node.children)
mx = (8 - (nbits & 0x7)) & 0x7;
#print "last", bin(binpat[pidx]), mx
for i in range(1 << mx):
node.children[binpat[pidx] + i] = sym
def insert(node, sym, bits):
if len(bits) == 0:
node.term = sym
return
else:
if binpat[pidx] not in node.children:
node.children[binpat[pidx]] = -len(nodes)
nextnode = Node(pidx + 1)
nodes.append(nextnode)
if bits[0] == '0':
if node.left is None:
node.left = Node()
child = node.left
else:
nextnode = nodes[-node.children[binpat[pidx]]]
insert(nextnode, sym, binpat, nbits, pidx + 1)
if node.right is None:
node.right = Node()
child = node.right
insert(child, sym, bits[1:])
# Follows every bit sequence from |node| until |depth| reaches 4 and
# appends one (node, bits, syms) tuple per sequence to
# start_node.trans.  |bits| holds the bits taken so far and |syms| the
# symbols (node.term values) passed on the way.
# NOTE(review): indentation is lost in this view, so the nesting
# described in the comments below is presumed -- confirm against the
# real file.
def traverse(node, bits, syms, start_node, root, depth):
if depth == 4:
# A sequence that passed symbol 256 gets no symbols and no target
# node; dfs_print writes a None target as state -1.
if 256 in syms:
syms = []
node = None
start_node.trans.append((node, bits, syms))
return
# A node carrying a symbol has no further path of its own, so the
# walk continues from |root|.
if node.term is not None:
node = root
# Takes one more step to the child |node| reached through |bit|,
# copying the lists so that sibling branches do not share them.
def go(node, bit):
nbits = list(bits)
nbits.append(bit)
nsyms = list(syms)
if node.term is not None:
nsyms.append(node.term)
traverse(node, nbits, nsyms, start_node, root, depth + 1)
go(node.left, 0)
go(node.right, 1)
idseed = 0
# Assigns consecutive ids, taken from the global |idseed|, to |node|
# and the nodes below it, skipping nodes that carry a symbol
# (node.term is not None).  |prefix| is the list of bits leading to
# |node|: 0 is appended for the left child and 1 for the right child.
# NOTE(review): indentation is lost in this view; presumably only the
# node.accept assignment is conditional -- confirm against the real
# file.
def dfs_setid(node, prefix):
if node.term is not None:
return
# A node reached through at most 7 bits, all of them 1, is marked as
# accepting.
if len(prefix) <= 7 and [1] * len(prefix) == prefix:
node.accept = True
global idseed
node.id = idseed
idseed += 1
dfs_setid(node.left, prefix + [0])
dfs_setid(node.right, prefix + [1])
# Runs traverse() with |node| as both the current node and the start
# node (empty bits and symbols, depth 0), then does the same for its
# left and right children.  Returns at once when |node| is None.
def dfs(node, root):
if node is None:
return
traverse(node, [], [], node, root, 0)
dfs(node.left, root)
dfs(node.right, root)
NGHTTP2_HUFF_ACCEPTED = 1
NGHTTP2_HUFF_SYM = 1 << 1
# Prints the decode table row of |node| followed by the rows of its
# left and right children.  Nodes carrying a symbol (node.term is not
# None) print nothing.  A row is a '/* id */' comment and a braced
# list with one '{state, flags, sym}' initializer per element of
# node.trans.
# NOTE(review): indentation is lost in this view, so the if/else
# nesting is presumed -- confirm against the real file.
def dfs_print(node):
if node.term is not None:
return
print '/* {} */'.format(node.id)
print '{'
for nd, bits, syms in node.trans:
outlen = len(syms)
flags = 0
# At most one symbol may be emitted per transition; NGHTTP2_HUFF_SYM
# marks the entries that emit one.
if outlen == 0:
out = 0
else:
assert(outlen == 1)
out = syms[0]
flags |= NGHTTP2_HUFF_SYM
# A transition without a target node is written as state -1.
if nd is None:
id = -1
else:
id = nd.id
if id is None:
# if nd.id is None, it is a leaf node
id = 0
flags |= NGHTTP2_HUFF_ACCEPTED
elif nd.accept:
flags |= NGHTTP2_HUFF_ACCEPTED
print ' {{{}, 0x{:02x}, {}}},'.format(id, flags, out)
print '},'
dfs_print(node.left)
dfs_print(node.right)
symbol_tbl = [(None, 0) for i in range(257)]
tables = {}
root = Node(0)
nodes.append(root)
root = Node()
for line in sys.stdin:
m = re.match(r'.*\(\s*(\d+)\) ([|01]+) \[(\d+)\]\s+(\S+).*', line)
......@@ -50,14 +126,17 @@ for line in sys.stdin:
if len(m.group(4)) > 8:
raise Error('Code is more than 4 bytes long')
sym = int(m.group(1))
pat = re.sub(r'\|', '', m.group(2))
bits = re.sub(r'\|', '', m.group(2))
nbits = int(m.group(3))
assert(len(pat) == nbits)
binpat = to_bin(pat)
assert(len(bits) == nbits)
binpat = to_bin(bits)
assert(len(binpat) == (nbits+7)/8)
symbol_tbl[sym] = (binpat, nbits, m.group(4))
#print "Inserting", sym
insert(root, sym, binpat, nbits, 0)
insert(root, sym, bits)
dfs_setid(root, [])
dfs(root, root)
print '''\
typedef struct {
......@@ -67,7 +146,7 @@ typedef struct {
'''
print '''\
nghttp2_huff_sym huff_sym_table[] = {'''
const nghttp2_huff_sym huff_sym_table[] = {'''
for i in range(257):
pat = list(symbol_tbl[i][0])
pat += [0]*(4 - len(pat))
......@@ -77,22 +156,22 @@ for i in range(257):
print '};'
print ''
print '''int16_t huff_decode_table[][256] = {'''
for j in range(len(nodes)):
node = nodes[j]
print '/* {} */'.format(j)
print '{'
for i in range(256):
if i in node.children:
sys.stdout.write('''\
{}{}'''.format(node.children[i], ',' if i < 255 else ''))
else:
sys.stdout.write(''' NGHTTP2_HD_HUFF_NO_ENT,''')
if (i+1)&0x7 == 0:
print ''
sys.stdout.write('}')
if j == len(nodes) - 1:
print ''
else:
print ','
print '''\
enum {{
NGHTTP2_HUFF_ACCEPTED = {},
NGHTTP2_HUFF_SYM = {}
}} nghttp2_huff_decode_flag;
'''.format(NGHTTP2_HUFF_ACCEPTED, NGHTTP2_HUFF_SYM)
print '''\
typedef struct {
int16_t state;
uint8_t flags;
uint8_t sym;
} nghttp2_huff_decode;
'''
print '''\
const nghttp2_huff_decode huff_decode_table[][16] = {'''
dfs_print(root)
print '};'
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment