当 HTTP 报文同时使用 gzip 和 chunked 时,应该如何解析?如果只是单独解析 chunked 数据,按照 chunked 的格式定义依次解析即可;但是把 chunked 数据按照 gzip 压缩之后,我怎样才能得到 body 的边界?我最初的想法是:对 gzip 压缩过的 HTTP 流,从长度 1 开始逐步增加输入长度尝试解压,直到解压失败,就说明到了 gzip 的边界。为此我写了个测试程序,如下:
#ifndef GZIP_H
#define GZIP_H
#include "zlib.h"
/*
 * Compress a buffer into gzip format (deflate data wrapped in a gzip header
 * and trailer).
 *
 * data    input bytes to compress
 * ndata   number of input bytes; must be > 0 and fit in a uInt
 * zdata   output buffer that receives the gzip stream
 * nzdata  in:  capacity of zdata in bytes
 *         out: number of bytes written (updated only when 0 is returned)
 *
 * Returns 0 on success, -1 on invalid arguments or a zlib error, or a
 * positive count of input bytes left unconsumed when zdata filled up before
 * all input had been consumed.
 */
int gzcompress(Bytef *data, uLong ndata,
Bytef *zdata, uLong *nzdata)
{
    z_stream c_stream;
    uLong written;
    int err;
    int rc = -1;

    if (data == NULL || ndata == 0 || zdata == NULL || nzdata == NULL)
        return -1;
    /* z_stream tracks pending bytes in a uInt; refuse input whose length
     * would be silently truncated by the assignment below. */
    if ((uLong)(uInt)ndata != ndata)
        return -1;

    c_stream.zalloc = NULL;
    c_stream.zfree = NULL;
    c_stream.opaque = NULL;
    /* windowBits = MAX_WBITS + 16 selects the gzip wrapper (header and
     * trailer) instead of the default zlib wrapper. */
    if (deflateInit2(&c_stream, Z_DEFAULT_COMPRESSION, Z_DEFLATED,
                     MAX_WBITS + 16, 8, Z_DEFAULT_STRATEGY) != Z_OK)
        return -1;

    c_stream.next_in = data;
    c_stream.avail_in = (uInt)ndata;
    c_stream.next_out = zdata;
    /* Clamp an over-large capacity to what a uInt can express. */
    c_stream.avail_out =
        ((uLong)(uInt)*nzdata == *nzdata) ? (uInt)*nzdata : (uInt)-1;

    /* Phase 1: push all input through the compressor. */
    while (c_stream.avail_in != 0 && c_stream.avail_out != 0) {
        if (deflate(&c_stream, Z_NO_FLUSH) != Z_OK)
            goto cleanup;
    }
    if (c_stream.avail_in != 0) {
        /* Output buffer filled up first: report how much input is left. */
        rc = (int)c_stream.avail_in;
        goto cleanup;
    }

    /* Phase 2: flush pending output and emit the gzip trailer. Z_OK means
     * "call again"; anything other than Z_STREAM_END afterwards is an error
     * (typically Z_BUF_ERROR because zdata is too small). */
    do {
        err = deflate(&c_stream, Z_FINISH);
    } while (err == Z_OK);
    if (err != Z_STREAM_END)
        goto cleanup;

    written = c_stream.total_out;
    if (deflateEnd(&c_stream) != Z_OK)
        return -1;
    *nzdata = written;
    return 0;

cleanup:
    /* Release zlib's internal state on every failure path. */
    (void)deflateEnd(&c_stream);
    return rc;
}
/*
 * Decompress a gzip stream (deflate data with gzip header and trailer).
 *
 * zdata   gzip-compressed input
 * nzdata  number of input bytes available in zdata
 * data    output buffer that receives the decompressed bytes
 * ndata   in:  capacity of data in bytes
 *         out: number of bytes produced (updated only when 0 is returned)
 *
 * Returns 0 when inflation stopped without error, -1 on invalid arguments or
 * a zlib error.
 *
 * NOTE: 0 is returned both when the end of the gzip stream was reached
 * (Z_STREAM_END) and when the loop simply ran out of input or output space,
 * so a 0 result alone does not prove that a complete stream was seen. Bytes
 * that follow the gzip trailer in zdata are never consumed.
 */
int gzdecompress(Byte *zdata, uLong nzdata,
Byte *data, uLong *ndata)
{
    z_stream d_stream = {0}; /* decompression stream */
    const uInt chunk_max = (uInt)-1; /* largest count a z_stream field holds */
    int err;

    if (zdata == NULL || data == NULL || ndata == NULL)
        return -1;

    d_stream.zalloc = NULL;
    d_stream.zfree = NULL;
    d_stream.opaque = NULL;
    d_stream.next_in = zdata;
    d_stream.avail_in = 0;
    d_stream.next_out = data;
    /* windowBits = MAX_WBITS + 16 makes inflate expect the gzip header and
     * trailer. */
    if (inflateInit2(&d_stream, MAX_WBITS + 16) != Z_OK)
        return -1;

    while (d_stream.total_out < *ndata && d_stream.total_in < nzdata) {
        uLong in_left = nzdata - d_stream.total_in;
        uLong out_left = *ndata - d_stream.total_out;

        /* Offer everything that remains, clamped to the uInt counters. */
        d_stream.avail_in = in_left > chunk_max ? chunk_max : (uInt)in_left;
        d_stream.avail_out = out_left > chunk_max ? chunk_max : (uInt)out_left;

        err = inflate(&d_stream, Z_NO_FLUSH);
        if (err == Z_STREAM_END)
            break; /* gzip trailer verified: end of this member */
        if (err != Z_OK) {
            /* Any error, including Z_DATA_ERROR, is fatal here: once
             * inflate() reports corrupt data the stream stays in an error
             * state until it is reset, so feeding more bytes cannot help. */
            (void)inflateEnd(&d_stream);
            return -1;
        }
    }

    if (inflateEnd(&d_stream) != Z_OK)
        return -1;
    *ndata = d_stream.total_out;
    return 0;
}
#endif // GZIP_H
#include "zip.h"
#include <stdio.h>
#include <string.h>
int main(int argc, char *argv[])
{
char text[] = "final compress\n";
char com[1000];
char final[1000];
int len = strlen(text);
unsigned long com_len;
unsigned long final_len = sizeof(final);
int ret;
int tmp;
ret = gzcompress(text, len, com, &com_len);
printf("ret:%d,%ld\n", ret, com_len);
com[com_len] = 't';
com[com_len + 1] = 'e';
com[com_len + 2] = 's';
com[com_len + 3] = 't';
ret = gzdecompress(com, 15, final, &final_len);
tmp = final_len;
printf("ret:%d. %d\n", ret, tmp);
final_len = sizeof(final);
ret = gzdecompress(com, 20, final, &final_len);
printf("ret:%d, %s, %d\n", ret, final, final_len);
final_len = sizeof(final);
ret = gzdecompress(com, com_len - 5, final, &final_len);
printf("ret:%d, %s, %d\n", ret, final, final_len);
final_len = sizeof(final);
ret = gzdecompress(com, com_len, final, &final_len);
printf("ret:%d, %s, %d\n", ret, final, final_len);
final_len = sizeof(final);
ret = gzdecompress(com, com_len+100, final, &final_len);
printf("ret:%d, %s, %d\n", ret, final, final_len);
return 0;
}
我把 text 字符串进行 gzip 压缩后,得到的压缩数据长度为 com_len,压缩后的数据保存在 com 中。为了模拟同时收到多个 HTTP 报文的场景,我在 com 后面又追加了几个字符。然后,我分别按照长度 15、20、com_len - 5、com_len、com_len + 100 来解压整个缓冲区,得出的结果如下:
ret:0,35
ret:0. 4
ret:0, final com, 9
ret:0, final compress
, 15
ret:0, final compress
, 15
ret:0, final compress
, 15
可以发现,在传入的数据长度不到 com_len 的时候,已经能够解出原文的 "final compress\n" 字符串了;而且传入 com_len + 100 时,也没有报数据出错。所以,我按长度不断递增来解压的方法是行不通的,这样无法获得 HTTP 报文的边界。
所以,我的问题是:在 gzip + chunked 这种传输方式下,怎么确定 HTTP 消息的边界?
问题大概看明白了。
不过我觉得在测试中每次调用 gzdecompress 之前,可能需要先把 final 清空一次,目前的测试结果看起来可能不太对。能否清空后重新测试,再发一下结果?