首页 > http消息gzip+chunked的解析问题?

http消息gzip+chunked的解析问题?

当HTTP报文中同时设置了gzip + chunked时,应该如何解析?如果单独解析chunked数据,只要按照chunked数据的格式定义依次进行解析即可,但是把chunked数据按照gzip进行压缩后,我怎么才能得到body的边界?我开始的想法是:对gzip压缩过的HTTP流,从长度为1开始递增地尝试解压,直到解压失败,就说明到了gzip的边界。为了验证这个想法,我写了个测试程序,如下:

#ifndef GZIP_H
#define GZIP_H

#include "zlib.h"

/**
 * Compress a buffer into gzip format (gzip header and trailer included).
 *
 * @param data    input bytes to compress; must be non-NULL
 * @param ndata   number of input bytes; must be > 0 and fit in a uInt
 * @param zdata   output buffer that receives the gzip stream
 * @param nzdata  in: capacity of zdata in bytes;
 *                out: number of bytes written (updated only on success)
 * @return 0 on success;
 *         -1 on invalid arguments or when zlib reports an error (this
 *         includes running out of output space while finishing the stream);
 *         otherwise the positive number of input bytes left unconsumed
 *         because zdata filled up before all input was read.
 */
int gzcompress(Bytef *data, uLong ndata,
               Bytef *zdata, uLong *nzdata)
{
    z_stream c_stream;
    int err;

    if (data == NULL || ndata == 0 || zdata == NULL || nzdata == NULL) {
        return -1;
    }
    /* avail_in is a uInt: refuse input that would be silently truncated. */
    if (ndata > (uInt)-1) {
        return -1;
    }

    c_stream.zalloc = NULL;
    c_stream.zfree = NULL;
    c_stream.opaque = NULL;
    /* windowBits = MAX_WBITS + 16 makes zlib emit a gzip header and trailer. */
    if (deflateInit2(&c_stream, Z_DEFAULT_COMPRESSION, Z_DEFLATED,
                     MAX_WBITS + 16, 8, Z_DEFAULT_STRATEGY) != Z_OK) {
        return -1;
    }

    c_stream.next_in = data;
    c_stream.avail_in = (uInt)ndata;
    c_stream.next_out = zdata;
    /* avail_out is a uInt as well: clamp instead of wrapping around. */
    c_stream.avail_out = (*nzdata > (uInt)-1) ? (uInt)-1 : (uInt)*nzdata;

    while (c_stream.avail_in != 0 && c_stream.total_out < *nzdata) {
        if (deflate(&c_stream, Z_NO_FLUSH) != Z_OK) {
            goto fail;
        }
    }

    if (c_stream.avail_in != 0) {
        /* Output buffer is full: report how much input is still pending. */
        int pending = (int)c_stream.avail_in;
        (void)deflateEnd(&c_stream);
        return pending;
    }

    /* Flush whatever is buffered and write the gzip trailer. */
    do {
        err = deflate(&c_stream, Z_FINISH);
        if (err != Z_OK && err != Z_STREAM_END) {
            goto fail;
        }
    } while (err != Z_STREAM_END);

    if (deflateEnd(&c_stream) != Z_OK) {
        return -1;
    }
    *nzdata = c_stream.total_out;
    return 0;

fail:
    /* Release zlib's internal state on every error path. */
    (void)deflateEnd(&c_stream);
    return -1;
}

/**
 * Decompress gzip data (gzip header and trailer expected).
 *
 * @param zdata   compressed input bytes
 * @param nzdata  number of compressed bytes available in zdata
 * @param data    output buffer that receives the decompressed bytes
 * @param ndata   in: capacity of data in bytes;
 *                out: number of bytes produced (updated only when 0 is
 *                returned)
 * @return 0 when inflate reported no error, -1 otherwise.
 *
 * NOTE: 0 is returned not only when the end of the gzip stream was reached,
 * but also when the input ran out or the output buffer filled up first.
 * The return value alone therefore does not tell the caller whether zdata
 * held a complete stream, nor how many input bytes were consumed.
 */
int gzdecompress(Byte *zdata, uLong nzdata,
                 Byte *data, uLong *ndata)
{
    z_stream d_stream = {0}; /* decompression stream */
    int err;

    if (ndata == NULL) {
        return -1;
    }

    d_stream.zalloc = NULL;
    d_stream.zfree = NULL;
    d_stream.opaque = NULL;
    d_stream.next_in = zdata;
    d_stream.avail_in = 0;
    d_stream.next_out = data;
    /* windowBits = MAX_WBITS + 16 makes zlib expect a gzip header/trailer. */
    if (inflateInit2(&d_stream, MAX_WBITS + 16) != Z_OK) {
        return -1;
    }

    while (d_stream.total_out < *ndata && d_stream.total_in < nzdata) {
        /* Feed one input byte and allow one output byte per call. */
        d_stream.avail_in = d_stream.avail_out = 1;
        err = inflate(&d_stream, Z_NO_FLUSH);
        if (err == Z_STREAM_END) {
            break;
        }
        if (err != Z_OK) {
            /*
             * Any inflate error is treated as fatal. Injecting a fake zlib
             * header here would move next_in away from the caller's buffer,
             * so the following iterations would read from the wrong memory.
             */
            (void)inflateEnd(&d_stream);
            return -1;
        }
    }

    if (inflateEnd(&d_stream) != Z_OK) {
        return -1;
    }
    *ndata = d_stream.total_out;
    return 0;
}

#endif // GZIP_H



#include "zip.h"
#include <stdio.h>
#include <string.h>

int main(int argc, char *argv[])
{
    char text[] = "final compress\n";
    char com[1000];  
    char final[1000];
    int len = strlen(text);
    unsigned long com_len;
    unsigned long final_len = sizeof(final);
    int ret;
    int tmp;

    ret = gzcompress(text, len, com, &com_len);
    printf("ret:%d,%ld\n", ret, com_len);
    com[com_len] = 't';
    com[com_len + 1] = 'e';
    com[com_len + 2] = 's';
    com[com_len + 3] = 't';

    ret = gzdecompress(com, 15, final, &final_len);
    tmp = final_len;
    printf("ret:%d. %d\n", ret, tmp);
    final_len = sizeof(final);
    ret = gzdecompress(com, 20, final, &final_len);
    printf("ret:%d, %s, %d\n", ret, final, final_len);
    final_len = sizeof(final);
    ret = gzdecompress(com, com_len - 5, final, &final_len);
    printf("ret:%d, %s, %d\n", ret, final, final_len);
    final_len = sizeof(final);
    ret = gzdecompress(com, com_len, final, &final_len);
    printf("ret:%d, %s, %d\n", ret, final, final_len);
    final_len = sizeof(final);
    ret = gzdecompress(com, com_len+100, final, &final_len);
    printf("ret:%d, %s, %d\n", ret, final, final_len);
    return 0;
}

我把text字符串进行gzip压缩后,得到的压缩数据长度为com_len,压缩后的数据保存在com数组中。为了模拟同时收到多个HTTP消息报文的场景,我在com的压缩数据后面又添加了几个字符,然后按照长度为15、20、com_len - 5、com_len、com_len + 100依次来解压整个缓冲区,发现得出的结果如下:

ret:0,35
ret:0. 4
ret:0, final com, 9
ret:0, final compress
, 15
ret:0, final compress
, 15
ret:0, final compress
, 15

可以发现在传入的数据不到com_len的时候,已经能够解析到原文的"final compress\n"字符串了,而且,传入的com_len + 100时,也没有报数据出错,所以,我的按长度不断递增解析的方法是行不通的,这样无法获得HTTP报文的边界。

所以,我的问题是在gzip+chunked这种传输方式下, 怎么确定HTTP消息的边界?


问题大概看明白了
不过我觉得测试调用gzdecompress的时候,可能需要先对final做一次清空,测试结果目前看起来可能不太对,能否清空一下测试,发一下结果?

【热门文章】
【热门文章】