700字范文,内容丰富有趣,生活中的好帮手!
700字范文 > C语言cgi解析上传文件的本地测试

C语言cgi解析上传文件的本地测试

时间:2019-07-08 13:44:38

相关推荐

C语言cgi解析上传文件的本地测试

首先用浏览器提交一个multipart/form-data类型的数据包到本地服务器,随便用什么脚本写个接收数据包的程序,把数据包保存到文件,数据包的格式大概就是这样:

------WebKitFormBoundaryvWmSGiJ8xX3qdocw
Content-Disposition: form-data; name="username"

fengwusan
------WebKitFormBoundaryvWmSGiJ8xX3qdocw
Content-Disposition: form-data; name="password"

123456
------WebKitFormBoundaryvWmSGiJ8xX3qdocw
Content-Disposition: form-data; name="content"

大家好啊,吃饭了没有?
------WebKitFormBoundaryvWmSGiJ8xX3qdocw
Content-Disposition: form-data; name="file1"; filename="1.docx"
Content-Type: application/vnd.openxmlformats-officedocument.wordprocessingml.document

[BYTE数据]
------WebKitFormBoundaryvWmSGiJ8xX3qdocw
Content-Disposition: form-data; name="file2"; filename="bd150998.zip"
Content-Type: application/x-zip-compressed

[BYTE数据]
------WebKitFormBoundaryvWmSGiJ8xX3qdocw--

可以看到很多分界线:----WebKitFormBoundaryvWmSGiJ8xX3qdocw。multipart/form-data的数据包格式就是用分界线分割多个表单数据,而分界线附带在请求数据包的header信息中,名称叫做CONTENT_TYPE,如果<form>的enctype为multipart/form-data,那么CONTENT_TYPE就是:multipart/form-data; boundary={boundary},而数据包中的boundary又有特定格式,用变量表示数据包就是这样:

--{boundary}
Content-Disposition: form-data; name="{input|textarea|select}.name"

[data]
--{boundary}
Content-Disposition: form-data; name="{input|textarea|select}.name"

[data]
--{boundary}
Content-Disposition: form-data; name="{input[type=file]}.name"; filename="{input[type=file]}.value"
Content-Type: {mime-type}

[data]
--{boundary}
Content-Disposition: form-data; name="{input[type=file]}.name"; filename="{input[type=file]}.value"
Content-Type: {mime-type}

[data]
--{boundary}--

可以看到每个{boundary}都是以--开头,如果数据没有结束,那么紧接着{boundary}的就是\r\n换行符,如果数据包结束,那么紧接着{boundary}的就是两个中杠:--。而input|textarea|select的表单类型,在{boundary}换行之后只有name这个变量,而input[type=file]的表单类型,多了filename与Content-Type两个变量。

有了这些特征,那么开始解析这个数据包。

/*
 * Stand-alone test program / CGI executable that parses a multipart/form-data
 * request body in fixed-size chunks.
 *
 * Plain form fields are collected in memory; uploaded files are streamed into
 * temporary files named "UP-XXXXXX" in the current working directory.
 *
 * When REQUEST_METHOD is not set the program runs in "local test" mode and
 * parses a previously saved request body from disk instead of stdin.
 */
#if !defined(_WIN32) && !defined(_POSIX_C_SOURCE)
#define _POSIX_C_SOURCE 200809L /* declares mkstemp()/fileno() in strict ISO C modes */
#endif

#include <errno.h>
#include <fcntl.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#ifdef _WIN32
#include <io.h>     /* write(), close(), setmode() */
#else
#include <unistd.h> /* write(), close() */
#endif

#define APP_STATIC(type) static type
#define APP_EXTERN(type) extern type
#ifdef _WIN32
#define APP_DECLARE(type) type __stdcall
#define APP_DECLARE_NONSTD(type) type __cdecl
#else
/* The calling-convention keywords only exist on Windows toolchains. */
#define APP_DECLARE(type) type
#define APP_DECLARE_NONSTD(type) type
#endif
#define APP_DECLARE_DATA

/* Allocate `len` zero-initialised elements of `type`. */
#define NEW(type, len) ((type *)calloc((len), sizeof(type)))

/*
 * Grow `ptr` to hold `pos + len` elements and zero the `len` new ones.
 * NOTE: the realloc() result is not checked; kept for source compatibility,
 * the code below performs its own checked reallocation instead.
 */
#define RENEW(ptr, type, pos, len) do { \
    (ptr) = (type *)realloc((ptr), sizeof(type) * ((pos) + (len))); \
    memset(&(ptr)[(pos)], 0, sizeof(type) * (len)); \
} while (0)

APP_DECLARE(int) memcs(const char *data, int lenData, int *posData, const char *key, int lenKey, int *posKey);
APP_DECLARE(char *) substr(const char *str, int pos, int len);
APP_DECLARE(char *) SUBSTR(const char *data, size_t len, const char *start, const char *end);
APP_DECLARE(char *) concat(char **ret, const char *str, int len);

typedef enum {
    False,
    True
} Boolean;

#define MIN_PATH_SIZE 255

/* Number of bytes ("\r\n--") that precede every boundary string inside the body. */
#define CGI_BOUNDARY_PREFIX 4

/*
 * Length of the longest proper prefix of key[0..matched) that is also a
 * suffix of it. Used by memcs() to resume after a mismatch without having to
 * re-read data bytes (which may belong to a chunk that is no longer in memory).
 */
APP_STATIC(int) memcs_fallback(const char *key, int matched) {
    int k;

    for (k = matched - 1; k > 0; k--) {
        if (memcmp(key, key + (matched - k), (size_t)k) == 0) {
            return k;
        }
    }
    return 0;
}

/*
 * Memory cursor: search `key` inside `data`, starting at *posData, resuming a
 * match of *posKey key bytes that was left pending by a previous call.
 *
 * Returns
 *    0  key found; *posData is moved just past the match and *posKey is reset to 0
 *    1  data ended while the last *posKey bytes matched the start of key;
 *       *posData == lenData (or unchanged if it was already past the end)
 *   -1  key not found and no partial match is pending; *posKey == 0.
 *       Also returned, without touching the cursors, for NULL arguments or
 *       lenKey <= 0.
 *
 * The original implementation rewound the data index on a mismatch, which
 * cannot work for the part of a match carried over from a previous chunk and
 * made it miss keys such as "----WebKit..." preceded by "--" when a chunk
 * ended inside the run of dashes. Falling back on the key's own prefixes
 * keeps the data index monotonic and handles that case.
 */
APP_DECLARE(int) memcs(const char *data, int lenData, int *posData, const char *key, int lenKey, int *posKey) {
    int i, matched;

    if (data == NULL || key == NULL || posData == NULL || posKey == NULL || lenKey <= 0) {
        return -1;
    }

    i = *posData;
    matched = *posKey;
    if (matched < 0 || matched >= lenKey) {
        matched = 0;
    }

    while (i < lenData) {
        if (data[i] == key[matched]) {
            i++;
            if (++matched == lenKey) {
                *posData = i; /* cursor waits right behind the match for the next query */
                *posKey = 0;
                return 0;
            }
        } else if (matched > 0) {
            /* Retry the same data byte against a shorter prefix of the key. */
            matched = memcs_fallback(key, matched);
        } else {
            i++;
        }
    }

    *posData = i;
    *posKey = matched;
    return matched > 0 ? 1 : -1;
}

/*
 * Return a newly allocated, NUL-terminated copy of str[start .. start+len).
 * Returns NULL when str is NULL, len is not positive, or allocation fails.
 * The caller owns the result and releases it with free().
 */
APP_DECLARE(char *) substr(const char *str, int start, int len) {
    char *ret;

    if (NULL == str || len <= 0) {
        return NULL;
    }
    ret = NEW(char, len + 1);
    if (ret != NULL) {
        memcpy(ret, &str[start], (size_t)len);
    }
    return ret;
}

/* Locate needle inside hay[0..hayLen); returns NULL when it does not occur. */
APP_STATIC(const char *) find_bytes(const char *hay, size_t hayLen, const char *needle, size_t needleLen) {
    size_t i;

    if (needleLen == 0) {
        return hay;
    }
    if (hayLen < needleLen) {
        return NULL;
    }
    for (i = 0; i + needleLen <= hayLen; i++) {
        if (hay[i] == needle[0] && memcmp(hay + i, needle, needleLen) == 0) {
            return hay + i;
        }
    }
    return NULL;
}

/*
 * Extract the text found between the first occurrence of `start` and the next
 * occurrence of `end`, looking only at the first `len` bytes of `str` (and
 * never past a NUL terminator).
 *
 * Returns a newly allocated string owned by the caller, or NULL when either
 * delimiter is missing, the enclosed text is empty, or allocation fails.
 * Unlike the strstr()-based original, both delimiters must lie completely
 * inside the inspected range, so bytes behind the range are never read.
 */
APP_DECLARE(char *) SUBSTR(const char *str, size_t len, const char *start, const char *end) {
    const char *p, *q;
    size_t usable = 0, lenStart, lenEnd;

    if (str == NULL || start == NULL || end == NULL) {
        return NULL;
    }
    while (usable < len && str[usable] != '\0') {
        usable++;
    }
    lenStart = strlen(start);
    lenEnd = strlen(end);

    p = find_bytes(str, usable, start, lenStart);
    if (p == NULL) {
        return NULL;
    }
    p += lenStart;

    q = find_bytes(p, usable - (size_t)(p - str), end, lenEnd);
    if (q == NULL) {
        return NULL;
    }
    return substr(p, 0, (int)(q - p));
}

/*
 * Append `len` bytes of `str` to the heap string *ret (which may be NULL) and
 * keep it NUL-terminated.
 *
 * Returns the (possibly moved) string, which is also stored in *ret. When
 * str is NULL or len < 1 nothing happens and *ret is returned. On allocation
 * failure NULL is returned and *ret is left untouched.
 *
 * The current length is determined with strlen(), so values containing NUL
 * bytes cannot be extended correctly.
 */
APP_DECLARE(char *) concat(char **ret, const char *str, int len) {
    char *ptr;
    size_t used = 0;

    if (ret == NULL) {
        return NULL;
    }
    if (str == NULL || len < 1) {
        return *ret;
    }
    if (*ret != NULL) {
        used = strlen(*ret);
    }
    ptr = (char *)realloc(*ret, used + (size_t)len + 1);
    if (ptr == NULL) {
        return NULL;
    }
    memcpy(ptr + used, str, (size_t)len);
    ptr[used + (size_t)len] = '\0';
    return *ret = ptr;
}

/* printf-style logging: appends to ./debug.txt, or to stderr if that file cannot be opened. */
APP_STATIC(void) debug(const char *fmt, ...) {
    va_list list;
    FILE *fp = fopen("./debug.txt", "ab");

    va_start(list, fmt);
    if (fp != NULL) {
        vfprintf(fp, fmt, list);
        fclose(fp);
    } else {
        vfprintf(stderr, fmt, list);
    }
    va_end(list);
}

/* Size of one parse chunk. 256 is for testing; the article suggests 204800 for real use. */
#define CGI_PARSE_CHUNK 256

typedef struct __cgi_t cgi_t, *cgi_ptr;
typedef struct __cgi_item_t cgi_item_t, *cgi_item_ptr;
typedef struct __cgi_file_t cgi_file_t, *cgi_file_ptr;

typedef enum {
    PARSE_ERROR = -1,
    PARSE_BEGIN,
    PARSE_HEAD,
    PARSE_BODY,
    PARSE_END
} cgiParseState;

typedef enum {
    CGI_ITEM_ENV,
    CGI_ITEM_QUERY_STRING,
    CGI_ITEM_FORM,
    CGI_ITEM_FILE
} cgiItemType;

struct __cgi_t {
    size_t totalBytes;
    char *requestMethod, *requestType, *boundary;
    cgi_item_ptr envs, queries, forms;
};

struct __cgi_item_t {
    cgiItemType type;
    char *name;
    union {
        char *value;       /* CGI_ITEM_FORM */
        cgi_file_ptr file; /* CGI_ITEM_FILE */
    };
    cgi_item_ptr next;
};

struct __cgi_file_t {
    char *filename, path[MIN_PATH_SIZE + 1];
    size_t size;
    int fd; /* set to -1 by cgiParse() once the temporary file has been closed */
};

APP_DECLARE(cgi_ptr) cgiInit(void);
APP_DECLARE(cgiParseState) cgiParse(cgi_ptr cgi, FILE *cgiIn);

/* Print every form field / uploaded file of the list starting at `item` to stdout. */
APP_STATIC(void) cgiPrintItem(cgi_ptr cgi, cgi_item_ptr item) {
    cgi_item_ptr it;

    (void)cgi;
    for (it = item; it; it = it->next) {
        switch (it->type) {
        case CGI_ITEM_FORM:
            /* value stays NULL for an empty field; never hand NULL to %s */
            printf("Form[%s].length = [%lu]\nForm[%s].value = %s\n",
                   it->name, (unsigned long)(it->value ? strlen(it->value) : 0),
                   it->name, it->value ? it->value : "");
            break;
        case CGI_ITEM_FILE:
            printf("File[%s].length = %lu\nFile[%s].filename = %s\nFile[%s].path = %s\n",
                   it->name, (unsigned long)it->file->size,
                   it->name, it->file->filename,
                   it->name, it->file->path);
            break;
        default:
            break;
        }
        printf("---------------------------------\n");
    }
}

/*
 * Entry point.
 *
 * CGI mode (REQUEST_METHOD set): parse the request body from stdin.
 * Local test mode: parse a saved request body; argv[1] optionally overrides
 * the file path and argv[2] the boundary string.
 */
int main(int argc, char *argv[]) {
    time_t start = time(0);
    cgi_ptr cgi = cgiInit();

    if (cgi == NULL) {
        fprintf(stderr, "cgiInit() failed.\n");
        return 1;
    }

    if (cgi->requestMethod == NULL) { /* local test */
        /* file holding the saved request body */
        const char *path = argc > 1 ? argv[1] : "D:/www/tmp.txt";
        FILE *fp = fopen(path, "rb");

        if (fp != NULL) {
            long size = -1;

            /* boundary used inside the saved request body */
            cgi->boundary = argc > 2 ? argv[2] : "----WebKitFormBoundaryvWmSGiJ8xX3qdocw";
            if (fseek(fp, 0, SEEK_END) == 0) {
                size = ftell(fp);
            }
            if (size >= 0 && fseek(fp, 0, SEEK_SET) == 0) {
                cgi->totalBytes = (size_t)size;
                cgiParse(cgi, fp);
            }
            fclose(fp); /* only close a stream that was actually opened */
        }
    } else {
#ifdef _WIN32
        /* Uploaded data is binary: text-mode translation on stdin would corrupt it. */
        setmode(fileno(stdin), O_BINARY);
        setmode(fileno(stdout), O_BINARY);
#endif
        if (cgi->boundary) {
            cgiParse(cgi, stdin);
        }
    }

    printf("Content-Type: text/plain; charset=utf-8\n\n");
    cgiPrintItem(cgi, cgi->forms);
    printf("upload %.2f MB used %ld sec\n",
           (double)cgi->totalBytes / (1024.0 * 1024.0), (long)(time(0) - start));
    return 0;
}

/*
 * Allocate a cgi_t and fill it from the CGI environment variables
 * CONTENT_LENGTH, REQUEST_METHOD and CONTENT_TYPE.
 *
 * Returns NULL if allocation fails. `boundary` points into the CONTENT_TYPE
 * environment string (right behind "boundary="); it is not copied.
 * NOTE(review): a quoted boundary (boundary="...") or trailing parameters are
 * not stripped here - confirm whether the target server ever sends them.
 */
APP_DECLARE(cgi_ptr) cgiInit(void) {
    cgi_ptr cgi = NEW(cgi_t, 1);
    char *tmp;

    if (cgi == NULL) {
        return NULL;
    }

    tmp = getenv("CONTENT_LENGTH");
    if (tmp != NULL) {
        char *end = NULL;
        long n;

        errno = 0;
        n = strtol(tmp, &end, 10);
        if (errno == 0 && end != tmp && n > 0) {
            cgi->totalBytes = (size_t)n;
        }
    }

    cgi->requestMethod = getenv("REQUEST_METHOD");
    cgi->requestType = getenv("CONTENT_TYPE");
    if (cgi->requestType != NULL && (tmp = strstr(cgi->requestType, "boundary=")) != NULL) {
        cgi->boundary = tmp + 9; /* 9 == strlen("boundary=") */
    }
    return cgi;
}

/*
 * Create the temporary file that receives an upload. On success cf->fd is the
 * open descriptor and cf->path the generated name; on failure cf->fd is -1.
 */
APP_STATIC(Boolean) cgi_mktemp(cgi_ptr cgi, cgi_file_ptr cf) {
    char tpl[] = "UP-XXXXXX";

    (void)cgi;
    if (-1 == (cf->fd = mkstemp(tpl))) {
        fprintf(stderr, "%s mkstemp() failed.\n", __func__);
        return False;
    }
    snprintf(cf->path, sizeof cf->path, "%s", tpl);
    return True;
}

/* Close the temporary file of an upload exactly once. */
APP_STATIC(void) cgi_file_close(cgi_file_ptr cf) {
    if (cf != NULL && cf->fd >= 0) {
        close(cf->fd);
        cf->fd = -1;
    }
}

/* Write all `len` bytes to fd, continuing after short writes. */
APP_STATIC(Boolean) cgi_write_all(int fd, const char *data, int len) {
    while (len > 0) {
        int n = (int)write(fd, data, (unsigned int)len);

        if (n <= 0) {
            return False;
        }
        data += n;
        len -= n;
    }
    return True;
}

/* Append body bytes to an item: to its value (form field) or its temporary file (upload). */
APP_STATIC(Boolean) cgi_item_append(cgi_item_ptr it, const char *data, int len) {
    if (len <= 0) {
        return True;
    }
    if (it->type == CGI_ITEM_FORM) {
        return concat(&it->value, data, len) != NULL ? True : False;
    }
    if (!cgi_write_all(it->file->fd, data, len)) {
        return False;
    }
    it->file->size += (size_t)len;
    return True;
}

/*
 * Parse cgi->totalBytes bytes of multipart/form-data from cgiIn, using
 * cgi->boundary as the part separator, and store the parts as a linked list
 * in cgi->forms.
 *
 * The input is processed CGI_PARSE_CHUNK bytes at a time. Whenever the bytes
 * at the end of the buffer cannot be interpreted yet (an incomplete header,
 * a possible start of the next boundary) they are moved to the front of the
 * buffer and the next read is appended behind them.
 *
 * Returns the final parser state: PARSE_END after the closing boundary was
 * seen, PARSE_ERROR on malformed input or I/O/allocation failure, and any
 * other state when the input ended prematurely.
 *
 * Limits: a part's header block must fit into one chunk, and the boundary
 * must be shorter than CGI_PARSE_CHUNK - 4 bytes.
 */
APP_DECLARE(cgiParseState) cgiParse(cgi_ptr cgi, FILE *cgiIn) {
    char buf[CGI_PARSE_CHUNK + 1] = {0};
    char CRLF2[] = "\r\n\r\n";
    const char *key;
    char *filename;
    size_t total, consumed = 0, lenRaw;
    int chunk = 0;           /* number of valid bytes in buf */
    int carry = 0;           /* bytes kept at the front of buf for the next read */
    int pos = 0;             /* start of the header/body bytes not handed out yet */
    int posData = 0;         /* memcs() scan cursor inside buf */
    int posKey = 0;          /* memcs() partial-match length */
    int len, cs, lenBoundary, lenKey;
    const int lenCRLF2 = (int)(sizeof CRLF2 - 1);
    int pendingBoundary = 0; /* a boundary was consumed; the next 2 bytes decide "--" (end) or a new part */
    cgiParseState state = PARSE_BEGIN;
    cgi_item_ptr it;

    if (cgi == NULL || cgiIn == NULL || cgi->boundary == NULL) {
        return PARSE_ERROR;
    }

    lenRaw = strlen(cgi->boundary);
    if (lenRaw == 0 || lenRaw + CGI_BOUNDARY_PREFIX >= (size_t)CGI_PARSE_CHUNK) {
        /* A longer boundary could fill the whole buffer as carry-over and stall the reader. */
        debug("boundary length %lu is not supported\n", (unsigned long)lenRaw);
        return PARSE_ERROR;
    }
    lenBoundary = (int)lenRaw;
    key = cgi->boundary;
    lenKey = lenBoundary;
    total = cgi->totalBytes;

    cgi->forms = NEW(cgi_item_t, 1);
    if (cgi->forms == NULL) {
        return PARSE_ERROR;
    }
    it = cgi->forms;

    while (consumed < total && state != PARSE_END && state != PARSE_ERROR) {
        size_t want = total - consumed;

        if (want > (size_t)(CGI_PARSE_CHUNK - carry)) {
            want = (size_t)(CGI_PARSE_CHUNK - carry);
        }
        if (want != fread(&buf[carry], sizeof(char), want, cgiIn)) {
            debug("fread failed at %lu\n", (unsigned long)(consumed + want));
            state = PARSE_ERROR;
            break;
        }
        consumed += want;
        chunk = carry + (int)want;
        buf[chunk] = '\0'; /* keeps buf printable for the debug messages below */

        while (state != PARSE_END && state != PARSE_ERROR) {
            if (PARSE_BEGIN == state) {
                /* Start of parsing: the first boundary must be inside the first chunk. */
                cs = memcs(buf, chunk, &posData, key, lenKey, &posKey);
                if (0 != cs) {
                    debug("PARSE_BEGIN failed [%s]\n", buf);
                    state = PARSE_ERROR;
                } else {
                    /* Next: find the end of the part header ("\r\n\r\n"). */
                    state = PARSE_HEAD;
                    key = CRLF2;
                    lenKey = lenCRLF2;
                    pos = posData; /* header text starts right behind the boundary */
                }
            } else if (PARSE_HEAD == state) {
                if (pendingBoundary) {
                    if (chunk - pos < 2) {
                        /* The two bytes behind the boundary are not in the buffer yet. */
                        carry = chunk - pos;
                        memmove(buf, &buf[pos], (size_t)carry);
                        pos = 0;
                        posData = 0; /* carried bytes have not been scanned for CRLF2 yet */
                        break;
                    }
                    if (buf[pos] == '-' && buf[pos + 1] == '-') {
                        state = PARSE_END; /* closing boundary: "--{boundary}--" */
                        continue;
                    }
                    pendingBoundary = 0;
                    it->next = NEW(cgi_item_t, 1);
                    if (it->next == NULL) {
                        debug("PARSE_HEAD failed: out of memory\n");
                        state = PARSE_ERROR;
                        continue;
                    }
                    it = it->next;
                }

                cs = memcs(buf, chunk, &posData, key, lenKey, &posKey);
                if (0 != cs) {
                    /* Header incomplete: keep it and append more input behind it. */
                    carry = chunk - pos;
                    if (carry >= CGI_PARSE_CHUNK) {
                        /* No room left to read into - the original looped forever here. */
                        debug("PARSE_HEAD failed: header larger than %d bytes\n", CGI_PARSE_CHUNK);
                        state = PARSE_ERROR;
                        continue;
                    }
                    memmove(buf, &buf[pos], (size_t)carry);
                    posData = carry; /* these bytes were scanned already; posKey carries the partial match */
                    pos = 0;
                    break;
                }

                it->name = SUBSTR(&buf[pos], (size_t)(posData - pos), "; name=\"", "\"");
                if (it->name == NULL) {
                    debug("PARSE_HEAD failed [%s]", buf);
                    state = PARSE_ERROR;
                    continue;
                }

                filename = SUBSTR(&buf[pos], (size_t)(posData - pos), "; filename=\"", "\"");
                if (NULL != filename) {
                    /* Uploads go to a temporary file instead of being held in memory. */
                    it->file = NEW(cgi_file_t, 1);
                    if (it->file == NULL) {
                        free(filename);
                        debug("PARSE_HEAD failed: out of memory\n");
                        state = PARSE_ERROR;
                        continue;
                    }
                    it->type = CGI_ITEM_FILE;
                    it->file->fd = -1;
                    it->file->filename = filename;
                    if (!cgi_mktemp(cgi, it->file)) {
                        debug("PARSE_HEAD failed: no temporary file for [%s]\n", it->name);
                        state = PARSE_ERROR;
                        continue;
                    }
                } else {
                    /* Also reached for filename="" (file input left empty). */
                    it->type = CGI_ITEM_FORM;
                }

                /* Next: the part body, which ends at the next boundary. */
                state = PARSE_BODY;
                key = cgi->boundary;
                lenKey = lenBoundary;
                pos = posData;
            } else if (PARSE_BODY == state) {
                cs = memcs(buf, chunk, &posData, key, lenKey, &posKey);
                if (0 == cs) {
                    /* Body ends before the "\r\n--" that precedes the boundary. */
                    len = posData - lenKey - pos - CGI_BOUNDARY_PREFIX;
                    if (!cgi_item_append(it, &buf[pos], len)) {
                        debug("PARSE_BODY failed to store %d bytes of [%s]\n", len, it->name);
                        state = PARSE_ERROR;
                    }
                    if (it->type == CGI_ITEM_FILE) {
                        cgi_file_close(it->file);
                    }
                    if (state != PARSE_ERROR) {
                        /* Whether this was the closing boundary is decided in PARSE_HEAD. */
                        pendingBoundary = 1;
                        state = PARSE_HEAD;
                        key = CRLF2;
                        lenKey = lenCRLF2;
                        pos = posData;
                    }
                } else {
                    /*
                     * No complete boundary in the buffer. Everything except the
                     * tail that may still belong to "\r\n--{boundary}" is body
                     * data: hold back the partially matched boundary bytes plus
                     * the four prefix bytes, hand out the rest.
                     */
                    int avail = chunk - pos;
                    int keep = posKey + CGI_BOUNDARY_PREFIX;

                    if (keep > avail) {
                        keep = avail; /* never reach back into the header */
                    }
                    len = avail - keep;
                    if (!cgi_item_append(it, &buf[pos], len)) {
                        debug("PARSE_BODY failed to store %d bytes of [%s]\n", len, it->name);
                        state = PARSE_ERROR;
                        break;
                    }
                    memmove(buf, &buf[chunk - keep], (size_t)keep);
                    carry = keep;
                    posData = keep; /* scanned already; posKey carries the partial match */
                    pos = 0;
                    break;
                }
            }
        }
    }

    /* Error or truncated input while an upload was still open: do not leak the descriptor. */
    if (it != NULL && it->type == CGI_ITEM_FILE && it->file != NULL) {
        cgi_file_close(it->file);
    }
    return state;
}

编译生成test.cgi并运行

gcc -o test.cgi test.c

./test.cgi

解析成功后,在程序运行目录产生N个UP-xxxxxx文件,这些就是上传的临时文件了,于是投放到实际环境中。创建test.html:

<form action="/cgi-bin/test.cgi" method="POST" enctype="multipart/form-data"><table border="1" style="width: 100%; border: 1px solid #000000;" padding="5"><tr><td width="100">username</td><td><input type="text" name="username"/></td></tr><tr><td>password</td><td><input type="password" name="password"/></td></tr><tr><td>content</td><td><textarea name="content" style="width: 90%; height: 300px"></textarea></td></tr><tr><td>file</td><td><input type="file" name="file1"/></td></tr><tr><td>file</td><td><input type="file" name="file2"/></td></tr><tr><td>file</td><td><input type="file" name="file3"/></td></tr><tr><td>file</td><td><input type="file" name="file4"/></td></tr><tr><td>file</td><td><input type="file" name="file5"/></td></tr><tr><td colspan="2"><input type="submit" value="SUBMIT"/></td></tr></table></form>

在IIS中创建虚拟目录cgi-bin,指向test.cgi所在目录,更改IIS上传设置为1G:

rem 打开cmd,执行:
%windir%\system32\inetsrv\appcmd.exe set config -section:system.webserver/serverruntime /uploadreadaheadsize:1073741824 /commit:apphost

进入站点根目录,在web.config中的configuration/system.webServer加入security节点:

<configuration><system.webServer><security><requestFiltering><requestLimits maxAllowedContentLength="1073741824" /></requestFiltering></security>...</system.webServer></configuration>

然后打开http://localhost/test.html,添加5个30M左右的文件,textarea里加入一段超长的text文件内容,比如163主页的源码,然后post,本地上传100MB,大概也就是3秒。

动态分配的内存就没释放了,仅仅是个测试,比如存放表单数据的cgi->forms最好是用hash表加array,因为input[type=checkbox]有多个同名form-data,这些最好放在一个结构里,同样其他input也可以多个同名,还有cookie和session的支持、获取单个表单值、对ISAPI的支持、对utf-8的支持、对urlEncode的支持等等,都要考虑进去,要变成成品还有一段距离。

本内容不代表本网观点和政治立场,如有侵犯你的权益请联系我们处理。
网友评论
网友评论仅供其表达个人看法,并不表明网站立场。