sic/src/multipartdings/MultipartParser.h

430 lines
9.4 KiB
C++

#ifndef _MULTIPART_PARSER_H_
#define _MULTIPART_PARSER_H_
#include <sys/types.h>
#include <cstddef>
#include <cstring>
#include <stdexcept>
#include <string>
/**
 * Incremental, callback-driven parser for multipart bodies.
 *
 * Usage: construct with the boundary token (without the leading "--"), assign
 * the public callback pointers that are of interest, then push the body through
 * feed() in chunks of any size.
 *
 * Callback conventions:
 *  - Data callbacks (onHeaderField, onHeaderValue, onPartData) receive a buffer
 *    and a half-open range [start, end) inside that buffer. A single logical
 *    value may be delivered in several calls when it spans feed() chunks or
 *    when a suspected boundary turns out to be ordinary data.
 *  - Event callbacks (onPartBegin, onHeaderEnd, onHeadersEnd, onPartEnd, onEnd)
 *    are invoked with buffer == NULL and start == end == (size_t) -1.
 *  - userData is passed verbatim as the last argument of every callback.
 *
 * The object owns no raw resources, so the compiler-generated copy operations
 * produce an independent parser that carries the same parsing position.
 */
class MultipartParser {
public:
    typedef void (*Callback)(const char *buffer, size_t start, size_t end, void *userData);

private:
    static const char CR = 13;
    static const char LF = 10;
    static const char SPACE = 32;
    static const char HYPHEN = 45;
    static const char COLON = 58;
    // Sentinel meaning "no range is currently being collected".
    static const size_t UNMARKED = (size_t) -1;

    enum State {
        ERROR,
        START,
        START_BOUNDARY,
        HEADER_FIELD_START,
        HEADER_FIELD,
        HEADER_VALUE_START,
        HEADER_VALUE,
        HEADER_VALUE_ALMOST_DONE,
        HEADERS_ALMOST_DONE,
        PART_DATA_START,
        PART_DATA,
        PART_END,
        END
    };

    enum Flags {
        PART_BOUNDARY = 1,
        LAST_BOUNDARY = 2
    };

    // "\r\n--" followed by the user supplied boundary token.
    std::string boundary;
    // Cached boundary.size().
    size_t boundarySize;
    // boundaryIndex[b] is true when byte b occurs anywhere in `boundary`.
    bool boundaryIndex[256];
    // Bytes consumed while tentatively matching a boundary inside part data;
    // replayed through onPartData if the match turns out to be a false lead.
    std::string lookbehind;
    State state;
    int flags;
    // Number of bytes matched so far within the current state.
    size_t index;
    size_t headerFieldMark;
    size_t headerValueMark;
    size_t partDataMark;
    const char *errorReason;

    /** Sets every callback pointer and userData to NULL. */
    void resetCallbacks() {
        onPartBegin = NULL;
        onHeaderField = NULL;
        onHeaderValue = NULL;
        onHeaderEnd = NULL;
        onHeadersEnd = NULL;
        onPartData = NULL;
        onPartEnd = NULL;
        onEnd = NULL;
        userData = NULL;
    }

    /** Rebuilds the byte membership table used by the skip loop in processPartData(). */
    void indexBoundary() {
        std::memset(boundaryIndex, 0, sizeof(boundaryIndex));
        for (size_t k = 0; k < boundary.size(); k++) {
            boundaryIndex[(unsigned char) boundary[k]] = true;
        }
    }

    /**
     * Invokes `cb` if it is set. Empty ranges (start == end) are suppressed
     * unless allowEmpty is true; event notifications use the UNMARKED defaults
     * and are therefore always delivered.
     */
    void callback(Callback cb, const char *buffer = NULL, size_t start = UNMARKED,
        size_t end = UNMARKED, bool allowEmpty = false)
    {
        if (start != UNMARKED && start == end && !allowEmpty) {
            return;
        }
        if (cb != NULL) {
            cb(buffer, start, end, userData);
        }
    }

    /**
     * Flushes the range that started at `mark`.
     *
     * With clear == true the value is complete: [mark, i) is emitted and the
     * mark is released. With clear == false the chunk ran out first:
     * [mark, bufferLen) is emitted and the mark is moved to 0 so collection
     * continues at the start of the next chunk.
     */
    void dataCallback(Callback cb, size_t &mark, const char *buffer, size_t i, size_t bufferLen,
        bool clear, bool allowEmpty = false)
    {
        if (mark == UNMARKED) {
            return;
        }
        if (!clear) {
            callback(cb, buffer, mark, bufferLen, allowEmpty);
            mark = 0;
        } else {
            callback(cb, buffer, mark, i, allowEmpty);
            mark = UNMARKED;
        }
    }

    /** ASCII lower-casing by setting bit 0x20; only meaningful for letters. */
    char lower(char c) const {
        return c | 0x20;
    }

    inline bool isBoundaryChar(char c) const {
        return boundaryIndex[(unsigned char) c];
    }

    bool isHeaderFieldCharacter(char c) const {
        return (c >= 'a' && c <= 'z')
            || (c >= 'A' && c <= 'Z')
            || c == HYPHEN;
    }

    /** Puts the parser into the ERROR state and records the reason. */
    void setError(const char *message) {
        state = ERROR;
        errorReason = message;
    }

    /**
     * Handles one byte (or a skipped run of bytes) of part data.
     *
     * `prevIndex`, `index`, `i`, `state` and `flags` are feed()'s working
     * copies, passed by reference so this helper can advance them. `i` may be
     * moved forward by the skip loop, or back by one when a byte has to be
     * reconsidered after a false boundary lead.
     *
     * Throws std::out_of_range if the look-behind buffer would overflow, which
     * indicates an internal parser bug.
     */
    void processPartData(size_t &prevIndex, size_t &index, const char *buffer,
        size_t len, size_t boundaryEnd, size_t &i, char c, State &state, int &flags)
    {
        prevIndex = index;

        if (index == 0) {
            // Boyer-Moore derived skip: if the byte where a boundary starting
            // at i would end is not a boundary byte at all, no boundary can
            // start anywhere in [i, i + boundarySize).
            while (i + boundarySize <= len) {
                if (isBoundaryChar(buffer[i + boundaryEnd])) {
                    break;
                }
                i += boundarySize;
            }
            if (i >= len) {
                // The skip consumed the rest of the chunk. Reading buffer[i]
                // here would be out of bounds; feed() flushes the pending
                // part data once its loop terminates.
                return;
            }
            c = buffer[i];
        }

        if (index < boundarySize) {
            if (boundary[index] == c) {
                if (index == 0) {
                    // Possible boundary start: everything before it is data.
                    dataCallback(onPartData, partDataMark, buffer, i, len, true);
                }
                index++;
            } else {
                index = 0;
            }
        } else if (index == boundarySize) {
            index++;
            // Drop flags left behind by an earlier false lead so they cannot
            // influence how this candidate boundary is classified.
            flags &= ~(PART_BOUNDARY | LAST_BOUNDARY);
            if (c == CR) {
                // CR = part boundary
                flags |= PART_BOUNDARY;
            } else if (c == HYPHEN) {
                // HYPHEN = end boundary
                flags |= LAST_BOUNDARY;
            } else {
                index = 0;
            }
        } else if (index - 1 == boundarySize) {
            if (flags & PART_BOUNDARY) {
                index = 0;
                if (c == LF) {
                    // unset the PART_BOUNDARY flag
                    flags &= ~PART_BOUNDARY;
                    callback(onPartEnd);
                    callback(onPartBegin);
                    state = HEADER_FIELD_START;
                    return;
                }
            } else if (flags & LAST_BOUNDARY) {
                if (c == HYPHEN) {
                    callback(onPartEnd);
                    callback(onEnd);
                    state = END;
                } else {
                    index = 0;
                }
            } else {
                index = 0;
            }
        } else if (index - 2 == boundarySize) {
            if (c == CR) {
                index++;
            } else {
                index = 0;
            }
        } else if (index - boundarySize == 3) {
            index = 0;
            if (c == LF) {
                callback(onPartEnd);
                callback(onEnd);
                state = END;
                return;
            }
        }

        if (index > 0) {
            // when matching a possible boundary, keep a lookbehind reference
            // in case it turns out to be a false lead
            if (index - 1 >= lookbehind.size()) {
                setError("Parser bug: index overflows lookbehind buffer. "
                    "Please send bug report with input file attached.");
                throw std::out_of_range("index overflows lookbehind buffer");
            }
            lookbehind[index - 1] = c;
        } else if (prevIndex > 0) {
            // if our boundary turned out to be rubbish, the captured lookbehind
            // belongs to partData
            callback(onPartData, lookbehind.data(), 0, prevIndex);
            prevIndex = 0;
            partDataMark = i;
            // Reconsider the current character even though it interrupted the
            // sequence: it could be the beginning of a new sequence. When i is
            // 0 this wraps around and the loop increment in feed() brings it
            // back to 0.
            i--;
        }
    }

public:
    Callback onPartBegin;
    Callback onHeaderField;
    Callback onHeaderValue;
    Callback onHeaderEnd;
    Callback onHeadersEnd;
    Callback onPartData;
    Callback onPartEnd;
    Callback onEnd;
    void *userData;

    /** Creates a parser without a boundary; it stays in the error state until setBoundary(). */
    MultipartParser() {
        resetCallbacks();
        reset();
    }

    /** Creates a parser that is ready to parse bodies delimited by `boundary`. */
    MultipartParser(const std::string &boundary) {
        resetCallbacks();
        setBoundary(boundary);
    }

    /**
     * Discards the boundary and all parsing progress. Callbacks and userData
     * are kept. Afterwards the parser reports "Parser uninitialized." until
     * setBoundary() is called.
     */
    void reset() {
        state = ERROR;
        boundary.clear();
        boundarySize = 0;
        std::memset(boundaryIndex, 0, sizeof(boundaryIndex));
        lookbehind.clear();
        flags = 0;
        index = 0;
        headerFieldMark = UNMARKED;
        headerValueMark = UNMARKED;
        partDataMark = UNMARKED;
        errorReason = "Parser uninitialized.";
    }

    /**
     * Resets the parser and arms it for a body delimited by `boundary`
     * (the token as given in the Content-Type header, without leading "--").
     */
    void setBoundary(const std::string &boundary) {
        reset();
        this->boundary = "\r\n--" + boundary;
        boundarySize = this->boundary.size();
        indexBoundary();
        // Room for the delimiter plus the few bytes that may follow it.
        lookbehind.assign(boundarySize + 8, '\0');
        state = START;
        errorReason = "No error.";
    }

    /**
     * Parses the next `len` bytes of the body.
     *
     * Returns the number of bytes consumed: `len` when the whole chunk was
     * processed, the offset of the first unprocessed byte when parsing stopped
     * early (malformed input, or input following the closing boundary), and 0
     * when the parser is already in the error state or `len` is 0.
     *
     * Throws std::out_of_range on an internal look-behind overflow.
     */
    size_t feed(const char *buffer, size_t len) {
        if (state == ERROR || len == 0) {
            return 0;
        }

        // Work on local copies; they are written back once the chunk is done.
        State state = this->state;
        int flags = this->flags;
        size_t prevIndex = this->index;
        size_t index = this->index;
        size_t boundaryEnd = boundarySize - 1;
        size_t i;
        char c, cl;

        for (i = 0; i < len; i++) {
            c = buffer[i];

            switch (state) {
            case ERROR:
                return i;
            case START:
                index = 0;
                state = START_BOUNDARY;
                // fall through
            case START_BOUNDARY:
                // The first delimiter has no preceding CRLF, hence the
                // offsets of 2 into `boundary`.
                if (index == boundarySize - 2) {
                    if (c != CR) {
                        setError("Malformed. Expected CR after boundary.");
                        return i;
                    }
                    index++;
                    break;
                } else if (index - 1 == boundarySize - 2) {
                    if (c != LF) {
                        setError("Malformed. Expected LF after boundary CR.");
                        return i;
                    }
                    index = 0;
                    callback(onPartBegin);
                    state = HEADER_FIELD_START;
                    break;
                }
                if (c != boundary[index + 2]) {
                    setError("Malformed. Found different boundary data than the given one.");
                    return i;
                }
                index++;
                break;
            case HEADER_FIELD_START:
                state = HEADER_FIELD;
                headerFieldMark = i;
                index = 0;
                // fall through
            case HEADER_FIELD:
                if (c == CR) {
                    // Empty line: end of this part's headers.
                    headerFieldMark = UNMARKED;
                    state = HEADERS_ALMOST_DONE;
                    break;
                }
                index++;
                if (c == HYPHEN) {
                    break;
                }
                if (c == COLON) {
                    if (index == 1) {
                        // empty header field
                        setError("Malformed first header name character.");
                        return i;
                    }
                    dataCallback(onHeaderField, headerFieldMark, buffer, i, len, true);
                    state = HEADER_VALUE_START;
                    break;
                }
                cl = lower(c);
                if (cl < 'a' || cl > 'z') {
                    setError("Malformed header name.");
                    return i;
                }
                break;
            case HEADER_VALUE_START:
                if (c == SPACE) {
                    break;
                }
                headerValueMark = i;
                state = HEADER_VALUE;
                // fall through
            case HEADER_VALUE:
                if (c == CR) {
                    dataCallback(onHeaderValue, headerValueMark, buffer, i, len, true, true);
                    callback(onHeaderEnd);
                    state = HEADER_VALUE_ALMOST_DONE;
                }
                break;
            case HEADER_VALUE_ALMOST_DONE:
                if (c != LF) {
                    setError("Malformed header value: LF expected after CR");
                    return i;
                }
                state = HEADER_FIELD_START;
                break;
            case HEADERS_ALMOST_DONE:
                if (c != LF) {
                    setError("Malformed header ending: LF expected after CR");
                    return i;
                }
                callback(onHeadersEnd);
                state = PART_DATA_START;
                break;
            case PART_DATA_START:
                state = PART_DATA;
                partDataMark = i;
                // fall through
            case PART_DATA:
                processPartData(prevIndex, index, buffer, len, boundaryEnd, i, c, state, flags);
                break;
            default:
                // Terminal state (e.g. END) reached earlier in this chunk.
                // Store it before bailing out, otherwise succeeded() and
                // stopped() would keep reporting the state from before
                // this call.
                this->index = index;
                this->state = state;
                this->flags = flags;
                return i;
            }
        }

        // The chunk ended in the middle of a value: emit what we have so far.
        dataCallback(onHeaderField, headerFieldMark, buffer, i, len, false);
        dataCallback(onHeaderValue, headerValueMark, buffer, i, len, false);
        dataCallback(onPartData, partDataMark, buffer, i, len, false);

        this->index = index;
        this->state = state;
        this->flags = flags;

        return len;
    }

    /** True once the closing boundary has been parsed. */
    bool succeeded() const {
        return state == END;
    }

    /** True when the parser is in the error state (including "uninitialized"). */
    bool hasError() const {
        return state == ERROR;
    }

    /** True when no further input will be processed (error or finished). */
    bool stopped() const {
        return state == ERROR || state == END;
    }

    /** Human-readable description of the current error, or "No error.". */
    const char *getErrorMessage() const {
        return errorReason;
    }
};
#endif /* _MULTIPART_PARSER_H_ */