2025-04-27 07:49:33 -04:00

628 lines
18 KiB
C++

#include <wininetp.h>
#include <perfdiag.hxx>
#include "httpp.h"
//
// HTTP_HEADER_PARSER implementation
//
HTTP_HEADER_PARSER::HTTP_HEADER_PARSER(
    IN LPSTR szHeaders,
    IN DWORD cbHeaders
    ) : HTTP_HEADERS()
/*++
Routine Description:
    Constructs an HTTP_HEADER_PARSER from a header buffer by handing the
    whole buffer to ParseHeaders() in a single call, with the end-of-data
    flag set so that no further input is expected.

    The constructor cannot report failure; the outcome of the parse is only
    checked through INET_ASSERT.
Arguments:
    szHeaders - pointer to the header text to parse
    cbHeaders - number of bytes available at szHeaders
Return Value:
    None.
--*/
{
    BOOL fSawCompleteLine;
    BOOL fSawEndOfHeaders;
    DWORD cbScanned = 0;    // parsing starts at offset 0 of szHeaders

    const DWORD dwParseResult = ParseHeaders(szHeaders,
                                             cbHeaders,
                                             TRUE,      // fEof: this is all the data there is
                                             &cbScanned,
                                             &fSawCompleteLine,
                                             &fSawEndOfHeaders);

    INET_ASSERT(dwParseResult == ERROR_SUCCESS);
    INET_ASSERT(fSawCompleteLine);
    INET_ASSERT(fSawEndOfHeaders);
}
/* // some test cases which can be used to test ParseStatusLine()
char bad1[] = "HTTP1.1 200 Description yeah yeah\r\n";
char bad2[] = "HTTP/1234.1 200 Description yeah yeah\r\n";
char bad3[] = "HTTP/1.1234 200 Description yeah yeah\r\n";
char bad4[] = "HTTP/1.1 1234 Description yeah yeah\r\n";
char bad5[] = "HTTP/ 1.1 200 Description yeah yeah\r\n";
char bad6[] = "HTTP/1.1 200 Description yeah yeah\r\n";
char bad7[] = "HTTP/1.1 200Description yeah yeah\r\n";
char bad8[3000] = "HTTP/1.1 200 Description yeah yeah";
char bad9[] = "HTTP/1 1.1 200 Description yeah yeah\r\n";
char good1[] = "HTTP/ 123.123 200 Description yeah yeah\r\n";
*/
DWORD
HTTP_HEADER_PARSER::ParseStatusLine(
    IN LPSTR lpHeaderBase,
    IN DWORD dwBufferLength,
    IN BOOL fEof,
    IN OUT DWORD *lpdwBufferLengthScanned,
    OUT BOOL *lpfNeedMoreBuffer,
    OUT DWORD *lpdwStatusCode,
    OUT DWORD *lpdwMajorVersion,
    OUT DWORD *lpdwMinorVersion
    )
/*++
Routine Description:
    Parses the Status line of an HTTP server response. Takes care of adding the
    status line to the HTTP header array.

    From HTTP v1.1. spec:
    {
        Status-Line = HTTP-Version SP Status-Code SP Reason-Phrase CRLF
        HTTP-Version = "HTTP" "/" 1*DIGIT "." 1*DIGIT
        Status-Code = 1*DIGIT
        Reason-Phrase = *<TEXT, excluding CR LF>
        (1*DIGIT means at least one digit, maybe more)
    }

    WinHTTP strictly enforces the status line spec. The only exception is that
    up to 3 spaces are allowed before the Status-Code and major version number.

    To prevent malicious servers from hogging the channel, the integers are
    limited to 3 digits and the Reason-Phrase is limited to
    GlobalMaxSizeStatusLineResultText characters.

    "HTTP" could be other things like "S-HTTP", this is checked by
    UpdateFromHeaders() before ParseStatusLine() is called. The existence of
    the first '/' is verified before ParseStatusLine is called.

Arguments:
    lpHeaderBase            - start of the response buffer
    dwBufferLength          - number of valid bytes at lpHeaderBase
    fEof                    - TRUE if no more data will follow; running out of
                              buffer before the line terminator is then an
                              invalid response instead of a request for more
    lpdwBufferLengthScanned - IN: offset from lpHeaderBase at which the status
                              line starts. OUT: on success, advanced by the
                              bytes consumed (status line plus its terminator);
                              left untouched on every other path
    lpfNeedMoreBuffer       - set to TRUE when the buffer ended before the
                              terminating LF and fEof is FALSE, FALSE otherwise
    lpdwStatusCode          - receives the status code (success only)
    lpdwMajorVersion        - receives the major HTTP version (success only)
    lpdwMinorVersion        - receives the minor HTTP version (success only)

Return Value:
    DWORD
        ERROR_SUCCESS
            - the line was parsed and stored, or more data is required
              (check *lpfNeedMoreBuffer to tell the two apart)
        ERROR_WINHTTP_INVALID_SERVER_RESPONSE
            - the status line does not conform to the rules above
        ERROR_WINHTTP_INTERNAL_ERROR
            - no free header slot could be obtained
--*/
{
#define BEFORE_VERSION_NUMBERS 0
#define MAJOR_VERSION_NUMBER 1
#define MINOR_VERSION_NUMBER 2
#define STATUS_CODE_NUMBER 3
#define AFTER_STATUS_CODE 4
#define MAX_STATUS_INTS 4

    LPSTR lpszEnd = lpHeaderBase + dwBufferLength;
    LPSTR response = lpHeaderBase + *lpdwBufferLengthScanned;
    LPSTR lpszStatusLine = response;
    DWORD dwBytesScanned = 0;
    DWORD dwStatusLineLength = 0;
    int ver_state = BEFORE_VERSION_NUMBERS;
    // Indexed by the *_NUMBER states above; slot 0 is unused.
    BOOL afStatusIntsFound[MAX_STATUS_INTS] = {0};
    DWORD adwStatusInts[MAX_STATUS_INTS] = {0};
    // Length of the piece currently being counted: a run of spaces, the digits
    // of one integer, or the reason phrase.
    DWORD dwStatusPieceLength = 0;
    // Holds a Win32 error code, so it is a DWORD like the return type.
    DWORD error = ERROR_WINHTTP_INTERNAL_ERROR;

    //
    // While walking the Status Line looking for terminating \r\n,
    // we extract the Major.Minor Versions and Status Code in that order.
    // Text and spaces will lie between/before/after the three numbers
    // but the idea is to remember which number we're calculating based on a
    // numeric state. If all goes well the loop will churn out an array with
    // the 3 numbers plugged in as DWORDs.
    //
    while ((response < lpszEnd) && (*response != '\r') && (*response != '\n'))
    {
        switch (ver_state)
        {
        case BEFORE_VERSION_NUMBERS:
            //
            // We've already matched the status line with something
            // of the form "****/" in UpdateFromHeaders(), we can ignore
            // everything through the first '/'.
            //
            if (*response == '/')
            {
                INET_ASSERT(ver_state == BEFORE_VERSION_NUMBERS);
                ver_state++; // = MAJOR_VERSION_NUMBER
                dwStatusPieceLength = 0; // next piece is either spaces or an int
            }
            break;

        case MAJOR_VERSION_NUMBER:
            if (*response == '.' && ver_state == MAJOR_VERSION_NUMBER)
            {
                ver_state++; // = MINOR_VERSION_NUMBER
                dwStatusPieceLength = 0; // next piece is an int
                break;
            }
            // fall through

        case MINOR_VERSION_NUMBER:
            // The state test matters: MAJOR_VERSION_NUMBER falls through to here.
            if (*response == ' ' && ver_state == MINOR_VERSION_NUMBER)
            {
                ver_state++; // = STATUS_CODE_NUMBER
                dwStatusPieceLength = 0; // next piece is either spaces or an int.
                break;
            }
            // fall through

        case STATUS_CODE_NUMBER:
            //
            // The bytes come straight from the server. Passing a negative
            // char to isdigit() is undefined behaviour, so convert through
            // unsigned char first.
            //
            if (isdigit(static_cast<unsigned char>(*response)))
            {
                if (!afStatusIntsFound[ver_state])
                {
                    // transitioning from counting spaces
                    // to counting integers
                    dwStatusPieceLength = 0;
                }

                // Allow up to 3 digits per integer.
                if (++dwStatusPieceLength > 3)
                    goto doneInvalidStatusLine;

                int val = *response - '0';
                afStatusIntsFound[ver_state] = TRUE;
                adwStatusInts[ver_state] = adwStatusInts[ver_state] * 10 + val;
            }
            else if (adwStatusInts[STATUS_CODE_NUMBER] > 0)
            {
                // A non-zero status code has been read; only a single space
                // may end it.
                INET_ASSERT(ver_state == STATUS_CODE_NUMBER);
                if (*response != ' ')
                    goto doneInvalidStatusLine;
                ver_state++; // = AFTER_STATUS_CODE
                dwStatusPieceLength = 0; // next piece is the reason phrase
                break;
            }
            else if (*response == ' ' && !afStatusIntsFound[ver_state])
            {
                //
                // Before processing MAJOR_VERSION_NUMBER or STATUS_CODE_NUMBER,
                // allow up to 3 spaces.
                //
                // Multiple spaces are being allowed here because it is
                // legacy behavior and may therefore be necessary, and being
                // non-strict about it doesn't put anything at risk.
                //
                if (++dwStatusPieceLength > 3)
                    goto doneInvalidStatusLine;
            }
            else
            {
                // We fail if anything outside the spec is found, except
                // for allowing multiple spaces before the status code.
                goto doneInvalidStatusLine;
            }
            break;

        case AFTER_STATUS_CODE:
            //
            // This will advance to the next CR or LF..
            //
            // We limit Reason-Phrase length to protect against malicious
            // socket hogging
            //
            if (++dwStatusPieceLength > GlobalMaxSizeStatusLineResultText)
            {
                goto doneInvalidStatusLine;
            }
            break;
        }

        ++response;
        ++dwBytesScanned;
    }

    // Length of the status line proper, excluding its CR/LF terminator.
    dwStatusLineLength = dwBytesScanned;

    // If everything has been to spec so far we now expect a final CRLF. For
    // interop with certain sloppy servers we allow 0-2 CRs before the LF.
    if (response == lpszEnd)
        goto doneNeedMoreData;

    // CR
    if (*response == '\r')
    {
        ++response;
        ++dwBytesScanned;
    }
    if (response == lpszEnd)
        goto doneNeedMoreData;

    // CR
    if (*response == '\r')
    {
        ++response;
        ++dwBytesScanned;
    }
    if (response == lpszEnd)
        goto doneNeedMoreData;

    // LF
    if (*response != '\n')
        goto doneInvalidStatusLine;

    ++response; // safe to step: response != lpszEnd was checked just above
    ++dwBytesScanned;

    //
    // Some validation checking
    //
    // All three status ints must have been found.
    // Other code assumes that if the Status Code == 0, then
    // the status line hasn't been parsed yet. To be sure this assumption
    // remains true, explicitly reject status lines with a 0 status code.
    //
    if (afStatusIntsFound[MAJOR_VERSION_NUMBER] != TRUE
        || afStatusIntsFound[MINOR_VERSION_NUMBER] != TRUE
        || afStatusIntsFound[STATUS_CODE_NUMBER] != TRUE
        || adwStatusInts[STATUS_CODE_NUMBER] == 0)
    {
        goto doneInvalidStatusLine;
    }

    //
    // Now we have our parsed header to add to the array.
    //
    // These two are deliberately declared without initializers: the gotos
    // above jump over this point, which C++ only permits for uninitialized
    // scalar declarations.
    //
    HEADER_STRING * freeHeader;
    DWORD iSlot;

    freeHeader = FindFreeSlot(&iSlot);
    if (freeHeader == NULL) {
        INET_ASSERT(FALSE);
        goto doneFailError;
    } else {
        INET_ASSERT(iSlot == 0); // status line should always be first
        // Stored as an offset from lpHeaderBase plus a length, not as a copy.
        freeHeader->CreateOffsetString((DWORD)(lpszStatusLine - lpHeaderBase), dwStatusLineLength);
        freeHeader->SetHash(0); // status line has no hash value.
    }

    //
    // Success.. fill in the output params appropriately.
    //
    *lpfNeedMoreBuffer = FALSE;
    *lpdwStatusCode = adwStatusInts[STATUS_CODE_NUMBER];
    *lpdwMajorVersion = adwStatusInts[MAJOR_VERSION_NUMBER];
    *lpdwMinorVersion = adwStatusInts[MINOR_VERSION_NUMBER];
    *lpdwBufferLengthScanned += dwBytesScanned;
    error = ERROR_SUCCESS;
    goto exitFinalReturn;

doneNeedMoreData:
    // Ran out of buffer before the terminating LF. With no more data coming
    // that is a malformed response; otherwise ask the caller for more.
    if (fEof)
        goto doneInvalidStatusLine;
    error = ERROR_SUCCESS;
    *lpfNeedMoreBuffer = TRUE;
    goto exitFinalReturn;

doneInvalidStatusLine:
    error = ERROR_WINHTTP_INVALID_SERVER_RESPONSE;
    *lpfNeedMoreBuffer = FALSE;
    goto exitFinalReturn;

doneFailError:
    error = ERROR_WINHTTP_INTERNAL_ERROR;
    *lpfNeedMoreBuffer = FALSE;
    goto exitFinalReturn;

exitFinalReturn:
    return error;
}
DWORD
HTTP_HEADER_PARSER::ParseHeaders(
IN LPSTR lpHeaderBase,
IN DWORD dwBufferLength,
IN BOOL fEof,
IN OUT DWORD *lpdwBufferLengthScanned,
OUT LPBOOL pfFoundCompleteLine,
OUT LPBOOL pfFoundEndOfHeaders
)
/*++
Routine Description:
Loads headers into HTTP_HEADERS member for subsequent parsing.
Parses string based headers and adds their parts to an internally stored
array of HTTP_HEADERS.
Input is expected to be header Name/Value pairs, each delimited
by ':' and '\r\n'. Each header is recorded as an offset from lpHeaderBase
plus a length, together with a case-insensitive hash of its name.
Parsing stops at the empty line that ends the headers, at the end of the
buffer, at a line that looks like the start of an HTML document, or when
no free header slot can be obtained.
Arguments:
lpHeaderBase - start of the buffer holding the headers
dwBufferLength - number of valid bytes at lpHeaderBase
fEof - TRUE if no more data will follow this buffer
lpdwBufferLengthScanned - IN: offset from lpHeaderBase at which to start
parsing. OUT: incremented by the number of bytes
consumed by this call
pfFoundCompleteLine - receives the final value of the local 'success'
flag (see the comments in the body for when it is
set and cleared)
pfFoundEndOfHeaders - receives TRUE if the empty line ending the headers
(or an HTML-looking line) was found, else FALSE
Return Value:
DWORD
ERROR_SUCCESS, or the value of GetError() if FindFreeSlot() fails.
--*/
{
LPSTR lpszEnd = lpHeaderBase + dwBufferLength;
LPSTR response = lpHeaderBase + *lpdwBufferLengthScanned;
DWORD dwBytesScanned = 0;
BOOL success = FALSE;
DWORD error = ERROR_SUCCESS;
*pfFoundEndOfHeaders = FALSE;
//
// Each iteration of the following loop
// walks an HTTP header line of the form:
// HeaderName: HeaderValue\r\n
//
do
{
DWORD dwHash = HEADER_HASH_SEED;
LPSTR lpszHeaderName;
DWORD dwHeaderNameLength = 0;
DWORD dwHeaderLineLength = 0;
// Rewind point for a line that turns out to be incomplete; it is
// re-assigned below once leading whitespace has been skipped.
DWORD dwPreviousAmountOfBytesScanned = dwBytesScanned;
//
// Remove leading whitespace from header
//
while ( (response < lpszEnd) && ((*response == ' ') || (*response == '\t')) )
{
++response;
++dwBytesScanned;
}
//
// Scan for HeaderName:
//
lpszHeaderName = response;
dwPreviousAmountOfBytesScanned = dwBytesScanned;
while ((response < lpszEnd) && (*response != ':') && (*response != '\r') && (*response != '\n'))
{
//
// This code encapsulates CalculateHashNoCase as an optimization,
// we attempt to calculate the Hash value as we parse the header.
// Upper-case ASCII letters are folded to lower case before hashing.
//
CHAR ch = *response;
if ((ch >= 'A') && (ch <= 'Z')) {
ch = MAKE_LOWER(ch);
}
dwHash += (DWORD)(dwHash << 5) + ch;
++response;
++dwBytesScanned;
}
dwHeaderNameLength = (DWORD) (response - lpszHeaderName);
//
// catch bogus responses: if we find what looks like one of a (very)
// small set of HTML tags, then assume the previous header was the
// last
//
// NOTE(review): dwBytesScanned is not rewound on this path, so the bytes
// of the tag-like "name" scanned above are reported as consumed - confirm
// that callers expect this.
//
if ((dwHeaderNameLength >= sizeof("<HTML>") - 1)
&& (*lpszHeaderName == '<')
&& (!strnicmp(lpszHeaderName, "<HTML>", sizeof("<HTML>") - 1)
|| !strnicmp(lpszHeaderName, "<HEAD>", sizeof("<HEAD>") - 1))) {
*pfFoundEndOfHeaders = TRUE;
break;
}
//
// Keep scanning till end of the line.
//
while ((response < lpszEnd) && (*response != '\r') && (*response != '\n'))
{
++response;
++dwBytesScanned;
}
dwHeaderLineLength = (DWORD) (response - lpszHeaderName); // length of the whole line (name and value), not just the name
if (response == lpszEnd) {
//
// response now points one past the end of the buffer. We may be looking
// over the edge...
//
// if we're at the end of the connection then the server sent us an
// incorrectly formatted response. Probably an error.
//
// Otherwise it's a partial response. We need more
//
DEBUG_PRINT(HTTP,
INFO,
("found end of short response\n"
));
success = fEof ? TRUE : FALSE;
//
// if more data is still to come (not EOF), rewind the scanned count to
// the start of this line so that it is parsed again once the rest of
// it has arrived. The unterminated line is not added as a header in
// either case.
//
if (!success) {
dwBytesScanned = dwPreviousAmountOfBytesScanned;
}
break;
}
else
{
//
// we reached a CR or LF. This is the end of this current header. Find
// the start of the next one
//
//
// first, strip off any trailing spaces from the current header. We do
// this by simply reducing the string length. We only look for space
// and tab characters. Only do this if we have a non-zero length header
//
// The negative index walks backwards from the line terminator. With a
// non-zero length the walk stops at the first character of the header
// at the latest, because leading spaces and tabs were skipped above.
//
if (dwHeaderLineLength != 0) {
for (int i = -1; response[i] == ' ' || response[i] == '\t'; --i) {
--dwHeaderLineLength;
}
}
INET_ASSERT((int)dwHeaderLineLength >= 0);
//
// some servers respond with "\r\r\n". Lame
// A new twist: "\r \r\n". Lamer
// So skip any run of CRs and spaces before looking for the LF.
//
while ((response < lpszEnd)
&& ((*response == '\r') || (*response == ' '))) {
++response;
++dwBytesScanned;
}
if (response == lpszEnd) {
//
// hit end of buffer without finding LF
//
success = FALSE;
DEBUG_PRINT(HTTP,
WARNING,
("hit end of buffer without finding LF\n"
));
//
// get more data, reparse this line
//
dwBytesScanned = dwPreviousAmountOfBytesScanned;
break;
} else if (*response == '\n') {
++response;
++dwBytesScanned;
//
// if we found the empty line then we are done
//
if (dwHeaderLineLength == 0) {
*pfFoundEndOfHeaders = TRUE;
break;
}
success = TRUE;
}
//
// NOTE(review): if the character after the CR/space run is not LF,
// neither branch above runs: 'success' keeps its previous value, the
// line is still added below, and scanning resumes at that character.
// Presumably deliberate leniency - confirm.
//
}
//
// Now we have our parsed header to add to the array
//
HEADER_STRING * freeHeader;
DWORD iSlot;
freeHeader = FindFreeSlot(&iSlot);
if (freeHeader == NULL) {
error = GetError();
goto quit;
} else {
// Stored as an offset from lpHeaderBase plus a length, not as a copy.
freeHeader->CreateOffsetString((DWORD) (lpszHeaderName - lpHeaderBase), dwHeaderLineLength);
freeHeader->SetHash(dwHash);
}
//CHAR szTemp[256];
//
//memcpy(szTemp, lpszHeaderName, dwHeaderLineLength);
//lpszHeaderName[dwHeaderLineLength] = '\0';
//DEBUG_PRINT(HTTP,
// INFO,
// ("ParseHeaders: adding=%q\n", lpszHeaderName
// ));
//
// Now see if this is a known header we are adding, if so then we note that fact
//
DWORD dwKnownQueryIndex;
if (HeaderMatch(dwHash, lpszHeaderName, dwHeaderNameLength, &dwKnownQueryIndex) )
{
freeHeader->SetNextKnownIndex(FastAdd(dwKnownQueryIndex, iSlot));
}
} while (TRUE);
// Every exit from the loop (break or goto) reports progress the same way.
quit:
*lpdwBufferLengthScanned += dwBytesScanned;
*pfFoundCompleteLine = success;
return error;
}