/*++

Copyright (c) 1996 Microsoft Corporation

Module Name :

    linkload.cpp

Abstract:

    Link loader class definitions. It uses wininet API
	to load the web page from the internet. 

Author:

    Michael Cheuk (mcheuk)				22-Nov-1996

Project:

    Link Checker

Revision History:

--*/

#include "stdafx.h"
#include "linkload.h"

#include "link.h"

#ifdef _DEBUG
#define new DEBUG_NEW
#undef THIS_FILE
static char THIS_FILE[] = __FILE__;
#endif

// Constants
const int iMaxRedirectCount_c = 3;
const UINT nReadFileBufferSize_c = 4096;
const UINT nQueryResultBufferSize_c = 1024;


BOOL 
CLinkLoader::Create(
	const CString& strUserAgent, 
	const CString& strAdditonalHeaders
	)
/*++

Routine Description:

    One time link loader create funtion

Arguments:

    strUserAgent - HTTP user agent name
	strAdditonalHeaders - addtional HTTP headers

Return Value:

    BOOL - TRUE if success. FALSE otherwise.

--*/
{
	// Make sure wininet.dll is loaded
	ASSERT(CWininet::IsLoaded());
    if(!CWininet::IsLoaded())
    {
        return FALSE;
    }

    // Save the additional header
	m_strAdditionalHeaders = strAdditonalHeaders;

	// Open an internet session
    m_hInternetSession = CWininet::InternetOpenA(
							strUserAgent,
							PRE_CONFIG_INTERNET_ACCESS, 
							NULL,
							INTERNET_INVALID_PORT_NUMBER,
							0);

#ifdef _DEBUG
	if(!m_hInternetSession)
	{
		TRACE(_T("CLinkLoader::Create() - InternetOpen() failed. GetLastError() = %d\n"),
		GetLastError());
	}
#endif

    return (m_hInternetSession != NULL);

} // CLinkLoader::Create


BOOL 
CLinkLoader::ChangeProperties(
	const CString& strUserAgent, 
	const CString& strAdditionalHeaders
	)
/*++

Routine Description:

    Change the loader properties

Arguments:

    strUserAgent - HTTP user agent name
	strAdditonalHeaders - addtional HTTP headers

Return Value:

    BOOL - TRUE if success. FALSE otherwise.

--*/
{
	if(m_hInternetSession)
	{
		// Close the previous internet session and
		// call Create() again
		VERIFY(CWininet::InternetCloseHandle(m_hInternetSession));
		return Create(strUserAgent, strAdditionalHeaders);
	}

	return FALSE;

} // CLinkLoader::ChangeProperties


BOOL 
CLinkLoader::Load(
	CLink& link,
	BOOL fReadFile
	)
/*++

Routine Description:

    Load a web link

Arguments:

    link - reference to the result link object
	fReadFile - read the file and save it in the link object

Return Value:

    BOOL - TRUE if success. FALSE otherwise.

--*/
{
	// Make sure we have a session avaiable
	ASSERT(m_hInternetSession);
    if(!m_hInternetSession)
	{
		return FALSE;
	}

	// Crack the URL 
	TCHAR szHostName[INTERNET_MAX_HOST_NAME_LENGTH];
	TCHAR szUrlPath[INTERNET_MAX_URL_LENGTH];
	URL_COMPONENTS urlcomp;

	memset(&urlcomp, 0, sizeof(urlcomp));
	urlcomp.dwStructSize = sizeof(urlcomp);

	urlcomp.lpszHostName = (LPTSTR) &szHostName;
	urlcomp.dwHostNameLength = INTERNET_MAX_HOST_NAME_LENGTH;

	urlcomp.lpszUrlPath = (LPTSTR) &szUrlPath;
	urlcomp.dwUrlPathLength = INTERNET_MAX_URL_LENGTH;

	if(!CWininet::InternetCrackUrlA(link.GetURL(), link.GetURL().GetLength(), NULL, &urlcomp))
	{
		TRACE(_T("CLinkLoader::Load() - InternetCrackUrl() failed. GetLastError() = %d\n"), 
			GetLastError());
		return FALSE;
	}

	// Make sure we have a valid (non zero length) URL path
	if(_tcslen(szUrlPath) == 0)
	{
		_tprintf(szUrlPath, "%s", _TCHAR('/'));
	}

	// Call the appropriate load funtion for different URL schemes
	if(urlcomp.nScheme == INTERNET_SCHEME_HTTP)
	{
		return LoadHTTP(link, fReadFile, szHostName, szUrlPath);
	}
	else if(urlcomp.nScheme >= INTERNET_SCHEME_FTP && 
		urlcomp.nScheme <= INTERNET_SCHEME_HTTPS)
	{
		return LoadURL(link);
	}
	else
	{
		TRACE(_T("CLinkLoader::Load() - unsupport URL scheme(%d)\n"), urlcomp.nScheme); 
		link.SetState(CLink::eUnsupport);
		return FALSE;
	}

} // CLinkLoader::Load


BOOL 
CLinkLoader::LoadURL(
	CLink& link
	)
/*++

Routine Description:

    Load a URL (non-HTTP) link

Arguments:

    link - reference to the result link object
    
Return Value:

    BOOL - TRUE if success. FALSE otherwise.

--*/
{
	// Use InternetOpenUrl for all URL scheme except HTTP
	CAutoInternetHandle hOpenURL;
	hOpenURL = CWininet::InternetOpenUrlA(
		m_hInternetSession,
		link.GetURL(),
		NULL,
		0,
		INTERNET_FLAG_DONT_CACHE,
		0);

	if(!hOpenURL)
	{
		TRACE(_T("CLinkLoader::LoadURL() - InternetOpenUrlA() failed."));
		return WininetFailed(link);
	}
	else
	{
		link.SetState(CLink::eValidURL);
		return TRUE;
	}

} // CLinkLoader::LoadURL


BOOL 
CLinkLoader::LoadHTTP(
	CLink& link,
	BOOL fReadFile,
	LPCTSTR lpszHostName,
	LPCTSTR lpszUrlPath,
	int iRedirectCount /* = 0 */
	)
/*++

Routine Description:

    Load a HTTP link

Arguments:

    link - reference to the result link object
    fReadFile - read the file and save it in the link object
    lpszHostName - hostname
	lpszUrlPath - URL path
	iRedirectCount - Looping count. It is used to keep track the
                     the number of redirection for current link.

Return Value:

    BOOL - TRUE if success. FALSE otherwise.

--*/
{
	// Open an http session
	CAutoInternetHandle hHttpSession;
	hHttpSession = CWininet::InternetConnectA(
						m_hInternetSession,				// hInternetSession
						lpszHostName,				// lpszServerName
						INTERNET_INVALID_PORT_NUMBER,	// nServerPort
						_T(""),								// lpszUsername
						_T(""),								// lpszPassword
						INTERNET_SERVICE_HTTP,			// dwService
						0,								// dwFlags
						0);								// dwContext
	
	if(!hHttpSession)
	{
		TRACE(_T("CLinkLoader::LoadHTTP() - InternetConnect() failed."));
		return WininetFailed(link);
	}

	// Open an http request
	CAutoInternetHandle hHttpRequest;
	hHttpRequest = CWininet::HttpOpenRequestA(
						hHttpSession,				// hHttpSession
						_T("GET"),				// lpszVerb
                        lpszUrlPath,			// lpszObjectName
						HTTP_VERSION,				// lpszVersion
						link.GetBase(),			// lpszReferer
						NULL,						// lpszAcceptTypes
						INTERNET_FLAG_NO_AUTO_REDIRECT | INTERNET_FLAG_DONT_CACHE,	// dwFlags
						0);							// dwContext

	if(!hHttpRequest)
	{
		TRACE(_T("CLinkLoader::LoadHTTP() - HttpOpenRequest() failed."));
		return WininetFailed(link);
	}

	// Sent the http request
	if(!CWininet::HttpSendRequestA(
				hHttpRequest,	// hHttpRequest
				m_strAdditionalHeaders,	// lpszHeaders
				(DWORD)-1,		// dwHeadersLength
				0,				// lpOptional
				0))				// dwOptionalLength
	{
		TRACE(_T("CLinkLoader::LoadHTTP() - HttpSendRequest() failed."));
		return WininetFailed(link);
	}

	TCHAR szQueryResult[nQueryResultBufferSize_c];
	DWORD dwQueryLength = sizeof(szQueryResult);

	// Check the result status code
	if(!CWininet::HttpQueryInfoA(
				hHttpRequest,			// hHttpRequest
				HTTP_QUERY_STATUS_CODE,	// dwInfoLevel
				szQueryResult,			// lpvBuffer
				&dwQueryLength,			// lpdwBufferLength
				NULL))					// lpdwIndex
	{
		TRACE(_T("CLinkLoader::LoadHTTP() - HttpQueryInfo() failed."));
		return WininetFailed(link);
	}

	// Check for 301 Move Permanently or 302 Move Temporarily
	if(_ttoi(szQueryResult) == 301 || _ttoi(szQueryResult) == 302)
	{
		// We can only redirect iMaxRedirectCount_c times
		if(iRedirectCount > iMaxRedirectCount_c)
		{
			return FALSE;
		}

		// Get the new location
		dwQueryLength = sizeof(szQueryResult);

		if(!CWininet::HttpQueryInfoA(
				hHttpRequest,			// hHttpRequest
				HTTP_QUERY_LOCATION,	// dwInfoLevel
				szQueryResult,			// lpvBuffer
				&dwQueryLength,			// lpdwBufferLength
				NULL))					// lpdwIndex
		{
			TRACE(_T("CLinkLoader::LoadHTTP() - HttpQueryInfo() failed."));
			return WininetFailed(link);
		}

		// We only update the URL in link object if
		// we are redirecting from http://hostname/xyz to http://hostname/xyz/
		if(link.GetURL().GetLength() + 1 == (int)dwQueryLength &&
		   link.GetURL().GetAt(link.GetURL().GetLength() - 1) != _TCHAR('/') &&
		   szQueryResult[dwQueryLength - 1] == _TCHAR('/'))
		{
			link.SetURL(szQueryResult);
		}

		// Crack the URL & call LoadHTTP again
		TCHAR szHostName[INTERNET_MAX_HOST_NAME_LENGTH];
		TCHAR szUrlPath[INTERNET_MAX_URL_LENGTH];

		// Crack the URL 
		URL_COMPONENTS urlcomp;

		memset(&urlcomp, 0, sizeof(urlcomp));
		urlcomp.dwStructSize = sizeof(urlcomp);

		urlcomp.lpszHostName = (LPTSTR) &szHostName;
		urlcomp.dwHostNameLength = INTERNET_MAX_HOST_NAME_LENGTH;

		urlcomp.lpszUrlPath = (LPTSTR) &szUrlPath;
		urlcomp.dwUrlPathLength = INTERNET_MAX_URL_LENGTH;

		VERIFY(CWininet::InternetCrackUrlA(szQueryResult, dwQueryLength, NULL, &urlcomp));

		return LoadHTTP(link, fReadFile, szHostName, szUrlPath, ++iRedirectCount);
	}


	// Update the HTTP status code
	link.SetStatusCode(_ttoi(szQueryResult));
	
	// If the status code is not 2xx. it is a invalid link
	if(szQueryResult[0] != '2')
	{
		link.SetState(CLink::eInvalidHTTP);

		// Get the new location
		dwQueryLength = sizeof(szQueryResult);

		if(CWininet::HttpQueryInfoA(
				hHttpRequest,			// hHttpRequest
				HTTP_QUERY_STATUS_TEXT,	// dwInfoLevel
				szQueryResult,			// lpvBuffer
				&dwQueryLength,			// lpdwBufferLength
				NULL))					// lpdwIndex
		{
			link.SetStatusText(szQueryResult);
		}

		return FALSE;
	}

	// Now we have a valid http link
	link.SetState(CLink::eValidHTTP);

	// If we are not reading the file, we can return now
	if(!fReadFile)
	{
		return TRUE;
	}

	// Check the result content-type
	dwQueryLength = sizeof(szQueryResult);
	if(!CWininet::HttpQueryInfoA(
				hHttpRequest,			// hHttpRequest
				HTTP_QUERY_CONTENT_TYPE,// dwInfoLevel
				szQueryResult,			// lpvBuffer
				&dwQueryLength,			// lpdwBufferLength
				NULL))					// lpdwIndex
	{
		TRACE(_T("CLinkLoader::LoadHTTP() - HttpQueryInfo() failed."));
		return WininetFailed(link);
	}
				
	// We only load the html text for parsing
	if(!_tcsstr(szQueryResult, _T("text/html")) )
	{
		return TRUE;
	}

	link.SetContentType(CLink::eText);

	CString strBuffer;
	TCHAR buf[nReadFileBufferSize_c];
	DWORD dwBytesRead;

	// Load the text html in a loop
	do
	{
		memset(buf, 0, sizeof(buf));

		if(CWininet::InternetReadFile(
						hHttpRequest,	// hFile
						buf,			// lpBuffer
						sizeof(buf),	// dwNumberOfBytesToRead
						&dwBytesRead))	// lpNumberOfBytesRead
		{
			strBuffer += buf;
		}
		else
		{
			TRACE(_T("CLinkLoader::LoadHTTP() - InternetReadFile() failed."));
			return WininetFailed(link);
		}
	}
	while(dwBytesRead);

	// Set the InternetReadFile result in the link object
	link.SetData(strBuffer);

	return TRUE;

} // CLinkLoader::LoadHTTP



BOOL 
CLinkLoader::WininetFailed(
	CLink& link
	)
/*++

Routine Description:

    Wininet failed clean up subroutine

Arguments:

    link - reference to the result link object

Return Value:

    BOOL - Alway return TRUE

--*/
{
	link.SetState(CLink::eInvalidWininet);
	link.SetStatusCode(GetLastError());
	TRACE(_T(" GetLastError() = %d\n"), link.GetStatusCode());

	LPTSTR lpMsgBuf;
 
	if(FormatMessage( 
		FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_HMODULE | FORMAT_MESSAGE_FROM_SYSTEM,
		CWininet::GetWininetModule(),
		GetLastError(),
		MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), // Default language
		(LPTSTR) &lpMsgBuf,
		0,
		NULL) > 0)
	{
		link.SetStatusText(lpMsgBuf);
		LocalFree(lpMsgBuf);
	}

	return FALSE;

} // CLinkLoader::WininetFailed