634 lines
17 KiB
C++
634 lines
17 KiB
C++
|
|
//-----------------------------------------------------------------------------
|
|
//
|
|
// @doc
|
|
//
|
|
// @module ppstr.cpp | implementation of Passport common string utilities
|
|
//
|
|
// Author: stevefu
|
|
//
|
|
// Date: 05/02/2000
|
|
//
|
|
// Copyright <cp> 1999-2000 Microsoft Corporation. All Rights Reserved.
|
|
//
|
|
//-----------------------------------------------------------------------------
|
|
|
|
#include "stdafx.h"
|
|
#include <pputils.h>
|
|
|
|
//-----------------------------------------------------------------------------
|
|
// @func
|
|
// convert MBCS string to UNICODE, optionally do HTML numeric decoding
|
|
// expects pszIn in the correct codepage and/or in HTML numeric decoding.
|
|
// @rdesc
|
|
// wOut returns the converted string. "" if error during conversion.
|
|
//-----------------------------------------------------------------------------
|
|
void Mbcs2Unicode(LPCSTR pszIn, //@parm the cstring to be converted
|
|
unsigned codepage, //@parm codepage pszIn is on
|
|
BOOL bNEC, //@parm do HTML numeric decoding or not
|
|
CStringW& wOut //@parm return the W string
|
|
)
|
|
{
|
|
// codepage 0 == CP_ACP, a perfectly valid code page
|
|
// ATLASSERT(codepage > 0);
|
|
wchar_t* p = NULL;
|
|
|
|
wOut = L"";
|
|
|
|
if (NULL == pszIn)
|
|
return;
|
|
|
|
int ret;
|
|
int maxlen;
|
|
maxlen = strlen(pszIn); // We deal with DBCS only.
|
|
if ( maxlen == 0 ) goto cleanup;
|
|
|
|
p = (wchar_t*)_alloca( sizeof(wchar_t) * (maxlen+4));
|
|
ATLASSERT( p != NULL );
|
|
ret = MultiByteToWideChar(codepage,
|
|
MB_PRECOMPOSED,
|
|
pszIn,
|
|
-1,
|
|
p,
|
|
maxlen + 1);
|
|
ATLASSERT( ret != 0 );
|
|
if ( ret == 0 ) goto cleanup;
|
|
|
|
wOut = p;
|
|
if ( !bNEC ) goto cleanup;
|
|
|
|
FixUpHtmlDecimalCharacters(wOut);
|
|
|
|
cleanup:
|
|
return ;
|
|
}
|
|
|
|
//-----------------------------------------------------------------------------
|
|
// @func
|
|
// convert UNICODE string to MBCS string, optionally do HTML numeric encoding
|
|
// for characters that can NOT be mapped into the given codepage.
|
|
// If you want everything in HTML numeric encoding, use Western codepage 1252.
|
|
//
|
|
// @rdesc
|
|
// aOut returns the converted string. "" if error during conversion.
|
|
//-----------------------------------------------------------------------------
|
|
void Unicode2Mbcs(LPCWSTR pwszIn, //@parm the W string to be converted
|
|
unsigned codepage, //@parm the codepage used for the conversion
|
|
BOOL bNEC, //@parm if TRUE, characters that don't fit into
|
|
// the given codepage will be NEC'ed
|
|
CStringA& aOut //@parm return the A string
|
|
)
|
|
{
|
|
// codepage 0 == CP_ACP, a perfectly valid code page
|
|
// ATLASSERT(codepage > 0);
|
|
char* p = NULL;
|
|
|
|
aOut = "";
|
|
|
|
if (NULL == pwszIn)
|
|
return;
|
|
|
|
int ret;
|
|
if ( ! bNEC )
|
|
{
|
|
int maxlen;
|
|
maxlen = wcslen(pwszIn) * 2; // We deal with DBCS only.
|
|
if ( maxlen == 0 ) goto cleanup;
|
|
|
|
char* p = (char*)_alloca( sizeof(char) * (maxlen+4));
|
|
ATLASSERT( p != NULL );
|
|
|
|
ret = WideCharToMultiByte(codepage,
|
|
0,
|
|
pwszIn,
|
|
-1,
|
|
p,
|
|
maxlen,
|
|
NULL,
|
|
NULL);
|
|
ATLASSERT( ret != 0 );
|
|
if ( ret == 0 ) goto cleanup;
|
|
|
|
aOut = p;
|
|
goto cleanup;
|
|
}
|
|
else // do it the slow way: convert one char at a time. If can't convert, do
|
|
// HTML numeric encoding
|
|
{
|
|
int i;
|
|
char strbuff[20]; // buffer for one single MBCS character or NEC
|
|
aOut.Preallocate(wcslen(pwszIn) * 3); //estimate: reduce re-allocate
|
|
|
|
for( i = 0; pwszIn[i] != L'\0'; i++ )
|
|
{
|
|
BOOL bDefault = FALSE;
|
|
ret = WideCharToMultiByte(codepage,
|
|
0,
|
|
&pwszIn[i],
|
|
1,
|
|
strbuff,
|
|
20,
|
|
NULL,
|
|
&bDefault);
|
|
if ( 0 != ret && !bDefault )
|
|
{
|
|
strbuff[ret] = '\0';
|
|
aOut += strbuff;
|
|
}
|
|
else
|
|
{
|
|
ATLASSERT(pwszIn[i] > 0);
|
|
ltoa(pwszIn[i], strbuff, 10);
|
|
aOut += "&#";
|
|
aOut += strbuff;
|
|
aOut += ";" ;
|
|
}
|
|
}
|
|
}
|
|
|
|
cleanup:
|
|
return;
|
|
}
|
|
|
|
|
|
//*----------------------------------------------------------------------------
|
|
// @func
|
|
// convert HTML numeric encoding blocks (Ӓ etc) within a W string
|
|
// to UNICODE characters.
|
|
//*----------------------------------------------------------------------------
|
|
void FixUpHtmlDecimalCharacters(
|
|
CStringW& str //@parm in/out. the string to be converted
|
|
)
|
|
{
|
|
CStringW tmp = "";
|
|
wchar_t* pstr = str.LockBuffer();
|
|
|
|
int i;
|
|
int len = str.GetLength();
|
|
tmp.Preallocate(len+4); // avoid re-allocation
|
|
for( i = 0; i < len; i++)
|
|
{
|
|
if ( pstr[i] == L'&' && pstr[i+1] == L'#' )
|
|
{
|
|
int ndx = str.Find(L';', i+1);
|
|
if ( ndx != -1 && ndx > i+2)
|
|
{
|
|
pstr[ndx] = L'\0';
|
|
long wch = _wtoi(&pstr[i+2]);
|
|
if (wch > 0 && wch < static_cast<long>(USHRT_MAX))
|
|
{
|
|
tmp += static_cast<wchar_t>(wch);
|
|
}
|
|
i = ndx;
|
|
continue;
|
|
}
|
|
}
|
|
|
|
//default case: append it
|
|
tmp += str[i];
|
|
}
|
|
|
|
str = tmp;
|
|
}
|
|
|
|
//*----------------------------------------------------------------------------
|
|
// @func
|
|
// escaping special characters. supported characters are: ", <, > .
|
|
// this is used only for HTML escaping, not for URL escaping.
|
|
//*----------------------------------------------------------------------------
|
|
void HtmlEscapeString(
|
|
CStringW& str, //@parm in/out. the string to be escaped
|
|
LPCWSTR escch /*= L"\"<>"*/ //@parm in. the escape characters to check
|
|
// for:
|
|
)
|
|
{
|
|
ATLASSERT( escch != NULL );
|
|
CStringW strEsc(escch);
|
|
|
|
if (strEsc.Find(L'"') != -1)
|
|
{
|
|
str.Replace(L"\"", L""");
|
|
}
|
|
if (strEsc.Find(L'<') != -1)
|
|
{
|
|
str.Replace(L"<", L"<");
|
|
}
|
|
if (strEsc.Find(L'>') != -1)
|
|
{
|
|
str.Replace(L">", L">");
|
|
}
|
|
return;
|
|
}
|
|
|
|
//*----------------------------------------------------------------------------
|
|
// @func
|
|
// escaping special characters. supported characters are: ", <, > .
|
|
// this is used only for HTML escaping, not for URL escaping.
|
|
//*----------------------------------------------------------------------------
|
|
void HtmlEscapeString(
|
|
CStringA& str, //@parm in/out. the string to be escaped
|
|
LPCSTR escch /*= L"\"<>"*/ //@parm in. the escape characters to check
|
|
// for:
|
|
)
|
|
{
|
|
ATLASSERT( escch != NULL );
|
|
|
|
if (strchr(escch, '"'))
|
|
{
|
|
str.Replace("\"", """);
|
|
}
|
|
if (strchr(escch, '<'))
|
|
{
|
|
str.Replace("<", "<");
|
|
}
|
|
if (strchr(escch, '>'))
|
|
{
|
|
str.Replace(">", ">");
|
|
}
|
|
return;
|
|
}
|
|
|
|
//*----------------------------------------------------------------------------
|
|
// @func
|
|
// URL escaping unsafe characters specified under the URI RFC document
|
|
//*----------------------------------------------------------------------------
|
|
void UrlEscapeString(
|
|
CStringW& wStr //@parm in/out, string to be converted
|
|
)
|
|
{
|
|
int len;
|
|
int i;
|
|
wchar_t ch;
|
|
wchar_t cbuff[10];
|
|
CStringW wOutstr = L"";
|
|
|
|
len = wStr.GetLength();
|
|
if ( len == 0 ) return;
|
|
wOutstr.Preallocate(len*3); // avoid re-allocate
|
|
|
|
for ( i = 0; i < len; i++ )
|
|
{
|
|
ch = wStr[i];
|
|
if ( AtlIsUnsafeUrlChar((char)ch) )
|
|
{
|
|
//output the percent, followed by the hex value of the character
|
|
wOutstr += L'%';
|
|
swprintf(cbuff, L"%.2X", ch);
|
|
wOutstr += cbuff;
|
|
}
|
|
else //safe character
|
|
{
|
|
wOutstr += ch;
|
|
}
|
|
}
|
|
|
|
wStr = wOutstr;
|
|
return;
|
|
}
|
|
|
|
|
|
//*----------------------------------------------------------------------------
|
|
// @func
|
|
// URL escaping unsafe characters specified under the URI RFC document
|
|
//*----------------------------------------------------------------------------
|
|
CStringA UrlEscapeStr(
|
|
const CStringA& oStr //@parm in/out, string to be converted
|
|
)
|
|
{
|
|
int iLen;
|
|
int iIndex;
|
|
char ch;
|
|
char cbuff[10];
|
|
CStringA oStrOut = "";
|
|
|
|
iLen = oStr.GetLength();
|
|
if (iLen == 0) { return oStr; }
|
|
|
|
oStrOut.Preallocate(iLen * 3); // avoid re-allocate
|
|
|
|
for (iIndex = 0; iIndex < iLen; iIndex++)
|
|
{
|
|
ch = oStr[iIndex];
|
|
if ( AtlIsUnsafeUrlChar(ch) )
|
|
{
|
|
//output the percent, followed by the hex value of the character
|
|
oStrOut += '%';
|
|
sprintf(cbuff, "%.2X", ch);
|
|
oStrOut += cbuff;
|
|
}
|
|
else //safe character
|
|
{
|
|
oStrOut += ch;
|
|
}
|
|
}
|
|
|
|
return oStrOut;
|
|
}
|
|
|
|
|
|
//*----------------------------------------------------------------------------
|
|
// @func
|
|
// URL escaping unsafe characters specified under the URI RFC document
|
|
//*----------------------------------------------------------------------------
|
|
void UrlEscapeString(
|
|
CStringA& oStr //@parm in/out, string to be converted
|
|
)
|
|
{
|
|
int iLen;
|
|
int iIndex;
|
|
char ch;
|
|
char cbuff[10];
|
|
CStringA oStrOut = "";
|
|
|
|
iLen = oStr.GetLength();
|
|
if (iLen == 0) { return; }
|
|
|
|
oStrOut.Preallocate(iLen * 3); // avoid re-allocate
|
|
|
|
for (iIndex = 0; iIndex < iLen; iIndex++)
|
|
{
|
|
ch = oStr[iIndex];
|
|
if ( AtlIsUnsafeUrlChar(ch) )
|
|
{
|
|
//output the percent, followed by the hex value of the character
|
|
oStrOut += '%';
|
|
sprintf(cbuff, "%.2X", ch);
|
|
oStrOut += cbuff;
|
|
}
|
|
else //safe character
|
|
{
|
|
oStrOut += ch;
|
|
}
|
|
}
|
|
|
|
oStr = oStrOut;
|
|
return;
|
|
}
|
|
|
|
|
|
//*----------------------------------------------------------------------------
|
|
// @func
|
|
// URL un-escaping unsafe characters specified under the URI RFC document
|
|
//*----------------------------------------------------------------------------
|
|
void UrlUnescapeString(
|
|
CStringW& wStr //@parm in/out, string to be converted
|
|
)
|
|
{
|
|
wchar_t* psrc;
|
|
wchar_t* pdest;
|
|
unsigned nValue;
|
|
|
|
psrc = pdest = wStr.GetBuffer();
|
|
while( *psrc != '\0' )
|
|
{
|
|
if (*psrc == L'%' && *(psrc+1) != '\0' && *(psrc+2) != '\0')
|
|
{
|
|
//currently assuming 2 hex values after '%'
|
|
//as per the RFC 2396 document
|
|
nValue = 16*AtlHexValue((char)*(psrc+1));
|
|
nValue+= AtlHexValue((char)*(psrc+2));
|
|
*pdest = (wchar_t) nValue;
|
|
psrc += 3;
|
|
}
|
|
else if ( *psrc == L'+' ) // special treatment for space
|
|
{
|
|
*pdest = L' ';
|
|
psrc++;
|
|
}
|
|
else //non-escape character
|
|
{
|
|
*pdest = *psrc;
|
|
psrc++;
|
|
}
|
|
pdest++;
|
|
}
|
|
*pdest = L'\0';
|
|
wStr.ReleaseBuffer();
|
|
|
|
return;
|
|
}
|
|
|
|
//*----------------------------------------------------------------------------
|
|
// @func
|
|
// URL un-escaping unsafe characters specified under the URI RFC document
|
|
//*----------------------------------------------------------------------------
|
|
void UrlUnescapeString(
|
|
CStringA& aStr //@parm in/out, string to be converted
|
|
)
|
|
{
|
|
char* psrc;
|
|
char* pdest;
|
|
unsigned nValue;
|
|
|
|
psrc = pdest = aStr.GetBuffer();
|
|
while( *psrc != '\0' )
|
|
{
|
|
if (*psrc == '%' && *(psrc+1) != '\0' && *(psrc+2) != '\0')
|
|
{
|
|
//currently assuming 2 hex values after '%'
|
|
//as per the RFC 2396 document
|
|
nValue = 16*AtlHexValue((char)*(psrc+1));
|
|
nValue+= AtlHexValue((char)*(psrc+2));
|
|
*pdest = (char) nValue;
|
|
psrc += 3;
|
|
}
|
|
else if ( *psrc == '+' ) // special treatment for space
|
|
{
|
|
*pdest = ' ';
|
|
psrc++;
|
|
}
|
|
else //non-escape character
|
|
{
|
|
*pdest = *psrc;
|
|
psrc++;
|
|
}
|
|
pdest++;
|
|
}
|
|
*pdest = '\0';
|
|
aStr.ReleaseBuffer();
|
|
|
|
return;
|
|
}
|
|
|
|
//*----------------------------------------------------------------------------
|
|
// @func
|
|
// copy BSTR contents to a string object and also free up the BSTR src
|
|
//*----------------------------------------------------------------------------
|
|
void BSTRMove(BSTR& src, //@parm in/out source string
|
|
CStringW& dest //@parm out, dest string object
|
|
)
|
|
{
|
|
dest = src;
|
|
::SysFreeString(src);
|
|
src = NULL;
|
|
}
|
|
|
|
|
|
//*----------------------------------------------------------------------------
|
|
// @func
|
|
// copy BSTR contents to a string object and also free up the BSTR src
|
|
//*----------------------------------------------------------------------------
|
|
void BSTRMove(BSTR& src, //@parm in/out source string
|
|
CStringA& dest //@parm out, dest string object
|
|
)
|
|
{
|
|
wchar_t* p;
|
|
char c;
|
|
|
|
dest = "";
|
|
for ( p = src; *p != L'\0'; p++ )
|
|
{
|
|
c = (char) ( (*p) & 0xff ); // ignore high bits
|
|
dest += c;
|
|
}
|
|
::SysFreeString(src);
|
|
src = NULL;
|
|
}
|
|
|
|
|
|
//*----------------------------------------------------------------------------
// @func
//  Convert a single hex digit character to its numeric value.
//  Accepts '0'-'9' and upper-case 'A'-'F' only.
// @rdesc
//  0..15 for a valid digit, -1 for any other character (including 'a'-'f').
//*----------------------------------------------------------------------------
long HexToNum(wchar_t c)
{
    if (c >= L'0' && c <= L'9')
    {
        return c - L'0';
    }
    if (c >= L'A' && c <= L'F')
    {
        return c - L'A' + 10;
    }
    return -1;
}
|
|
|
|
//*----------------------------------------------------------------------------
|
|
// @func
|
|
// convert a wide char hex string to its numeric equivalent
|
|
//*----------------------------------------------------------------------------
|
|
long FromHex(LPCWSTR pszHexString)
|
|
{
|
|
long lResult = 0;
|
|
long lCurrent;
|
|
LPWSTR pszCurrent;
|
|
|
|
for(pszCurrent = const_cast<LPWSTR>(pszHexString); *pszCurrent; pszCurrent++)
|
|
{
|
|
if((lCurrent = HexToNum(towupper(*pszCurrent))) == -1)
|
|
break; // illegal character, we're done
|
|
|
|
lResult = (lResult << 4) + lCurrent;
|
|
}
|
|
|
|
return lResult;
|
|
}
|
|
|
|
|
|
|
|
// Escape the XML special characters of str in place.
void EncodeXMLString(CStringA& str)
{
    /*
        Any occurrence of & must be replaced by &amp;
        Any occurrence of < must be replaced by &lt;
        Any occurrence of > must be replaced by &gt;
        Any occurrence of " (double quote) must be replaced by &quot;
    */
    // '&' must go first, otherwise the ampersands introduced by the other
    // replacements would themselves be escaped again.
    str.Replace("&", "&amp;");
    str.Replace("<", "&lt;");
    str.Replace(">", "&gt;");
    str.Replace("\"", "&quot;");
}
|
|
|
|
// Escape the XML special characters of str in place.
void EncodeXMLString(CStringW& str)
{
    /*
        Any occurrence of & must be replaced by &amp;
        Any occurrence of < must be replaced by &lt;
        Any occurrence of > must be replaced by &gt;
        Any occurrence of " (double quote) must be replaced by &quot;
    */
    // '&' must go first, otherwise the ampersands introduced by the other
    // replacements would themselves be escaped again.
    str.Replace(L"&", L"&amp;");
    str.Replace(L"<", L"&lt;");
    str.Replace(L">", L"&gt;");
    str.Replace(L"\"", L"&quot;");
}
|
|
|
|
// Encode str in place for WML output: the XML escaping done by
// EncodeXMLString, followed by doubling every '$' ($ --> $$).
void EncodeWMLString(CStringA& str)
{
    static const char szDollar[]        = "$";
    static const char szDoubledDollar[] = "$$";

    EncodeXMLString(str);
    str.Replace(szDollar, szDoubledDollar);
}
|
|
|
|
// Encode str in place for WML output: the XML escaping done by
// EncodeXMLString, followed by doubling every '$' ($ --> $$).
void EncodeWMLString(CStringW& str)
{
    static const wchar_t wszDollar[]        = L"$";
    static const wchar_t wszDoubledDollar[] = L"$$";

    EncodeXMLString(str);
    str.Replace(wszDollar, wszDoubledDollar);
}
|
|
|
|
// Encode str in place for HDML output: the XML escaping done by
// EncodeXMLString, followed by replacing every '$' with the &dol; entity.
void EncodeHDMLString(CStringA & str)
{
    static const char szDollar[]       = "$";
    static const char szDollarEntity[] = "&dol;";

    EncodeXMLString(str);
    str.Replace(szDollar, szDollarEntity);
}
|
|
|
|
// Encode str in place for HDML output: the XML escaping done by
// EncodeXMLString, followed by replacing every '$' with the &dol; entity.
void EncodeHDMLString(CStringW & str)
{
    static const wchar_t wszDollar[]       = L"$";
    static const wchar_t wszDollarEntity[] = L"&dol;";

    EncodeXMLString(str);
    str.Replace(wszDollar, wszDollarEntity);
}
|
|
|
|
// Append the hex encoding of a wide string to outputToAppend: every wide
// character of instr contributes its value as lowercase hex, zero-padded to
// four digits ("%04x").
void ToHexStr(CStringA& outputToAppend, LPCWSTR instr) throw()
{
    char hexDigits[6];  // formatted digits of one character plus the NUL

    for ( ; *instr; ++instr)
    {
        sprintf(hexDigits, "%04x", *instr);
        outputToAppend += hexDigits;
    }
}
|
|
|
|
// Append the hex encoding of a number's DECIMAL text to outputToAppend.
// The value is first formatted as decimal digits, and that wide string is
// then encoded by the LPCWSTR overload (four hex digits per decimal digit
// character) - it is not the hex representation of the value itself.
void ToHexStr(CStringA& outputToAppend, unsigned short in) throw()
{
    WCHAR decimalText[10];  // up to 5 digits for an unsigned short, plus NUL

    wsprintf(decimalText, L"%-hu", in);
    ToHexStr(outputToAppend, decimalText);
}
|
|
|
|
// Append the hex encoding of a number's DECIMAL text to outputToAppend.
// The value is first formatted as decimal digits, and that wide string is
// then encoded by the LPCWSTR overload (four hex digits per decimal digit
// character) - it is not the hex representation of the value itself.
void ToHexStr(CStringA& outputToAppend, unsigned long in) throw()
{
    // A 32-bit unsigned long can have 10 decimal digits (4294967295), which
    // with the terminating NUL did not fit in the previous 10-element buffer.
    WCHAR temp[16];

    wsprintf(temp, L"%-lu", in);
    ToHexStr(outputToAppend, temp);
}
|
|
|
|
|
|
// Append the hex dump of a byte buffer to outputToAppend: every byte of
// pData contributes two lowercase hex digits, high nibble first.
// pData must be non-NULL and len non-zero (asserted in debug builds).
void ToHexStr(CStringA& outputToAppend, PBYTE pData, ULONG len) throw()
{
#define TOHEX(a) ((a)>=10 ? 'a'+(a)-10 : '0'+(a))
    _ASSERT(pData);
    _ASSERT(len != 0);

    UINT v;
    // Two hex digits plus the terminating NUL. The buffer used to be
    // char[2], so writing the terminator at index 2 was out of bounds.
    char temp[3];
    temp[2] = 0;

    for(ULONG i = 0; i < len; ++i, ++pData)
    {
        v = *pData >> 4;                        // high nibble
        temp[0] = static_cast<char>(TOHEX( v ));
        v = *pData & 0x0f;                      // low nibble
        temp[1] = static_cast<char>(TOHEX( v ));
        outputToAppend += temp;
    }
}
|
|
|