335 lines
		
	
	
		
			8.7 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			335 lines
		
	
	
		
			8.7 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
/*++
 | 
						|
 | 
						|
   Copyright    (c)    1995-1997    Microsoft Corporation
 | 
						|
 | 
						|
   Module  Name :
 | 
						|
      spechash.cxx
 | 
						|
 | 
						|
   Abstract:
 | 
						|
      This module calculates the hash value for SpecWeb96 style file-names
 | 
						|
        
 | 
						|
   Author:
 | 
						|
 | 
						|
       Murali R. Krishnan    ( MuraliK )     06-Nov-1997 
 | 
						|
 | 
						|
   Environment:
 | 
						|
       Win32 - User Mode
 | 
						|
       
 | 
						|
   Project:
 | 
						|
 | 
						|
       Internet Server Test DLL
 | 
						|
 | 
						|
   Functions Exported:
 | 
						|
 | 
						|
 | 
						|
 | 
						|
   Revision History:
 | 
						|
 | 
						|
--*/
 | 
						|
 | 
						|
 | 
						|
/************************************************************
 | 
						|
 *     Include Headers
 | 
						|
 ************************************************************/
 | 
						|
# include <windows.h>
 | 
						|
# include <stdio.h>
 | 
						|
# include <stdlib.h>
 | 
						|
# include <math.h>
 | 
						|
 | 
						|
/************************************************************
 | 
						|
 *    Functions 
 | 
						|
 ************************************************************/
 | 
						|
 | 
						|
 | 
						|
 | 
						|
// specifies how many characters we will use for hashing string names
 | 
						|
DWORD  g_dwFastHashLength = (16);
 | 
						|
 | 
						|
 | 
						|
//
 | 
						|
// Following is the original hash-calculator used by Tsunami cache in
 | 
						|
//  IIS 4.0
 | 
						|
//
 | 
						|
DWORD
 | 
						|
CalculateHashAndLengthOfPathName(
 | 
						|
    LPCSTR pszPathName,
 | 
						|
    PULONG pcbLength
 | 
						|
    )
 | 
						|
{
 | 
						|
    DWORD hash = 0;
 | 
						|
    CHAR      ch;
 | 
						|
 | 
						|
    DWORD     start;
 | 
						|
    DWORD     index;
 | 
						|
 | 
						|
    *pcbLength = strlen(pszPathName);
 | 
						|
 | 
						|
    //
 | 
						|
    // hash the last g_dwFastHashLength characters
 | 
						|
    //
 | 
						|
 | 
						|
    if ( *pcbLength < g_dwFastHashLength ) {
 | 
						|
        start = 0;
 | 
						|
    } else {
 | 
						|
        start = *pcbLength - g_dwFastHashLength;
 | 
						|
    }
 | 
						|
 | 
						|
    for ( index = start; pszPathName[index] != '\0'; index++ ) {
 | 
						|
 | 
						|
        //
 | 
						|
        // This is an extremely slimey way of getting upper case.
 | 
						|
        // Kids, don't try this at home
 | 
						|
        // -johnson
 | 
						|
        //
 | 
						|
 | 
						|
        ch = pszPathName[index] & (CHAR)~0x20;
 | 
						|
 | 
						|
        hash <<= 1;
 | 
						|
        hash ^= ch;
 | 
						|
        hash <<= 1;
 | 
						|
        hash += ch;
 | 
						|
    }
 | 
						|
 | 
						|
    //
 | 
						|
    // Multiply by length to give additional randomness
 | 
						|
    //
 | 
						|
 | 
						|
    return( hash * *pcbLength);
 | 
						|
 | 
						|
} // CalculateHashAndLengthOfPathName()
 | 
						|
 | 
						|
 | 
						|
 | 
						|
 | 
						|
//
 | 
						|
// Following is the version of Perfect hash function used inside
 | 
						|
//  NTFS for hashing file names - used in NT5
 | 
						|
//
 | 
						|
DWORD
 | 
						|
NTPerfCalcHashAndLengthOfPathName(
 | 
						|
    LPCSTR pszPathName,
 | 
						|
    PULONG pcbLength
 | 
						|
    )
 | 
						|
{
 | 
						|
 | 
						|
    /* default value for "scrambling constant" */
 | 
						|
#define FILENAME_STRING_CONVERT_CONSTANT   314159269 
 | 
						|
    /* prime number, also used for scrambling  */
 | 
						|
#define FILENAME_STRING_PRIME     1000000007   
 | 
						|
 | 
						|
//
 | 
						|
// Compute a hash value for the string which is invairant to case.  The 
 | 
						|
// distribution of hash strings should be nearly perfect using the following
 | 
						|
// function.  Unless file names become extremely long, say average over 512
 | 
						|
// characters, processing all characters is the best way to get perfect
 | 
						|
// distribution.
 | 
						|
//
 | 
						|
// For really long file names a better algorithm could include taking the
 | 
						|
// first n plus the last n plus psuedo-random number of characters in the
 | 
						|
// middle using the last n has the psuedo-random seed.  Just the cost of 
 | 
						|
// calculating a new end point (say last 10 characters) will decrease the
 | 
						|
// speed of this function enough to cancel the savings of not using the first
 | 
						|
// n-1 characters, so any changes here should be tested for performance.
 | 
						|
//
 | 
						|
// Leave out the upcase code for now (see sample here).
 | 
						|
// todo: Leave in _ch until case issue solved
 | 
						|
//_ch = RtlUpcaseUnicodeChar( *_p++ );    and put slash here                
 | 
						|
 | 
						|
    DWORD hash = 0;
 | 
						|
    CHAR      ch;
 | 
						|
 | 
						|
    LPCSTR    pszScan;
 | 
						|
    LPCSTR    pszEnd;
 | 
						|
 | 
						|
    *pcbLength = strlen(pszPathName);
 | 
						|
 | 
						|
    pszScan  = pszPathName;
 | 
						|
    pszEnd   = pszPathName + *pcbLength;
 | 
						|
    
 | 
						|
    while ( pszScan < pszEnd) {
 | 
						|
        hash = 37 * hash + (unsigned int)((*pszScan++) &  (CHAR)~0x20);
 | 
						|
    } // while
 | 
						|
 | 
						|
    return ( abs(FILENAME_STRING_CONVERT_CONSTANT * hash) % 
 | 
						|
             FILENAME_STRING_PRIME);
 | 
						|
}  // NTPerfCalcHashAndLengthOfPathName()
 | 
						|
 | 
						|
 | 
						|
 | 
						|
 | 
						|
DWORD
 | 
						|
CalculateVariance( IN LPDWORD prgBinDistribs, IN DWORD nBins, 
 | 
						|
                   IN DWORD dwAverage)
 | 
						|
{
 | 
						|
    //
 | 
						|
    // Variance  = Sigma( (X-Avg)^2)/n
 | 
						|
    //
 | 
						|
    
 | 
						|
    DWORD dwVariance = 0;
 | 
						|
    LONG  delta;
 | 
						|
    DWORD binIndex;
 | 
						|
 | 
						|
    for ( binIndex = 0; binIndex < nBins; binIndex++) {
 | 
						|
        
 | 
						|
        delta = (prgBinDistribs[binIndex] - dwAverage);
 | 
						|
        dwVariance += (delta * delta);
 | 
						|
    } // for binIndex
 | 
						|
 | 
						|
    return ( dwVariance/nBins);
 | 
						|
} // CalculateVariance()
 | 
						|
 | 
						|
 | 
						|
 | 
						|
void
 | 
						|
PrintBinDistributions( IN LPDWORD prgBinDistribs, IN DWORD nBins)
 | 
						|
{
 | 
						|
    
 | 
						|
    printf( "\n-------------------------------------------------------\n"
 | 
						|
            "\n     Hash Value Distributions Calculated  Bins = %d    \n"
 | 
						|
            "[Index]  "
 | 
						|
            ,
 | 
						|
            nBins
 | 
						|
            );
 | 
						|
 | 
						|
    DWORD binIndex;
 | 
						|
    DWORD dwAverage = 0;
 | 
						|
 | 
						|
    // print the header
 | 
						|
    for (binIndex = 0; binIndex < 10; binIndex++) {
 | 
						|
        printf( "%4d,", binIndex);
 | 
						|
    }
 | 
						|
        
 | 
						|
    for ( binIndex = 0; binIndex < nBins; binIndex++) {
 | 
						|
 | 
						|
        if ( binIndex %10 == 0) {
 | 
						|
            printf( "\n[%5d]  ", binIndex);
 | 
						|
        }
 | 
						|
 | 
						|
        dwAverage += prgBinDistribs[binIndex];
 | 
						|
        printf( "%4d,", prgBinDistribs[binIndex]);
 | 
						|
    } // for binIndex
 | 
						|
 | 
						|
    dwAverage /= nBins;
 | 
						|
 | 
						|
    DWORD dwVariance = CalculateVariance( prgBinDistribs, nBins, dwAverage);
 | 
						|
 | 
						|
    printf( "\n--------------------------------------------------------\n"
 | 
						|
            "\n  Average = %d; Variance = %d; Standard Deviation +/- %8.3g\n"
 | 
						|
            "\n--------------------------------------------------------\n"
 | 
						|
            ,
 | 
						|
            dwAverage,
 | 
						|
            dwVariance,
 | 
						|
            sqrt( (double ) dwVariance)
 | 
						|
            );
 | 
						|
    return;
 | 
						|
} // PrintBinDistributions()
 | 
						|
 | 
						|
 | 
						|
/* ------------------------------------------------------------
 | 
						|
 *  SpecWeb96 file names have following syntax
 | 
						|
 *  /spec/dirX/classY_Z
 | 
						|
 * where,
 | 
						|
 *    X = 0 ... 200
 | 
						|
 *    Y = 0, 1, 2, 3
 | 
						|
 *    Z = 0 ... 8
 | 
						|
 * ------------------------------------------------------------
 | 
						|
 */
 | 
						|
 | 
						|
# define MAX_DIR_INDEX (200)
 | 
						|
# define MAX_Y_INDEX   (3)
 | 
						|
# define MAX_Z_INDEX   (8)
 | 
						|
 | 
						|
 | 
						|
DWORD g_rgBinDistrib113[114];
 | 
						|
DWORD g_rgBinDistrib223[224];
 | 
						|
 | 
						|
 | 
						|
int __cdecl
 | 
						|
main(int argc, char * argv[])
 | 
						|
{
 | 
						|
    DWORD dirIndex;
 | 
						|
    DWORD YIndex;
 | 
						|
    DWORD ZIndex;
 | 
						|
    DWORD index = 0;
 | 
						|
 | 
						|
 | 
						|
    if ( argc > 1 ) {
 | 
						|
        g_dwFastHashLength = atoi( argv[1]);
 | 
						|
        printf( "Using fast hash-lenght with match for %d suffix chars\n",
 | 
						|
                g_dwFastHashLength);
 | 
						|
    }
 | 
						|
 | 
						|
    ZeroMemory( g_rgBinDistrib113, sizeof(g_rgBinDistrib113));
 | 
						|
    ZeroMemory( g_rgBinDistrib223, sizeof(g_rgBinDistrib223));
 | 
						|
 | 
						|
 | 
						|
    printf( "\n------------------------------------------------------------\n"
 | 
						|
           "\n         Hash Values Calculated                             \n"
 | 
						|
           "%6s, %30s, %6s, %8s, %6s, %6s\n",
 | 
						|
           "Index",
 | 
						|
           "File Name",
 | 
						|
           "Len",
 | 
						|
           "Hash",
 | 
						|
           "Bin113",
 | 
						|
           "Bin223"
 | 
						|
           );
 | 
						|
    for ( dirIndex = 0; dirIndex <= MAX_DIR_INDEX; dirIndex++) {
 | 
						|
        
 | 
						|
        CHAR  rgchFileName[MAX_PATH];
 | 
						|
        DWORD cchDirName;
 | 
						|
        
 | 
						|
        cchDirName = wsprintf( rgchFileName, "/spec/dir%d/class", dirIndex);
 | 
						|
 | 
						|
        for ( YIndex = 0; YIndex <= MAX_Y_INDEX; YIndex++) {
 | 
						|
            
 | 
						|
            for (ZIndex = 0; ZIndex <= MAX_Z_INDEX; ZIndex++) {
 | 
						|
 | 
						|
                DWORD cchFileName;
 | 
						|
                DWORD hashValue;
 | 
						|
                DWORD cchPathLen = 0;
 | 
						|
 | 
						|
                cchFileName = (cchDirName + 
 | 
						|
                               wsprintf( rgchFileName + cchDirName,
 | 
						|
                                         "%d_%d",
 | 
						|
                                         YIndex, ZIndex)
 | 
						|
                               );
 | 
						|
 | 
						|
# ifdef IIS_K2_HASH
 | 
						|
                hashValue = CalculateHashAndLengthOfPathName( rgchFileName,
 | 
						|
                                                              &cchPathLen
 | 
						|
                                                              );
 | 
						|
# else 
 | 
						|
                hashValue = NTPerfCalcHashAndLengthOfPathName( rgchFileName,
 | 
						|
                                                               &cchPathLen);
 | 
						|
# endif // IIS_K2_HA
 | 
						|
 | 
						|
                printf( "[%6d], %30s, %6d, 0x%8X, %6d, %6d\n",
 | 
						|
                        index++,
 | 
						|
                        rgchFileName,
 | 
						|
                        cchPathLen,
 | 
						|
                        hashValue,
 | 
						|
                        hashValue % 113,  // bin for 113 bins
 | 
						|
                        hashValue % 223   // bin for 223 bins
 | 
						|
                        );
 | 
						|
 | 
						|
                g_rgBinDistrib113[hashValue%113] += 1;
 | 
						|
                g_rgBinDistrib223[hashValue%223] += 1;
 | 
						|
                
 | 
						|
            } // for ZIndex
 | 
						|
 | 
						|
        } // for YIndex
 | 
						|
 | 
						|
    } // for dirIndex()
 | 
						|
 | 
						|
 | 
						|
    PrintBinDistributions( g_rgBinDistrib113, 113);
 | 
						|
    PrintBinDistributions( g_rgBinDistrib223, 223);
 | 
						|
 | 
						|
 | 
						|
    return (0);
 | 
						|
} // main()
 | 
						|
 | 
						|
 | 
						|
 | 
						|
/************************ End of File ***********************/
 |