/*

  SVMHANDLER.H
  (c) copyright 1998 Microsoft Corp

  Contains the class encapsulating the Support Vector Machine used to do on the fly spam detection

  Robert Rounthwaite (RobertRo@microsoft.com)

*/

#pragma once

// Floating-point type used for SVM weights, thresholds and probabilities in this header.
// The typedef is skipped when a macro named REAL has already been defined by the including code.
#if !defined(REAL)
typedef double REAL;
#endif // !defined(REAL)

// Calls free() on p when p is not NULL.
// - The argument is parenthesized in the comparison so that expressions with lower-precedence
//   operators are tested as a whole.
// - The expansion is a brace-enclosed block rather than a bare `if`, so an `else` that follows
//   the macro can no longer attach to the macro's internal `if`.
// - A brace block (not do/while(0)) is used so that call sites written either with or without
//   a trailing semicolon keep compiling.
// Note: p is evaluated twice; do not pass an expression with side effects.
#define SAFE_FREE( p ) { if ((p) != NULL) free(p); }

// Boolean operator selector: logical OR or logical AND.
// Enumerator values are spelled out explicitly; they match the implicit numbering.
enum boolop
{
	boolopOr  = 0,	// logical OR
	boolopAnd = 1	// logical AND
};

#include "svmutil.h"

class MAILFILTER
{
	/* 
		The public interface to the MAILFILTER class is below. Normal use of this class to filter mail
		will entail:
		Calling the following once: FSetSVMDataLocation() and SetSpamCutoff()
		Setting the "Properties of the user"
		...and, for each message you filter
			- Calling BCalculateSpamProb()
	*/
public:
	// Sets the location of the SVM Data file(.LKO file). Must be called before calling any other methods
	// Data file must be present at time function is called
	// returns true if successful, false otherwise
	bool FSetSVMDataLocation(char *szFullPath);

	// Sets the Spam cutoff percentage. Must be in range from 0 to 100
	bool SetSpamCutoff(REAL rCutoff);
	// returns value set with SetSpamCutoff. Defaults == DefaultSpamCutoff
	// if no value has been set when SVM output file is read
	REAL GetSpamCutoff();
	// returns default value for SpamCutoff. read from SVM output file.
	// should call FSetSVMDataLocation before calling this function
	REAL GetDefaultSpamCutoff();

	// Properties of the user
	void SetFirstName(char *szFirstName);
	void SetLastName(char *szLastName);
	void SetCompanyName(char *szCompanyName);

	// Calculates the probability that the current message (defined by the properties of the message) is spam.
	// !Note! that the IN string params may be modified by the function.
	// Returns the probability (0 to 1) that the message is spam in prSpamProb
	// the boolean return is determined by comparing to the spam cutoff
	// if the value of a boolean param is unknown use false, use 0 for unknown time.
	bool BCalculateSpamProb(/* IN params */
							char *szFrom,
							char *szTo,
							char *szSubject,
							char *szBody,
							bool bDirectMessage,
							bool bHasAttach,
							FILETIME tMessageSent,
							/* OUT params */
							REAL *prSpamProb, 
							bool * pbIsSpam);

	MAILFILTER();
	~MAILFILTER();

	// Reads the default spam cutoff without parsing entire file
	// Use GetDefaultSpamCutoff if using FSetSVMDataLocation;
	static bool BReadDefaultSpamCutoff(char *szFullPath, REAL *prDefCutoff);

private: // members
	struct FeatureComponent
	{
		FeatureLocation loc;
		union
		{
			char *szFeature;
			UINT iRuleNum; // used with locSpecial
		};
		// map feature to location in dst file/location in SVM output
		// more than one feature component may map to the same location, combined with the op
		int iFeature;
		boolop bop; // first feature in group is alway bopOr
		bool fPresent;
		FeatureComponent() { loc = locNil; }
		~FeatureComponent() 
		{ 
			if ((loc>locNil) && (loc < locSpecial))
			{
				free(szFeature);
			}
		}
	};

	FeatureComponent *rgfeaturecomps;

	// weights from SVM output
	REAL *rgrSVMWeights;
	// Other SVM file variables
	REAL _rCC;
	REAL _rDD;
	REAL _rThresh;
	REAL _rDefaultThresh;

	// Counts
	UINT _cFeatures;
	UINT _cFeatureComps;

	// is Feature present? -1 indicates not yet set, 0 indicates not present, 1 indicates present
	int *_rgiFeatureStatus;

	// Properties of the user
	char *_szFirstName;
	char *_szLastName;
	char *_szCompanyName;

	// Set via FSetSVMDataLocation() and SetSpamCutoff()
	CString _strFName;
	REAL _rSpamCutoff;

	// Properties of the message
	char *_szFrom; 
	char *_szTo; 
	char *_szSubject; 
	char *_szBody;
	bool _bDirectMessage;
	FILETIME _tMessageSent;
	bool _bHasAttach;

	// Cached special rule results used during spam calculations
	bool _bRule14;
	bool _bRule17;

private: // methods
	bool ReadSVMOutput(LPCTSTR lpszFileName);
	void EvaluateFeatureComponents();
	void ProcessFeatureComponentPresence();
	REAL RDoSVMCalc();
	bool FInvokeSpecialRule(UINT iRuleNum);
	void HandleCaseSensitiveSpecialRules();
};