2025-11-27 16:46:48 +09:00

223 lines
8.3 KiB
C++
Raw Permalink Blame History

// HTMLParser.h: interface for the COXHTMLParser class.
//
//////////////////////////////////////////////////////////////////////
// Version: 9.3
#if !defined(AFX_HTMLPARSER_H__45F43BB2_0F0E_11D3_AA04_62B93C000000__INCLUDED_)
#define AFX_HTMLPARSER_H__45F43BB2_0F0E_11D3_AA04_62B93C000000__INCLUDED_
#if _MSC_VER > 1000
#pragma once
#endif // _MSC_VER > 1000
#include "OXParser.h"
struct TagDescriptor
{
LPTSTR szTag;
DWORD dwTagFlag;
DWORD dwTextFlag;
};
class OX_CLASS_DECL COXHTMLParser : public COXParser
{
friend class CTagDescriptor;
protected:
enum TAG_TYPES {
TAG_BLOCK = 0x00000001L, // Block tag (eg <p>, <table> etc)
TAG_OPTEND = 0x00000002L, // Optional ending tag. Block tag will end this tag
TAG_EMPTY = 0x00000004L, // Empty tag (eg <br>)
TAG_SECTION = 0x00000008L, // Section tag (<body>, <head> or <html>)
TAG_CANCENTER = 0x00000010L, // Tag can be wrapped by <center></center>
TAG_HEAD = 0x00000020L, // Tag belongs in <head> element
};
enum TEXT_FLAGS {
TEXT_BIG = 0x00000001L,
TEXT_BOLD = 0x00000002L,
TEXT_CENTER = 0x00000004L,
TEXT_CITE = 0x00000008L,
TEXT_CODE = 0x00000010L,
TEXT_DEFN = 0x00000020L,
TEXT_EMPHASIS = 0x00000040L,
TEXT_FORMATTED = 0x00000080L,
TEXT_ITALIC = 0x00000100L,
TEXT_KEYBOARD = 0x00000200L,
TEXT_PRE = 0x00000400L,
TEXT_SAMPLE = 0x00000800L,
TEXT_SMALL = 0x00001000L,
TEXT_STRIKE = 0x00002000L,
TEXT_STRONG = 0x00004000L,
TEXT_SUB = 0x00008000L,
TEXT_SUP = 0x00010000L,
TEXT_TELETYPE = 0x00020000L,
TEXT_UNDERLINE = 0x00040000L,
TEXT_VAR = 0x00080000L,
TEXT_FONT = 0x10000000L,
};
enum HTMLErrorTypes
{
ERROR_HTML_FIRST = 1000, // All error messages start
ERROR_HTML_LAST, // All error messages end
WARNING_HTML_FIRST = 2000, // All warnings start
WARNING_UNKNOWN_TAG, // Unknown tag found
WARNING_HTML_LAST, // All warnings end
};
public:
COXHTMLParser();
virtual ~COXHTMLParser();
protected:
BOOL IsEmptyTag(LPCTSTR szTag) { return IsEmptyTag(GetTagDescriptor(szTag)); }
BOOL IsTextModifier(LPCTSTR szTag) { return IsTextModifier(GetTagDescriptor(szTag)); }
BOOL IsOptionalEndTag(LPCTSTR szTag) { return IsOptionalEndTag(GetTagDescriptor(szTag)); }
BOOL IsSectionTag(LPCTSTR szTag) { return IsSectionTag(GetTagDescriptor(szTag)); }
BOOL IsBlockTag(LPCTSTR szTag) { return IsBlockTag(GetTagDescriptor(szTag)); }
BOOL IsHeadTag(LPCTSTR szTag) { return IsHeadTag(GetTagDescriptor(szTag)); }
BOOL CanCenter(LPCTSTR szTag) { return CanCenter(GetTagDescriptor(szTag)); }
BOOL IsEmptyTag(TagDescriptor *pTag);
BOOL IsTextModifier(TagDescriptor *pTag);
BOOL IsOptionalEndTag(TagDescriptor *pTag);
BOOL IsSectionTag(TagDescriptor *pTag);
BOOL IsBlockTag(TagDescriptor *pTag);
BOOL IsHeadTag(TagDescriptor* pTag);
BOOL CanCenter(TagDescriptor* pTag);
TagDescriptor* GetTagDescriptor(LPCTSTR szTag) const;
COXParserElement* DuplicateFontElement(COXParserElement* pElement);
COXParserElement* ConstructFontElement();
BOOL GetValueString(COXQuickString& str);
UINT GetTextStyle() { return m_TextStyleStack.back(); }
void SetTextStyle(UINT nStyle) { m_TextStyleStack.back() = nStyle; }
void PushTextStyle() { m_TextStyleStack.push_back(0); }
void PopTextStyle() { m_TextStyleStack.pop_back(); }
public:
// --- In<49> :
// --- Out :
// --- Returns :
// --- Effect : Removes all childs object and release
// all memory allocated by the object
virtual void Clear();
// --- In<49> :
// --- Out :
// --- Returns : TRUE always.
// --- Effect : Initializes members of the class.
virtual BOOL Initialize();
// --- In<49> :
// --- Out :
// --- Returns : TRUE always.
// --- Effect : Do nothing, override it if you need it.
virtual BOOL Cleanup();
// --- In<49> : pParent - pointer to an object to parse text from.
// --- Out :
// --- Returns : pointer to an object of type COXParser::STRING on success,
// NULL otherwise.
// --- Effect : Parses text element.
virtual COXParserObject* ParseText(COXParserElement* pParent);
// --- In<49> : pElement - pointer to an element to be added to.
// pObject - pointer to an object to be added to the element
// --- Out :
// --- Returns :
// --- Effect : Adds object to the element.
virtual void AddObjectToElement(COXParserElement* pElement, COXParserObject* pObject);
// --- In<49> : pElement - pointer to an object to parse attributes.
// --- Out :
// --- Returns : TRUE on success, FALSE otherwise.
// --- Effect : Parses attributes of an object.
virtual BOOL ParseAttributes(COXParserElement* pElement);
// --- In<49> : pParent - the parent of the element whose tag is being parsed.
// --- Out : bEmptyTag - TRUE, if tag does not requiere end tag, like <BR>
// --- Returns : pointer to an new object on success, NULL otherwise.
// --- Effect : Parses start part of a tag.
virtual COXParserElement* ParseStartTag(COXParserElement* pParent, BOOL& bEmptyTag);
// --- In<49> : pElement - unused.
// strEndTag - text to be parsed as end tag
// --- Out :
// --- Returns : TRUE on success, FALSE otherwise.
// --- Effect : Parses end part of a tag.
virtual BOOL ParseEndTag(COXParserElement* pElement, COXQuickString& strEndTag);
// --- In<49> : szCurrentTag - current tag.
// szNewTag - new tag.
// NewTagIsEndTag - specifies whether or not szNewTag is an end tag (TRUE)
// or a start tag (FALSE)
// --- Out :
// --- Returns : TRUE if end tag should be,
// FALSE if szNewTag represents a new child element of szCurrentTag
// --- Effect : Handles tags.
virtual BOOL IsEndTagMissing(LPCTSTR szCurrentTag, LPCTSTR szNewTag,
BOOL NewTagIsEndTag);
// --- In<49> : pElement - unused.
// bEmptyTag - unused.
// --- Out :
// --- Returns : FALSE always.
// --- Effect : Override this member function if you need it.
virtual BOOL IgnoreStartTag(COXParserElement* pElement, BOOL bEmptyTag);
// --- In<49> : szEndTag - end tag.
// --- Out :
// --- Returns : TRUE if text style or font has been changed.
// --- Effect : Determines if end has changed style or font of the text.
virtual BOOL IgnoreEndTag(LPCTSTR szEndTag);
// --- In<49> : hFile - a handle of a file to write attributes to.
// pElement - a pointer to an object to write attributes from
// --- Out :
// --- Returns : TRUE on success, FALSE otherwise
// --- Effect : Saves attributes from an object to specified file.
virtual BOOL WriteAttributes(HANDLE hFile, COXParserElement* pElement);
// --- In<49> : hFile - a handle of a file to write element to.
// pElement - a pointer to an element to write
// nLevel - not used
// --- Out :
// --- Returns : TRUE on success, FALSE otherwise
// --- Effect : Saves element to specified file.
virtual BOOL WriteElement(HANDLE hFile, COXParserElement* pElement, int nLevel);
// --- In<49> : hFile - a handle of a file to write text to.
// pObject - a pointer to an object to write text from
// nLevel - not used
// --- Out :
// --- Returns : TRUE on success, FALSE otherwise
// --- Effect : Saves text from an object to specified file.
virtual BOOL WriteText(HANDLE hFile, COXParserObject* pObject, int nLevel);
// --- In<49> : nErrorCode - error code.
// --- Returns : pointer to string describing the error
// --- Effect : Returns description of the error.
virtual LPCTSTR TranslateErrorCode(int nErrorCode);
protected:
static TagDescriptor m_HTMLTags[]; // List of known HTML tags
static ParserEntity m_HTMLEntity[]; // List of known special char entities
std::vector<COXParserElement*> m_FontStack; // Current font stack
std::vector<UINT> m_TextStyleStack; // Current text style stack
COXHashTable m_HTMLTagTable; // Lookup Table of HTML tags
};
#endif // !defined(AFX_HTMLPARSER_H__45F43BB2_0F0E_11D3_AA04_62B93C000000__INCLUDED_)