1086 lines
38 KiB
C++
1086 lines
38 KiB
C++
// HTMLParser.cpp: implementation of the COXHTMLParser class.
|
||
//
|
||
//////////////////////////////////////////////////////////////////////
|
||
// Version: 9.3
|
||
|
||
|
||
#include "stdafx.h"
|
||
#include "OXHTMLParser.h"
|
||
|
||
#include "UTBStrOp.h"
|
||
|
||
#ifdef _DEBUG
|
||
#undef THIS_FILE
|
||
static char THIS_FILE[]=__FILE__;
|
||
#define new DEBUG_NEW
|
||
#endif
|
||
|
||
// Limitations:
|
||
// - <a href=http://domain/directory/script.cgi?ord="string"> tags not parsed correctly
|
||
// - only VERY limited validation is done. For instance, there is no check on tag
|
||
// ordering or nesting (eg <head> may appear after <body>)
|
||
// - No real validation is performed. For instance, empty <p></p> tags are not removed.
|
||
//
|
||
// The full solution is to make COXParser fully XML 1.0 compliant in regards to Document
|
||
// Type Definitions, then simply provide a HTML DTD for the given version of HTML to
|
||
// be parsed. One day...
|
||
|
||
//////////////////////////////////////////////////////////////////////
|
||
// Construction/Destruction
|
||
//////////////////////////////////////////////////////////////////////
|
||
|
||
// Tags (up to HTML 4.0)
|
||
// Tag Flags:
|
||
//
|
||
// TAG_BLOCK - tags that delimit blocks that signify the end of a paragraph
|
||
// TAG_OPTEND - tags that have an optional closing tag eg <P>
|
||
// TAG_EMPTY - tags that do not have a closing tag at all (eg <HR>)
|
||
// TAG_SECTION - tags for special sections (eg <BODY>)
|
||
// TAG_CANCENTER - can be wrapped by a "center" tag
|
||
//
|
||
// Text flags - for tags that modify text formatting (eg <B>). These flags
|
||
// are not block elements, but merely font attribute "toggles"
|
||
// Table of known HTML tags (up to HTML 4.0). Each row is
// { tag name, TAG_* flags, TEXT_* flag }; the table is terminated by a row
// whose tag name is NULL. The constructor loads every row into
// m_HTMLTagTable, and ParseText() walks the table to find the tags whose
// text flag is currently switched on.
//
// TAG_HEAD marks the tags that IsHeadTag() reports.
//
// NOTE: TEXT_DEFN is attached to DFN (the "definition" tag). It previously
// sat on the neighbouring DEL row, which looks like a row slip.
TagDescriptor COXHTMLParser::m_HTMLTags[] =
{
    //  Tag name              Tag flags                               Text flag
    { TEXT("HTML"),       TAG_BLOCK|TAG_OPTEND|TAG_SECTION,       0,              },

    { TEXT("HEAD"),       TAG_BLOCK|TAG_OPTEND|TAG_SECTION,       0,              },

    { TEXT("TITLE"),      TAG_HEAD|TAG_BLOCK,                     0,              },
    { TEXT("BASE"),       TAG_HEAD|TAG_BLOCK|TAG_EMPTY,           0,              },
    { TEXT("LINK"),       TAG_HEAD|TAG_EMPTY,                     0,              },
    { TEXT("META"),       TAG_HEAD|TAG_BLOCK|TAG_EMPTY,           0,              },
    { TEXT("SCRIPT"),     TAG_HEAD|TAG_BLOCK,                     0,              },
    { TEXT("STYLE"),      TAG_HEAD,                               0,              },

    { TEXT("BODY"),       TAG_BLOCK|TAG_OPTEND|TAG_SECTION,       0,              },
    { TEXT("FRAMESET"),   TAG_BLOCK,                              0,              },

    { TEXT("A"),          0,                                      0,              },
    { TEXT("ABBR"),       0,                                      0,              },
    { TEXT("ACRONYM"),    0,                                      0,              },
    { TEXT("ADDRESS"),    TAG_BLOCK|TAG_CANCENTER,                0,              },
    { TEXT("APPLET"),     TAG_BLOCK|TAG_CANCENTER,                0,              },
    { TEXT("AREA"),       TAG_EMPTY,                              0,              },
    { TEXT("B"),          0,                                      TEXT_BOLD,      },
    { TEXT("BASEFONT"),   TAG_BLOCK|TAG_EMPTY,                    0,              },
    { TEXT("BDO"),        0,                                      0,              },
    { TEXT("BIG"),        0,                                      TEXT_BIG,       },
    { TEXT("BLOCKQUOTE"), TAG_BLOCK|TAG_CANCENTER,                0,              },
    { TEXT("BR"),         TAG_EMPTY,                              0,              },
    { TEXT("BUTTON"),     TAG_BLOCK|TAG_CANCENTER,                0,              },
    { TEXT("CAPTION"),    TAG_BLOCK|TAG_CANCENTER,                0,              },
    { TEXT("CENTER"),     TAG_BLOCK,                              TEXT_CENTER,    },
    { TEXT("CITE"),       0,                                      TEXT_CITE,      },
    { TEXT("CODE"),       0,                                      TEXT_CODE,      },
    { TEXT("COL"),        TAG_BLOCK|TAG_EMPTY,                    0,              },
    { TEXT("COLGROUP"),   TAG_BLOCK|TAG_OPTEND,                   0,              },
    { TEXT("DD"),         TAG_BLOCK|TAG_OPTEND,                   0,              },
    { TEXT("DEL"),        0,                                      0,              },
    { TEXT("DFN"),        0,                                      TEXT_DEFN,      },
    { TEXT("DIR"),        TAG_BLOCK,                              0,              },
    { TEXT("DIV"),        TAG_BLOCK|TAG_CANCENTER,                0,              },
    { TEXT("DL"),         TAG_BLOCK,                              0,              },
    { TEXT("DT"),         TAG_BLOCK|TAG_OPTEND,                   0,              },
    { TEXT("EM"),         0,                                      TEXT_EMPHASIS,  },
    { TEXT("FIELDSET"),   TAG_BLOCK|TAG_CANCENTER,                0,              },
    { TEXT("FONT"),       0,                                      TEXT_FONT,      },
    { TEXT("FORM"),       TAG_BLOCK|TAG_CANCENTER,                0,              },
    { TEXT("FRAME"),      TAG_BLOCK|TAG_EMPTY|TAG_CANCENTER,      0,              },
    { TEXT("H1"),         TAG_BLOCK|TAG_CANCENTER,                0,              },
    { TEXT("H2"),         TAG_BLOCK|TAG_CANCENTER,                0,              },
    { TEXT("H3"),         TAG_BLOCK|TAG_CANCENTER,                0,              },
    { TEXT("H4"),         TAG_BLOCK|TAG_CANCENTER,                0,              },
    { TEXT("H5"),         TAG_BLOCK|TAG_CANCENTER,                0,              },
    { TEXT("H6"),         TAG_BLOCK|TAG_CANCENTER,                0,              },
    { TEXT("HR"),         TAG_BLOCK|TAG_EMPTY,                    0,              },
    { TEXT("I"),          0,                                      TEXT_ITALIC,    },
    { TEXT("IFRAME"),     TAG_BLOCK|TAG_CANCENTER,                0,              },
    { TEXT("IMG"),        TAG_EMPTY|TAG_CANCENTER,                0,              },
    { TEXT("INPUT"),      TAG_BLOCK|TAG_EMPTY|TAG_CANCENTER,      0,              },
    { TEXT("INS"),        0,                                      0,              },
    { TEXT("ISINDEX"),    TAG_EMPTY,                              0,              },
    { TEXT("KBD"),        0,                                      TEXT_KEYBOARD,  },
    { TEXT("LABEL"),      TAG_BLOCK|TAG_CANCENTER,                0,              },
    { TEXT("LEGEND"),     TAG_BLOCK|TAG_CANCENTER,                0,              },
    { TEXT("LI"),         TAG_BLOCK|TAG_OPTEND,                   0,              },
    { TEXT("MAP"),        TAG_BLOCK|TAG_CANCENTER,                0,              },
    { TEXT("MENU"),       TAG_BLOCK|TAG_CANCENTER,                0,              },
    { TEXT("NOFRAMES"),   TAG_BLOCK,                              0,              },
    { TEXT("NOSCRIPT"),   TAG_BLOCK,                              0,              },
    { TEXT("OBJECT"),     TAG_BLOCK|TAG_CANCENTER,                0,              },
    { TEXT("OL"),         TAG_BLOCK|TAG_CANCENTER,                0,              },
    { TEXT("OPTGROUP"),   0,                                      0,              },
    { TEXT("OPTION"),     TAG_OPTEND,                             0,              },
    { TEXT("P"),          TAG_BLOCK|TAG_OPTEND|TAG_CANCENTER,     0,              },
    { TEXT("PARAM"),      TAG_EMPTY,                              0,              },
    { TEXT("PRE"),        0,                                      TEXT_FORMATTED, },
    { TEXT("Q"),          TAG_BLOCK|TAG_CANCENTER,                0,              },
    { TEXT("S"),          0,                                      TEXT_STRIKE,    },
    { TEXT("SAMP"),       0,                                      TEXT_SAMPLE,    },
    { TEXT("SELECT"),     0,                                      0,              },
    { TEXT("SMALL"),      0,                                      TEXT_SMALL,     },
    { TEXT("SPAN"),       0,                                      0,              },
    { TEXT("STRIKE"),     0,                                      TEXT_STRIKE,    },
    { TEXT("STRONG"),     0,                                      TEXT_STRONG,    },
    { TEXT("SUB"),        0,                                      TEXT_SUB,       },
    { TEXT("SUP"),        0,                                      TEXT_SUP,       },
    { TEXT("TABLE"),      TAG_BLOCK|TAG_CANCENTER,                0,              },
    { TEXT("TBODY"),      TAG_BLOCK|TAG_OPTEND,                   0,              },
    { TEXT("TD"),         TAG_BLOCK|TAG_OPTEND,                   0,              },
    { TEXT("TEXTAREA"),   0,                                      0,              },
    { TEXT("TFOOT"),      TAG_OPTEND,                             0,              },
    { TEXT("TH"),         TAG_OPTEND,                             0,              },
    { TEXT("THEAD"),      TAG_OPTEND,                             0,              },
    { TEXT("TR"),         TAG_BLOCK|TAG_OPTEND,                   0,              },
    { TEXT("TT"),         0,                                      TEXT_TELETYPE,  },
    { TEXT("U"),          0,                                      TEXT_UNDERLINE, },
    { TEXT("UL"),         TAG_BLOCK|TAG_CANCENTER,                0,              },
    { TEXT("VAR"),        0,                                      TEXT_VAR,       },

    // Terminator: a NULL tag name ends every scan of this table.
    { NULL,               0,                                      0,              },
};
|
||
|
||
// Taken from the "Tidy" program from the W3C site. Some of the following tags
|
||
// are not recognised by the major browsers, so leave the esoteric ones out for now.
|
||
ParserEntity COXHTMLParser::m_HTMLEntity[] =
|
||
{
|
||
{ TEXT("nbsp"), TEXT("<EFBFBD>"), },
|
||
//{ TEXT("iexcl"), TEXT("<22>"), },
|
||
{ TEXT("cent"), TEXT("<EFBFBD>"), },
|
||
{ TEXT("pound"), TEXT("<EFBFBD>"), },
|
||
//{ TEXT("curren"), TEXT("<22>"), },
|
||
//{ TEXT("yen"), TEXT("<22>"), },
|
||
//{ TEXT("brvbar"), TEXT("<22>"), },
|
||
//{ TEXT("sect"), TEXT("<22>"), },
|
||
//{ TEXT("uml"), TEXT("<22>"), },
|
||
{ TEXT("copy"), TEXT("<EFBFBD>"), },
|
||
//{ TEXT("ordf"), TEXT("<22>"), },
|
||
//{ TEXT("laquo"), TEXT("<22>"), },
|
||
//{ TEXT("not"), TEXT("<22>"), },
|
||
//{ TEXT("shy"), TEXT("<22>"), },
|
||
{ TEXT("reg"), TEXT("<EFBFBD>"), },
|
||
//{ TEXT("macr"), TEXT("<22>"), },
|
||
//{ TEXT("deg"), TEXT("<22>"), },
|
||
//{ TEXT("plusmn"), TEXT("<22>"), },
|
||
//{ TEXT("sup2"), TEXT("<22>"), },
|
||
//{ TEXT("sup3"), TEXT("<22>"), },
|
||
//{ TEXT("acute"), TEXT("<22>"), },
|
||
//{ TEXT("micro"), TEXT("<22>"), },
|
||
//{ TEXT("para"), TEXT("<22>"), },
|
||
//{ TEXT("middot"), TEXT("<22>"), },
|
||
//{ TEXT("cedil"), TEXT("<22>"), },
|
||
//{ TEXT("sup1"), TEXT("<22>"), },
|
||
//{ TEXT("ordm"), TEXT("<22>"), },
|
||
//{ TEXT("raquo"), TEXT("<22>"), },
|
||
//{ TEXT("frac14"), TEXT("<22>"), },
|
||
//{ TEXT("frac12"), TEXT("<22>"), },
|
||
//{ TEXT("frac34"), TEXT("<22>"), },
|
||
//{ TEXT("iquest"), TEXT("<22>"), },
|
||
//{ TEXT("Agrave"), TEXT("<22>"), },
|
||
//{ TEXT("Aacute"), TEXT("<22>"), },
|
||
//{ TEXT("Acirc"), TEXT("<22>"), },
|
||
//{ TEXT("Atilde"), TEXT("<22>"), },
|
||
//{ TEXT("Auml"), TEXT("<22>"), },
|
||
//{ TEXT("Aring"), TEXT("<22>"), },
|
||
//{ TEXT("AElig"), TEXT("<22>"), },
|
||
//{ TEXT("Ccedil"), TEXT("<22>"), },
|
||
//{ TEXT("Egrave"), TEXT("<22>"), },
|
||
//{ TEXT("Eacute"), TEXT("<22>"), },
|
||
//{ TEXT("Ecirc"), TEXT("<22>"), },
|
||
//{ TEXT("Euml"), TEXT("<22>"), },
|
||
//{ TEXT("Igrave"), TEXT("<22>"), },
|
||
//{ TEXT("Iacute"), TEXT("<22>"), },
|
||
//{ TEXT("Icirc"), TEXT("<22>"), },
|
||
//{ TEXT("Iuml"), TEXT("<22>"), },
|
||
//{ TEXT("ETH"), TEXT("<22>"), },
|
||
//{ TEXT("Ntilde"), TEXT("<22>"), },
|
||
//{ TEXT("Ograve"), TEXT("<22>"), },
|
||
//{ TEXT("Oacute"), TEXT("<22>"), },
|
||
//{ TEXT("Ocirc"), TEXT("<22>"), },
|
||
//{ TEXT("Otilde"), TEXT("<22>"), },
|
||
//{ TEXT("Ouml"), TEXT("<22>"), },
|
||
//{ TEXT("times"), TEXT("<22>"), },
|
||
//{ TEXT("Oslash"), TEXT("<22>"), },
|
||
//{ TEXT("Ugrave"), TEXT("<22>"), },
|
||
//{ TEXT("Uacute"), TEXT("<22>"), },
|
||
//{ TEXT("Ucirc"), TEXT("<22>"), },
|
||
//{ TEXT("Uuml"), TEXT("<22>"), },
|
||
//{ TEXT("Yacute"), TEXT("<22>"), },
|
||
//{ TEXT("THORN"), TEXT("<22>"), },
|
||
//{ TEXT("szlig"), TEXT("<22>"), },
|
||
//{ TEXT("agrave"), TEXT("<22>"), },
|
||
//{ TEXT("aacute"), TEXT("<22>"), },
|
||
//{ TEXT("acirc"), TEXT("<22>"), },
|
||
//{ TEXT("atilde"), TEXT("<22>"), },
|
||
//{ TEXT("auml"), TEXT("<22>"), },
|
||
//{ TEXT("aring"), TEXT("<22>"), },
|
||
//{ TEXT("aelig"), TEXT("<22>"), },
|
||
//{ TEXT("ccedil"), TEXT("<22>"), },
|
||
//{ TEXT("egrave"), TEXT("<22>"), },
|
||
//{ TEXT("eacute"), TEXT("<22>"), },
|
||
//{ TEXT("ecirc"), TEXT("<22>"), },
|
||
//{ TEXT("euml"), TEXT("<22>"), },
|
||
//{ TEXT("igrave"), TEXT("<22>"), },
|
||
//{ TEXT("iacute"), TEXT("<22>"), },
|
||
//{ TEXT("icirc"), TEXT("<22>"), },
|
||
//{ TEXT("iuml"), TEXT("<22>"), },
|
||
//{ TEXT("eth"), TEXT("<22>"), },
|
||
//{ TEXT("ntilde"), TEXT("<22>"), },
|
||
//{ TEXT("ograve"), TEXT("<22>"), },
|
||
//{ TEXT("oacute"), TEXT("<22>"), },
|
||
//{ TEXT("ocirc"), TEXT("<22>"), },
|
||
//{ TEXT("otilde"), TEXT("<22>"), },
|
||
//{ TEXT("ouml"), TEXT("<22>"), },
|
||
//{ TEXT("divide"), TEXT("<22>"), },
|
||
//{ TEXT("oslash"), TEXT("<22>"), },
|
||
//{ TEXT("ugrave"), TEXT("<22>"), },
|
||
//{ TEXT("uacute"), TEXT("<22>"), },
|
||
//{ TEXT("ucirc"), TEXT("<22>"), },
|
||
//{ TEXT("uuml"), TEXT("<22>"), },
|
||
//{ TEXT("yacute"), TEXT("<22>"), },
|
||
//{ TEXT("thorn"), TEXT("<22>"), },
|
||
//{ TEXT("yuml"), TEXT("<22>"), },
|
||
|
||
NULL, 0
|
||
};
|
||
|
||
// Constructs an HTML parser: turns off case sensitivity for the parser and for
// the tag hash table, then registers the static tag and entity tables.
COXHTMLParser::COXHTMLParser()
{
    m_bErrorOnMissingTag = FALSE;

    SetCaseSensitive(FALSE);
    m_HTMLTagTable.SetCaseSensitive(FALSE);

    // Fill hash table with tags. Each entry stores the address of its
    // descriptor; GetTagDescriptor() reads it back as a DWORD_PTR, so the
    // pointer is passed at full pointer width rather than being squeezed
    // through a 32-bit DWORD (which would truncate it in a 64-bit build).
    int i = 0;
    for (i = 0; m_HTMLTags[i].szTag; i++)
        m_HTMLTagTable.Add(m_HTMLTags[i].szTag,
                           reinterpret_cast<DWORD_PTR>(&m_HTMLTags[i]));

    // Add predefined HTML character entities to the entity list. The stored
    // value is the address of the entity's literal replacement string.
    for (i = 0; m_HTMLEntity[i].szName; i++)
        m_EntityTable.Add(m_HTMLEntity[i].szName,
                          reinterpret_cast<DWORD_PTR>(m_HTMLEntity[i].szLiteral));
}
|
||
|
||
// Destroys the parser. Font elements pushed by IgnoreStartTag() are owned by
// the font stack and are otherwise only freed by Clear() or a matching </font>
// tag, so release whatever is still on the stack here. This is a no-op when
// Clear() has already emptied it.
COXHTMLParser::~COXHTMLParser()
{
    for (UINT i = 0; i < m_FontStack.size(); i++)
        delete (COXParserElement*) m_FontStack[i];

    m_FontStack.clear();
}
|
||
|
||
void COXHTMLParser::Clear()
|
||
{
|
||
COXParser::Clear();
|
||
|
||
// Clear out the font stack
|
||
for (UINT i = 0; i < m_FontStack.size(); i++)
|
||
{
|
||
COXParserElement* pElm = (COXParserElement*) m_FontStack[i];
|
||
delete pElm;
|
||
}
|
||
m_FontStack.clear();
|
||
m_TextStyleStack.clear();
|
||
}
|
||
|
||
// Prepares the parser for a run. Returns whatever the base class
// Initialize() returned; the HTML-specific setup is performed regardless.
BOOL COXHTMLParser::Initialize()
{
    const BOOL bBaseInitialized = COXParser::Initialize();

    // Start with one text style entry on the stack.
    PushTextStyle();

    // HTML browsers don't seem to support the "apos" entity, so take it out
    // of the entity table.
    m_EntityTable.Remove(TEXT("apos"));

    return bBaseInitialized;
}
|
||
|
||
// Post-parse cleanup. There is no HTML-specific work to do, so this simply
// forwards to the base class and returns its result.
BOOL COXHTMLParser::Cleanup()
{
    return COXParser::Cleanup();
}
|
||
|
||
// Make a new copy of the font element
|
||
COXParserElement* COXHTMLParser::DuplicateFontElement(COXParserElement* pElement)
|
||
{
|
||
if (!pElement || !pElement->IsName(TEXT("font")))
|
||
return NULL;
|
||
|
||
COXParserElement* pFontElement = new COXParserElement(NULL, pElement->GetName());
|
||
for (int i = 0; i < pElement->NumAttributes(); i++)
|
||
{
|
||
if (!pElement->Attribute(i))
|
||
continue;
|
||
|
||
COXAttribute* pAttribute = new COXAttribute;
|
||
if (!pAttribute)
|
||
{
|
||
delete pFontElement;
|
||
return NULL;
|
||
}
|
||
|
||
pAttribute->SetName(pElement->Attribute(i)->GetName());
|
||
pAttribute->SetValue(pElement->Attribute(i)->GetStringValue());
|
||
|
||
pFontElement->AddAttribute(pAttribute);
|
||
}
|
||
|
||
return pFontElement;
|
||
}
|
||
|
||
// Folds every "font" element on the font stack into one new, parentless
// "font" element. The stack is walked from its first entry to its last, so
// when an attribute name occurs more than once the most recently pushed value
// wins. Returns NULL when the font stack is empty; otherwise the caller owns
// the returned element.
COXParserElement* COXHTMLParser::ConstructFontElement()
{
    if (m_FontStack.size() == 0)
        return NULL;

    COXParserElement* pMerged = new COXParserElement(NULL, TEXT("font"));
    if (pMerged == NULL)
        return NULL;

    for (UINT nFont = 0; nFont < m_FontStack.size(); ++nFont)
    {
        COXParserElement* pStacked = (COXParserElement*) m_FontStack[nFont];
        if (pStacked == NULL)
            continue;

        // Copy this font's attributes into the merged element
        for (int nAttr = 0; nAttr < pStacked->NumAttributes(); ++nAttr)
        {
            COXAttribute* pSource = pStacked->Attribute(nAttr);
            if (pSource == NULL)
                continue;

            COXAttribute* pTarget = pMerged->FindAttribute(pSource->GetName());
            if (pTarget == NULL)
            {
                // Not seen yet: add a new attribute with this name and value
                pTarget = new COXAttribute;
                pTarget->SetName(pSource->GetName());
                pTarget->SetValue(pSource->GetStringValue());
                pMerged->AddAttribute(pTarget);
            }
            else
            {
                // Already present: the later value replaces the earlier one
                pTarget->SetValue(pSource->GetStringValue());
            }
        }
    }

    return pMerged;
}
|
||
|
||
// Parses a run of text via the base parser and then wraps the resulting
// object in elements that reflect the text state currently in force:
//   1. one element per active text modifier flag (eg <B>, <I>),
//   2. a combined <font> element if the font stack is not empty,
//   3. a <p> element if the text sits directly under an element that itself
//      has no parent.
// Returns the outermost object of that chain, or NULL if the base parser
// produced nothing or ParseElement() failed on the <p> wrapper.
COXParserObject* COXHTMLParser::ParseText(COXParserElement* pParent)
{
    COXParserObject* pObject = COXParser::ParseText(pParent);
    if (!pObject)
        return NULL;

    // Stop text modifiers working inside tables
    //if (pParent->IsName(TEXT("TD")))
    //    return pObject;

    UINT nTextStyle = GetTextStyle();

    COXQuickString str = pObject->GetText();

    // Strip out whitespace (unless formatted)
    if (!(nTextStyle & TEXT_FORMATTED) && !(nTextStyle & TEXT_PRE))
        str.Strip();

    // Add a trailing space unless the text already ends in a non-breaking
    // space. The NBSP is written as a hex escape (character 160, NOT
    // character 32) so it does not depend on the encoding of this file.
    const TCHAR chNBSP = TEXT('\xA0');
    if (str.GetLength() && str[str.GetLength()-1] != chNBSP)
        str.Append(TEXT(' '));

    pObject->SetText(str);

    // Wrap the text in one element per text modifier that is switched on
    for (int i = 0; m_HTMLTags[i].szTag; i++)
    {
        // Don't add center tags here - they will be added as a wrap around
        // the element holding this text
        if (!m_HTMLTags[i].dwTextFlag || (m_HTMLTags[i].dwTextFlag & TEXT_CENTER))
            continue;

        if (nTextStyle & m_HTMLTags[i].dwTextFlag)
        {
            COXParserElement* pElm = new COXParserElement(NULL, m_HTMLTags[i].szTag);
            if (!pElm)
                break;

            pElm->AddObject(pObject);   // New element becomes the head of the chain
            pObject = pElm;
        }
    }

    // Wrap in the combined font element, if any fonts are active
    COXParserElement* pNewFontElement = ConstructFontElement();
    if (pNewFontElement)
    {
        pNewFontElement->AddObject(pObject);
        pObject = pNewFontElement;
    }

    // Add a <p> wrapper if we have found standalone text
    if (pParent != NULL && pParent->GetParent() == NULL)
    {
        COXParserElement* pElm = new COXParserElement(pParent, TEXT("p"));
        if (pElm)
        {
            // Only touch the wrapper once we know it was allocated
            pElm->SetFlags(GetTextStyle());

            pElm->AddObject(pObject);
            pObject = pElm;             // Move to beginning of chain

            if (!ParseElement(pElm, 1))
            {
                delete pObject;
                pObject = NULL;
            }
        }
    }

    return pObject;
}
|
||
|
||
// Reads an unquoted attribute value into str. Characters are consumed until
// whitespace, the end delimiter (m_chEndDelim), or a tag-end character
// (m_chTagEnd) that is immediately followed by the end delimiter. The
// character that stopped the scan is pushed back for the caller.
// Returns FALSE, with str emptied and an error reported, if the buffer runs
// out first.
BOOL COXHTMLParser::GetValueString(COXQuickString& str)
{
    str.Empty();
    str.SetLength(100);     // presumably pre-sizes the buffer - TODO confirm

    TCHAR ch = GetNextChar();
    while (ch && ch != m_chEndDelim && !_istspace(ch))
    {
        if (ch == m_chTagEnd)
        {
            // Peek at the next character: a tag-end character directly
            // before the end delimiter closes the tag and is not part of
            // the value.
            TCHAR chNext = GetNextChar();
            UngetChar();
            if (chNext == m_chEndDelim)
                break;
        }
        str.Append(ch);
        ch = GetNextChar();
    }

    if (ch == m_chNULL)
    {
        ReportError(ERROR_END_OF_BUFFER,
                    TEXT("Unexpected end of buffer while reading attribute value."));
        str.Empty();
        return FALSE;
    }

    // Leave the terminating character in the buffer
    UngetChar();
    return TRUE;
}
|
||
|
||
// Add new object to element. Insert text modifiers if necessary
|
||
void COXHTMLParser::AddObjectToElement(COXParserElement* pElement, COXParserObject* pObject)
|
||
{
|
||
if (!pObject)
|
||
return;
|
||
|
||
if (pObject->GetType() == COXParserObject::ELEMENT)
|
||
{
|
||
// wrap blocks with "center" if centering was in force
|
||
if ( (pObject->GetFlags() & TEXT_CENTER) && CanCenter(pObject->GetText()))
|
||
{
|
||
COXParserElement* pElm = new COXParserElement(NULL, TEXT("center"));
|
||
if (pElm)
|
||
{
|
||
pElm->AddObject(pObject); // Add new element to begining of chain
|
||
pObject = pElm; // Move to beginning of chain
|
||
}
|
||
}
|
||
}
|
||
|
||
pElement->AddObject(pObject);
|
||
}
|
||
|
||
// Modify the name/value parser so that all values are string values,
|
||
// and value=<string value> (ie no quotes) is OK. Also allow empty values,
|
||
// eg <tr nowrap>
|
||
BOOL COXHTMLParser::ParseAttributes(COXParserElement* pElement)
|
||
{
|
||
BOOL bResult = TRUE;
|
||
while (bResult)
|
||
{
|
||
bResult = GetToken(m_Token);
|
||
if (!bResult)
|
||
{
|
||
ReportError(ERROR_END_OF_BUFFER,
|
||
TEXT("Unexpected end of buffer while parsing attributes (Element %s)"),
|
||
pElement->GetName());
|
||
break;
|
||
}
|
||
|
||
// end of attribute list?
|
||
if (m_Token.GetType() != COXToken::STRING)
|
||
break;
|
||
|
||
// Create a new name/value
|
||
COXAttribute* pAttribute = new COXAttribute;
|
||
if (!pAttribute)
|
||
{
|
||
ReportError(ERROR_OUT_OF_MEMORY,
|
||
TEXT("Unable to create new attribute (Element %s)"),
|
||
pElement->GetName());
|
||
bResult = FALSE;
|
||
break;
|
||
}
|
||
|
||
// Get the name of the name/value pair
|
||
COXQuickString str;
|
||
if (!GetNameToken(str))
|
||
{
|
||
bResult = FALSE;
|
||
break;
|
||
}
|
||
pAttribute->SetName(str);
|
||
|
||
SAVEPOS pos;
|
||
SaveBufferPos(pos);
|
||
|
||
// May have "=" sign next
|
||
if (!GetToken(m_Token))
|
||
{
|
||
ReportError(ERROR_BAD_TOKEN,
|
||
TEXT("Error while parsing attribute (Element %s, name %s)."),
|
||
pElement->GetName(), pAttribute->GetName());
|
||
delete pAttribute;
|
||
bResult = FALSE;
|
||
break;
|
||
}
|
||
|
||
// If an empty Attribute then continue on without searching for a value
|
||
if (m_Token.GetType() != COXToken::EQUAL_SIGN)
|
||
{
|
||
pAttribute->SetValue(TEXT(""));
|
||
pElement->AddAttribute(pAttribute);
|
||
RestoreBufferPos(pos);
|
||
continue;
|
||
}
|
||
|
||
// Should have a number, "string" or 'string' value next.
|
||
if (!GetToken(m_Token))
|
||
{
|
||
ReportError(ERROR_BAD_TOKEN,
|
||
TEXT("Error while parsing attribute (element %s, name %s)."),
|
||
pElement->GetName(), pAttribute->GetName());
|
||
bResult = FALSE;
|
||
break;
|
||
}
|
||
|
||
if (m_Token.GetType() == COXToken::STRING)
|
||
{
|
||
if (!GetValueString(str))
|
||
{
|
||
bResult = FALSE;
|
||
break;
|
||
}
|
||
pAttribute->SetValue(str);
|
||
}
|
||
else if (m_Token.GetType() == COXToken::QUOTE)
|
||
{
|
||
if ( !GetStringToken(str, TEXT('"')) )
|
||
{
|
||
bResult = FALSE;
|
||
break;
|
||
}
|
||
pAttribute->SetValue(str);
|
||
}
|
||
else if (m_Token.GetType() == COXToken::APOSTROPHE)
|
||
{
|
||
if ( !GetStringToken(str, TEXT('\'')) )
|
||
{
|
||
bResult = FALSE;
|
||
break;
|
||
}
|
||
pAttribute->SetValue(str);
|
||
}
|
||
else
|
||
{
|
||
ReportError(ERROR_UNEXPECTED_TOKEN,
|
||
TEXT("Unexpected token while parsing attribute (element %s, name %s)."),
|
||
pElement->GetName(), pAttribute->GetName());
|
||
bResult = FALSE;
|
||
break;
|
||
}
|
||
|
||
if (bResult)
|
||
pElement->AddAttribute(pAttribute);
|
||
}
|
||
|
||
return bResult;
|
||
}
|
||
|
||
// In HTML, some tags such as <br> don't have an end tag - so treat them
|
||
// as Empty Tags.
|
||
COXParserElement* COXHTMLParser::ParseStartTag(COXParserElement* pParent, BOOL& bEmptyTag)
|
||
{
|
||
COXParserElement* pElement = COXParser::ParseStartTag(pParent, bEmptyTag);
|
||
if (!pElement)
|
||
return NULL;
|
||
|
||
// Force to empty tag for "simple" tags (HR, BR etc)
|
||
if (IsEmptyTag(pElement->GetName()))
|
||
bEmptyTag = TRUE;
|
||
|
||
// Store the current text mode
|
||
pElement->SetFlags(GetTextStyle());
|
||
|
||
// Wrap empties with P's
|
||
if (pParent->GetParent() == NULL &&
|
||
!IsBlockTag(pElement->GetName()) &&
|
||
!IsHeadTag(pElement->GetName()) &&
|
||
!IsTextModifier(pElement->GetName()))
|
||
{
|
||
COXParserElement* pElm = new COXParserElement(NULL, TEXT("P"));
|
||
pElm->AddObject(pElement); // Add new element to begining of chain
|
||
pElement = pElm; // Move to beginning of chain
|
||
}
|
||
|
||
// For new tables push a new text style onto the stack
|
||
if (_tcsicmp(pElement->GetName(), TEXT("table")) == 0)
|
||
PushTextStyle();
|
||
|
||
return pElement;
|
||
}
|
||
|
||
// Parses an end tag through the base parser. When the end tag is "table" and
// it matches the name of pElement, the text style pushed for that table is
// popped again. Returns FALSE only if the base parser failed.
BOOL COXHTMLParser::ParseEndTag(COXParserElement* pElement, COXQuickString& strEndTag)
{
    if (!COXParser::ParseEndTag(pElement, strEndTag))
        return FALSE;

    const bool bIsTableEnd = (_tcsicmp(strEndTag, TEXT("table")) == 0);
    if (bIsTableEnd && _tcsicmp(strEndTag, pElement->GetName()) == 0)
        PopTextStyle();

    return TRUE;
}
|
||
|
||
// Text modifier tags <b>, <i> etc don't work in the normal way. We treat
|
||
// them as "toggles" and just get the base parser to ignore them
|
||
BOOL COXHTMLParser::IgnoreStartTag(COXParserElement* pElement, BOOL bEmptyTag)
|
||
{
|
||
UNUSED_ALWAYS(bEmptyTag);
|
||
|
||
BOOL bIgnore = FALSE;
|
||
|
||
// If we hit a font tag, then add it to the top of the font stack
|
||
if (pElement->IsName(TEXT("font")))
|
||
{
|
||
COXParserElement* pFontElement = DuplicateFontElement(pElement);
|
||
if (pFontElement)
|
||
m_FontStack.push_back(pFontElement);
|
||
|
||
bIgnore = TRUE;
|
||
}
|
||
else
|
||
{
|
||
TagDescriptor* pTag = GetTagDescriptor(pElement->GetName());
|
||
if (pTag && pTag->dwTextFlag)
|
||
{
|
||
//for (UINT i = 0; i < m_TextStyleStack.size(); i++)
|
||
// TRACE2("Before: Text style %d: %d\n",i,m_TextStyleStack[i]);
|
||
|
||
UINT nTextStyle = GetTextStyle();
|
||
SetTextStyle(nTextStyle | pTag->dwTextFlag);
|
||
|
||
//for (i = 0; i < m_TextStyleStack.size(); i++)
|
||
// TRACE2("After: Text style %d: %d\n",i,m_TextStyleStack[i]);
|
||
|
||
bIgnore = TRUE;
|
||
}
|
||
}
|
||
|
||
return bIgnore;
|
||
}
|
||
|
||
// Text modifier tags <b>, <i> etc don't work in the normal way. We treat
|
||
// them as "toggles" and just get the base parser to ignore them
|
||
BOOL COXHTMLParser::IgnoreEndTag(LPCTSTR szEndTag)
|
||
{
|
||
// If we come across a text modifier then we don't want to check for
|
||
// the normal pair - /pair nesting. The text modifiers merely turn-on
|
||
// and turn-off text attributes
|
||
BOOL bTextModifier = FALSE;
|
||
|
||
// If we hit a /font tag, then pop off the top font from the font stack
|
||
if (_tcsicmp(szEndTag, TEXT("font")) == 0)
|
||
{
|
||
if (m_FontStack.size())
|
||
{
|
||
COXParserElement* pFont = (COXParserElement*) m_FontStack.back();
|
||
delete pFont;
|
||
m_FontStack.pop_back();
|
||
}
|
||
bTextModifier = TRUE;
|
||
}
|
||
else
|
||
{
|
||
TagDescriptor* pTag = GetTagDescriptor(szEndTag);
|
||
if (pTag && pTag->dwTextFlag)
|
||
{
|
||
bTextModifier = TRUE;
|
||
|
||
//for (UINT i = 0; i < m_TextStyleStack.size(); i++)
|
||
// TRACE2("Before: Text style %d: %d\n",i,m_TextStyleStack[i]);
|
||
|
||
UINT nStyle = GetTextStyle();
|
||
SetTextStyle(nStyle & ~(pTag->dwTextFlag));
|
||
|
||
//for (i = 0; i < m_TextStyleStack.size(); i++)
|
||
// TRACE2("After: Text style %d: %d\n",i,m_TextStyleStack[i]);
|
||
}
|
||
}
|
||
|
||
return bTextModifier;
|
||
}
|
||
|
||
// Returns TRUE if the tag is an empty tag (eg HR, BR etc)
|
||
BOOL COXHTMLParser::IsEmptyTag(TagDescriptor* pTag)
|
||
{
|
||
if (!pTag)
|
||
return FALSE;
|
||
|
||
return ((pTag->dwTagFlag & COXHTMLParser::TAG_EMPTY) == COXHTMLParser::TAG_EMPTY);
|
||
}
|
||
|
||
// Returns TRUE if the descriptor has a text flag, ie the tag toggles a text
// attribute (eg B, I). A NULL descriptor yields FALSE.
BOOL COXHTMLParser::IsTextModifier(TagDescriptor* pTag)
{
    return (pTag != NULL) && (pTag->dwTextFlag > 0);
}
|
||
|
||
// Returns TRUE if the tag has an optional end tag (eg P, LI etc)
|
||
BOOL COXHTMLParser::IsOptionalEndTag(TagDescriptor* pTag)
|
||
{
|
||
if (!pTag)
|
||
return FALSE;
|
||
|
||
return ((pTag->dwTagFlag & COXHTMLParser::TAG_OPTEND) == COXHTMLParser::TAG_OPTEND);
|
||
}
|
||
|
||
// Returns TRUE if the tag is a special section tag (BODY and HEAD)
|
||
BOOL COXHTMLParser::IsSectionTag(TagDescriptor* pTag)
|
||
{
|
||
if (!pTag)
|
||
return FALSE;
|
||
|
||
return ((pTag->dwTagFlag & COXHTMLParser::TAG_SECTION) == COXHTMLParser::TAG_SECTION);
|
||
}
|
||
|
||
// Returns TRUE if the tag ends paragraphs (eg P, TABLE etc)
|
||
BOOL COXHTMLParser::IsBlockTag(TagDescriptor* pTag)
|
||
{
|
||
if (!pTag)
|
||
return FALSE;
|
||
|
||
return ((pTag->dwTagFlag & COXHTMLParser::TAG_BLOCK) == COXHTMLParser::TAG_BLOCK);
|
||
}
|
||
|
||
// Returns TRUE if the descriptor carries TAG_HEAD (eg TITLE, META in the tag
// table). A NULL descriptor yields FALSE.
BOOL COXHTMLParser::IsHeadTag(TagDescriptor* pTag)
{
    return (pTag != NULL) &&
           ((pTag->dwTagFlag & COXHTMLParser::TAG_HEAD) == COXHTMLParser::TAG_HEAD);
}
|
||
|
||
// Returns TRUE if the descriptor carries TAG_CANCENTER, ie the tag may be
// wrapped by a "center" tag. A NULL descriptor yields FALSE.
BOOL COXHTMLParser::CanCenter(TagDescriptor* pTag)
{
    return (pTag != NULL) &&
           ((pTag->dwTagFlag & COXHTMLParser::TAG_CANCENTER) == COXHTMLParser::TAG_CANCENTER);
}
|
||
|
||
// This MUST be rewritten to use hash tables to get some speed
|
||
TagDescriptor* COXHTMLParser::GetTagDescriptor(LPCTSTR szTag) const
|
||
{
|
||
DWORD_PTR dwData;
|
||
if (!m_HTMLTagTable.Lookup(szTag, dwData))
|
||
return NULL;
|
||
else
|
||
return (TagDescriptor*) dwData;
|
||
}
|
||
|
||
// Return TRUE if
|
||
// a) szCurrentTag has an optional end tag, and szNewTag specifies a new element, or
|
||
// a) szNewTag is NULL and szCurrentTag has an optional end tag
|
||
// Returning TRUE means that an end tag should be inserted for szCurrentTag, and that
|
||
// szNewTag represents a new sibling element. Returning FALSE means that szNewTag
|
||
// represents a new child element of szCurrentTag
|
||
// Since this funnction may be called when either a new start or a new end tag has
|
||
// been found, NewTagIsEndTag specifies whether or not szNewTag is an end tag (TRUE)
|
||
// or a start tag (NewTagIsEndTag = FALSE)
|
||
BOOL COXHTMLParser::IsEndTagMissing(LPCTSTR szCurrentTag, LPCTSTR szNewTag,
|
||
BOOL NewTagIsEndTag)
|
||
{
|
||
if (!szCurrentTag || *szCurrentTag == 0)
|
||
return FALSE;
|
||
|
||
// Main HTML tag can only be left off when there is nothing else after it
|
||
if (_tcsicmp(szCurrentTag, TEXT("HTML")) == 0)
|
||
return (szNewTag == NULL || *szNewTag == 0);
|
||
|
||
TagDescriptor* pElementTag = GetTagDescriptor(szCurrentTag);
|
||
if (!pElementTag)
|
||
return FALSE;
|
||
|
||
TagDescriptor* pObjectTag = NULL;
|
||
if (szNewTag && *szNewTag)
|
||
{
|
||
pObjectTag = GetTagDescriptor(szNewTag);
|
||
if (!pObjectTag)
|
||
return FALSE;
|
||
}
|
||
|
||
// Deal with main sections (HEAD, BODY) first
|
||
if ( IsSectionTag(pElementTag))
|
||
return (pObjectTag? IsSectionTag(pObjectTag) : TRUE);
|
||
|
||
// Check that the tag we are now dealing with has an optional end.
|
||
if (!IsOptionalEndTag(pElementTag))
|
||
return FALSE;
|
||
|
||
// We have an optional end tag - if no more data then everything is fine
|
||
if (szNewTag == NULL || *szNewTag == 0)
|
||
return TRUE;
|
||
|
||
// Text modifiers do not mean a new element
|
||
if (IsTextModifier(pObjectTag))
|
||
return FALSE;
|
||
|
||
// Certain optionally ended tags can only be ended with certain other tags
|
||
|
||
if ( _tcsicmp(szCurrentTag, TEXT("P")) == 0 )
|
||
return IsBlockTag(pObjectTag);
|
||
|
||
if ( _tcsicmp(szCurrentTag, TEXT("LI")) == 0 )
|
||
return ( _tcsicmp(szNewTag, TEXT("UL")) == 0 ||
|
||
_tcsicmp(szNewTag, TEXT("OL")) == 0);
|
||
|
||
if ( _tcsicmp(szCurrentTag, TEXT("TR")) == 0 )
|
||
{
|
||
if (NewTagIsEndTag)
|
||
return (_tcsicmp(szNewTag, TEXT("TABLE")) == 0);
|
||
else
|
||
return (_tcsicmp(szNewTag, TEXT("TR")) == 0);
|
||
}
|
||
|
||
if ( _tcsicmp(szCurrentTag, TEXT("TD")) == 0 )
|
||
{
|
||
if (NewTagIsEndTag)
|
||
return ( _tcsicmp(szNewTag, TEXT("TR")) == 0 ||
|
||
_tcsicmp(szNewTag, TEXT("TABLE")) == 0);
|
||
else
|
||
return ( _tcsicmp(szNewTag, TEXT("TD")) == 0 ||
|
||
_tcsicmp(szNewTag, TEXT("TR")) == 0 );
|
||
}
|
||
|
||
if ( _tcsicmp(szCurrentTag, TEXT("DT")) == 0 ||
|
||
_tcsicmp(szCurrentTag, TEXT("DD")) == 0 )
|
||
{
|
||
if (NewTagIsEndTag)
|
||
return ( _tcsicmp(szNewTag, TEXT("DT")) == 0 ||
|
||
_tcsicmp(szNewTag, TEXT("DD")) == 0 ||
|
||
_tcsicmp(szNewTag, TEXT("DL")) == 0);
|
||
else
|
||
return ( _tcsicmp(szNewTag, TEXT("DT")) == 0 ||
|
||
_tcsicmp(szNewTag, TEXT("DD")) == 0 );
|
||
}
|
||
|
||
if ( _tcsicmp(szCurrentTag, TEXT("THEAD")) == 0 ||
|
||
_tcsicmp(szCurrentTag, TEXT("TFOOT")) == 0 ||
|
||
_tcsicmp(szCurrentTag, TEXT("TBODY")) == 0 )
|
||
{
|
||
if (NewTagIsEndTag)
|
||
return ( _tcsicmp(szNewTag, TEXT("TFOOT")) == 0 ||
|
||
_tcsicmp(szNewTag, TEXT("THEAD")) == 0 ||
|
||
_tcsicmp(szNewTag, TEXT("TBODY")) == 0 ||
|
||
_tcsicmp(szNewTag, TEXT("TABLE")) == 0);
|
||
else
|
||
return ( _tcsicmp(szNewTag, TEXT("TFOOT")) == 0 ||
|
||
_tcsicmp(szNewTag, TEXT("THEAD")) == 0 ||
|
||
_tcsicmp(szNewTag, TEXT("TBODY")) == 0 );
|
||
}
|
||
|
||
if ( _tcsicmp(szCurrentTag, TEXT("COLGROUP")) == 0 )
|
||
{
|
||
if (NewTagIsEndTag)
|
||
return ( _tcsicmp(szNewTag, TEXT("COLGROUP")) == 0 ||
|
||
_tcsicmp(szNewTag, TEXT("TR")) == 0 ||
|
||
_tcsicmp(szNewTag, TEXT("TD")) == 0 ||
|
||
_tcsicmp(szNewTag, TEXT("THEAD")) == 0 ||
|
||
_tcsicmp(szNewTag, TEXT("TFOOT")) == 0 ||
|
||
_tcsicmp(szNewTag, TEXT("TABLE")) == 0);
|
||
else
|
||
return ( _tcsicmp(szNewTag, TEXT("COLGROUP")) == 0 ||
|
||
_tcsicmp(szNewTag, TEXT("TR")) == 0 ||
|
||
_tcsicmp(szNewTag, TEXT("TD")) == 0 ||
|
||
_tcsicmp(szNewTag, TEXT("THEAD")) == 0 ||
|
||
_tcsicmp(szNewTag, TEXT("TFOOT")) == 0 );
|
||
}
|
||
|
||
// pElement is optionally ended, followed by a new tag that is not a text
|
||
// modifier. By default we close off pElement and start a new sibling element
|
||
return TRUE;
|
||
}
|
||
|
||
// Writes every attribute of pElement to hFile as ` name` or ` name="value"`
// (the value part is omitted when the string value is NULL or empty).
// Names and values are written straight to the file instead of being
// formatted through a fixed-size buffer, so long values (eg URLs) are not
// limited to 512 bytes.
// Returns FALSE as soon as a write fails, TRUE otherwise.
BOOL COXHTMLParser::WriteAttributes(HANDLE hFile, COXParserElement* pElement)
{
    USES_CONVERSION;

    DWORD nCount = 0;

    for (int i = 0; i < pElement->NumAttributes(); i++)
    {
        COXAttribute* pAttribute = pElement->Attribute(i);
        if (!pAttribute) continue;

        // " name"
        const char* pszName = T2A((LPTSTR) pAttribute->GetName());
        if (!pszName)
            pszName = "";

        if (!::WriteFile(hFile, " ", 1, &nCount, NULL) ||
            !::WriteFile(hFile, pszName, static_cast<DWORD>(strlen(pszName)), &nCount, NULL))
            return FALSE;

        // ="value" - only when there is a non-empty value
        LPCTSTR szValue = pAttribute->GetStringValue();
        if (szValue && *szValue)
        {
            const char* pszValue = T2A(const_cast<LPTSTR>(szValue));
            if (!pszValue)
                pszValue = "";

            if (!::WriteFile(hFile, "=\"", 2, &nCount, NULL) ||
                !::WriteFile(hFile, pszValue, static_cast<DWORD>(strlen(pszValue)), &nCount, NULL) ||
                !::WriteFile(hFile, "\"", 1, &nCount, NULL))
                return FALSE;
        }
    }

    return TRUE;
}
|
||
|
||
// Writes pElement to hFile as "<name attributes>children</name>". Block, head
// and section tags, and "br", are preceded by a CR/LF, and followed by one
// after their closing tag. Empty tags (eg BR, HR) get no closing tag.
// The result of writing each child object is not checked.
// Returns FALSE if pElement is not an ELEMENT or if one of this function's
// own writes fails.
BOOL COXHTMLParser::WriteElement(HANDLE hFile, COXParserElement* pElement, int nLevel)
{
    USES_CONVERSION;

    static char szOutput[512];
    static DWORD dwWritten;

    if (pElement->GetType() != COXParserObject::ELEMENT)
        return FALSE;

    const BOOL bOwnLine = ( IsBlockTag(pElement->GetName()) ||
                            IsHeadTag(pElement->GetName()) ||
                            IsSectionTag(pElement->GetName()) ||
                            pElement->IsName(TEXT("br")) );

    if (bOwnLine && !::WriteFile(hFile, "\r\n", 2, &dwWritten, NULL))
        return FALSE;

    // Opening tag: "<name", the attributes, then ">"
    UTBStr::sprintf(szOutput, 512, "<%s", T2A((LPTSTR)pElement->GetName()));
    if (!::WriteFile(hFile, szOutput, PtrToUlong(strlen(szOutput)), &dwWritten, NULL))
        return FALSE;

    if (!WriteAttributes(hFile, pElement))
        return FALSE;

    if (!::WriteFile(hFile, ">", 1, &dwWritten, NULL))
        return FALSE;

    // Children, one nesting level deeper
    for (int nChild = 0; nChild < pElement->NumObjects(); ++nChild)
        WriteObject(hFile, pElement->Object(nChild), nLevel+1);

    // Tags such as BR and HR have no closing tag
    if (IsEmptyTag(pElement->GetName()))
        return TRUE;

    UTBStr::sprintf(szOutput, 512, "</%s>", T2A((LPTSTR)pElement->GetName()));
    if (!::WriteFile(hFile, szOutput, PtrToUlong(strlen(szOutput)), &dwWritten, NULL))
        return FALSE;

    if (bOwnLine && !::WriteFile(hFile, "\r\n", 2, &dwWritten, NULL))
        return FALSE;

    return TRUE;
}
|
||
|
||
// Writes a PLAINTEXT object to hFile, passing its text through EncodeText()
// first. If the object's parent is a block tag, a section tag or "br", the
// text is put on its own line (CR/LF before and after).
// Returns FALSE if pObject is not PLAINTEXT, if the encoded text is empty or
// cannot be converted, or if a write fails.
BOOL COXHTMLParser::WriteText(HANDLE hFile, COXParserObject* pObject, int nLevel)
{
    USES_CONVERSION;
    UNUSED_ALWAYS(nLevel);

    if (pObject->GetType() != COXParserObject::PLAINTEXT)
        return FALSE;

    BOOL bSeparateLine = FALSE;
    if (pObject->GetParent())
    {
        bSeparateLine = ( IsBlockTag(pObject->GetParent()->GetName()) ||
                          IsSectionTag(pObject->GetParent()->GetName()) ||
                          pObject->GetParent()->IsName(TEXT("br")) );
    }

    DWORD nCount;
    if (bSeparateLine)
    {
        if (!::WriteFile(hFile, "\r\n", 2, &nCount, NULL))
            return FALSE;
    }

    COXQuickString str = EncodeText(pObject->GetText());
    if (str.IsEmpty())
        return FALSE;

    // Write the number of bytes in the converted string. The TCHAR count of
    // the original (str.GetLength()) can differ from it when the conversion
    // produces multi-byte characters.
    const char* pszText = T2A((LPTSTR)str.GetString());
    if (!pszText)
        return FALSE;

    if (!::WriteFile(hFile, pszText, static_cast<DWORD>(strlen(pszText)), &nCount, NULL))
        return FALSE;

    if (bSeparateLine)
    {
        if (!::WriteFile(hFile, "\r\n", 2, &nCount, NULL))
            return FALSE;
    }

    return TRUE;
}
|
||
|
||
// Returns the message text for an error code. WARNING_UNKNOWN_TAG is handled
// here; every other code is passed on to the base class.
LPCTSTR COXHTMLParser::TranslateErrorCode(int nErrorCode)
{
    if (nErrorCode == WARNING_UNKNOWN_TAG)
        return TEXT("Unknown tag found");

    return COXParser::TranslateErrorCode(nErrorCode);
}
|