// TAG_EMPTY - tags that do not have a closing tag at all (eg
wrapper if we have found standalone text
if (pParent->GetParent() == NULL)
{
COXParserElement* pElm = new COXParserElement(pParent, TEXT("p"));
pElm->SetFlags(GetTextStyle());
if (pElm)
{
pElm->AddObject(pObject);
pObject = pElm; // Move to beginning of chain
if (!ParseElement(pElm, 1))
{
delete pObject;
pObject = NULL;
}
}
}
return pObject;
}
// Reads an unquoted attribute value from the input into str.
//
// Characters are appended until a NUL character, m_chEndDelim or a
// whitespace character is read. A m_chTagEnd character that is immediately
// followed by m_chEndDelim also terminates the value.
//
// Returns TRUE on success; the character that terminated the value is pushed
// back with UngetChar(). Returns FALSE (with str emptied and an
// ERROR_END_OF_BUFFER error reported) if the terminating character equals
// m_chNULL.
BOOL COXHTMLParser::GetValueString(COXQuickString& str)
{
    str.Empty();
    str.SetLength(100);     // presumably pre-sizes the buffer - TODO confirm

    TCHAR chCur = GetNextChar();
    for (;;)
    {
        if (!chCur || chCur == m_chEndDelim || _istspace(chCur))
            break;

        if (chCur == m_chTagEnd)
        {
            // Peek one character ahead without consuming it
            const TCHAR chPeek = GetNextChar();
            UngetChar();
            if (chPeek == m_chEndDelim)
                break;
        }

        str.Append(chCur);
        chCur = GetNextChar();
    }

    if (chCur == m_chNULL)
    {
        ReportError(ERROR_END_OF_BUFFER, TEXT("Unexpected end of buffer while name."));
        str.Empty();
        return FALSE;
    }

    // Leave the terminating character for the caller to read
    UngetChar();
    return TRUE;
}
// Appends pObject to pElement. A NULL pObject is ignored.
//
// If pObject is an ELEMENT whose flags contain TEXT_CENTER and for which
// CanCenter() returns TRUE, it is first placed inside a newly created
// "center" element, and that wrapper is what gets added to pElement.
void COXHTMLParser::AddObjectToElement(COXParserElement* pElement, COXParserObject* pObject)
{
    if (pObject == NULL)
        return;

    const bool bIsElement = (pObject->GetType() == COXParserObject::ELEMENT);
    if (bIsElement &&
        (pObject->GetFlags() & TEXT_CENTER) &&
        CanCenter(pObject->GetText()))
    {
        COXParserElement* pCenter = new COXParserElement(NULL, TEXT("center"));
        if (pCenter != NULL)
        {
            pCenter->AddObject(pObject);    // the wrapper now holds the object
            pObject = pCenter;              // ...and takes its place in the chain
        }
    }

    pElement->AddObject(pObject);
}
// Parses the attribute list of a tag and attaches each attribute found to
// pElement. All values are stored as strings.
//
// Each attribute is a name optionally followed by '=' and a value. The value
// may be an unquoted string, a "double-quoted" string or a 'single-quoted'
// string. An attribute without '=' is stored with an empty value.
//
// Parsing stops successfully at the first token that is not a STRING (the
// end of the attribute list). Returns FALSE if the buffer ends or an
// unexpected token is met; an error is reported via ReportError() or by the
// token helper that failed.
//
// An attribute object that has not yet been handed to pElement is deleted on
// every failure path, so a malformed tag does not leak it.
BOOL COXHTMLParser::ParseAttributes(COXParserElement* pElement)
{
    BOOL bResult = TRUE;
    while (bResult)
    {
        bResult = GetToken(m_Token);
        if (!bResult)
        {
            ReportError(ERROR_END_OF_BUFFER,
                        TEXT("Unexpected end of buffer while parsing attributes (Element %s)"),
                        pElement->GetName());
            break;
        }

        // end of attribute list?
        if (m_Token.GetType() != COXToken::STRING)
            break;

        // Create a new name/value
        COXAttribute* pAttribute = new COXAttribute;
        if (!pAttribute)
        {
            ReportError(ERROR_OUT_OF_MEMORY,
                        TEXT("Unable to create new attribute (Element %s)"),
                        pElement->GetName());
            bResult = FALSE;
            break;
        }

        // Get the name of the name/value pair
        COXQuickString str;
        if (!GetNameToken(str))
        {
            delete pAttribute;
            bResult = FALSE;
            break;
        }
        pAttribute->SetName(str);

        // Remember where we are so that the next token can be put back if it
        // turns out not to be an "=" sign
        SAVEPOS pos;
        SaveBufferPos(pos);

        // May have "=" sign next
        if (!GetToken(m_Token))
        {
            ReportError(ERROR_BAD_TOKEN,
                        TEXT("Error while parsing attribute (Element %s, name %s)."),
                        pElement->GetName(), pAttribute->GetName());
            delete pAttribute;
            bResult = FALSE;
            break;
        }

        // If an empty Attribute then continue on without searching for a value
        if (m_Token.GetType() != COXToken::EQUAL_SIGN)
        {
            pAttribute->SetValue(TEXT(""));
            pElement->AddAttribute(pAttribute);
            RestoreBufferPos(pos);
            continue;
        }

        // Should have a number, "string" or 'string' value next.
        if (!GetToken(m_Token))
        {
            ReportError(ERROR_BAD_TOKEN,
                        TEXT("Error while parsing attribute (element %s, name %s)."),
                        pElement->GetName(), pAttribute->GetName());
            delete pAttribute;
            bResult = FALSE;
            break;
        }

        // Read the value with the reader that matches the token type
        BOOL bValueRead = FALSE;
        if (m_Token.GetType() == COXToken::STRING)
        {
            bValueRead = GetValueString(str);
        }
        else if (m_Token.GetType() == COXToken::QUOTE)
        {
            bValueRead = GetStringToken(str, TEXT('"'));
        }
        else if (m_Token.GetType() == COXToken::APOSTROPHE)
        {
            bValueRead = GetStringToken(str, TEXT('\''));
        }
        else
        {
            ReportError(ERROR_UNEXPECTED_TOKEN,
                        TEXT("Unexpected token while parsing attribute (element %s, name %s)."),
                        pElement->GetName(), pAttribute->GetName());
        }

        if (!bValueRead)
        {
            // The attribute was never attached to the element - free it here
            delete pAttribute;
            bResult = FALSE;
            break;
        }

        pAttribute->SetValue(str);
        pElement->AddAttribute(pAttribute);
    }
    return bResult;
}
// In HTML, some tags such as
don't have an end tag - so treat them
// as Empty Tags.
COXParserElement* COXHTMLParser::ParseStartTag(COXParserElement* pParent, BOOL& bEmptyTag)
{
COXParserElement* pElement = COXParser::ParseStartTag(pParent, bEmptyTag);
if (!pElement)
return NULL;
// Force to empty tag for "simple" tags (HR, BR etc)
if (IsEmptyTag(pElement->GetName()))
bEmptyTag = TRUE;
// Store the current text mode
pElement->SetFlags(GetTextStyle());
// Wrap empties with P's
if (pParent->GetParent() == NULL &&
!IsBlockTag(pElement->GetName()) &&
!IsHeadTag(pElement->GetName()) &&
!IsTextModifier(pElement->GetName()))
{
COXParserElement* pElm = new COXParserElement(NULL, TEXT("P"));
pElm->AddObject(pElement); // Add new element to begining of chain
pElement = pElm; // Move to beginning of chain
}
// For new tables push a new text style onto the stack
if (_tcsicmp(pElement->GetName(), TEXT("table")) == 0)
PushTextStyle();
return pElement;
}
// HTML-specific end tag handling layered on COXParser::ParseEndTag().
//
// Returns FALSE if the base parser fails. Otherwise, when the end tag is
// "table" and matches pElement's name (both compared ignoring case), the
// current text style is popped, and TRUE is returned.
BOOL COXHTMLParser::ParseEndTag(COXParserElement* pElement, COXQuickString& strEndTag)
{
    if (!COXParser::ParseEndTag(pElement, strEndTag))
        return FALSE;

    const bool bIsTableEnd = (_tcsicmp(strEndTag, TEXT("table")) == 0);
    if (bIsTableEnd && _tcsicmp(strEndTag, pElement->GetName()) == 0)
        PopTextStyle();

    return TRUE;
}
// Text modifier tags don't work in the normal way. We treat them as
// "toggles" and just get the base parser to ignore them.
//
// - A "font" element is duplicated and the copy (if any) is pushed onto the
//   font stack.
// - A tag whose descriptor has a non-zero dwTextFlag switches that flag on in
//   the current text style.
//
// Returns TRUE in both of those cases, FALSE for every other tag.
// bEmptyTag is not used.
BOOL COXHTMLParser::IgnoreStartTag(COXParserElement* pElement, BOOL bEmptyTag)
{
    UNUSED_ALWAYS(bEmptyTag);

    // If we hit a font tag, then add it to the top of the font stack
    if (pElement->IsName(TEXT("font")))
    {
        COXParserElement* pFontCopy = DuplicateFontElement(pElement);
        if (pFontCopy)
            m_FontStack.push_back(pFontCopy);
        return TRUE;
    }

    TagDescriptor* pDescriptor = GetTagDescriptor(pElement->GetName());
    if (!pDescriptor || !pDescriptor->dwTextFlag)
        return FALSE;

    // Switch the modifier on in the current text style
    UINT nCurrentStyle = GetTextStyle();
    SetTextStyle(nCurrentStyle | pDescriptor->dwTextFlag);
    return TRUE;
}
// Text modifier tags don't work in the normal way. We treat them as
// "toggles" and just get the base parser to ignore them.
//
// If we come across a text modifier then we don't want to check for the
// normal pair - /pair nesting. The text modifiers merely turn-on and
// turn-off text attributes.
//
// - "font" (compared ignoring case): the top entry of the font stack, if any,
//   is deleted and popped.
// - A tag whose descriptor has a non-zero dwTextFlag switches that flag off
//   in the current text style.
//
// Returns TRUE in both of those cases, FALSE for every other tag.
BOOL COXHTMLParser::IgnoreEndTag(LPCTSTR szEndTag)
{
    // If we hit a /font tag, then pop off the top font from the font stack
    if (!_tcsicmp(szEndTag, TEXT("font")))
    {
        if (m_FontStack.size() != 0)
        {
            COXParserElement* pTopFont = (COXParserElement*) m_FontStack.back();
            delete pTopFont;
            m_FontStack.pop_back();
        }
        return TRUE;
    }

    TagDescriptor* pDescriptor = GetTagDescriptor(szEndTag);
    if (!pDescriptor || !pDescriptor->dwTextFlag)
        return FALSE;

    // Switch the modifier off in the current text style
    UINT nCurrentStyle = GetTextStyle();
    SetTextStyle(nCurrentStyle & ~(pDescriptor->dwTextFlag));
    return TRUE;
}
// Returns TRUE if the tag is an empty tag (eg HR, BR etc), i.e. every
// TAG_EMPTY bit is set in the descriptor's dwTagFlag.
// A NULL descriptor yields FALSE.
BOOL COXHTMLParser::IsEmptyTag(TagDescriptor* pTag)
{
    return (pTag != NULL) &&
           ((pTag->dwTagFlag & COXHTMLParser::TAG_EMPTY) == COXHTMLParser::TAG_EMPTY);
}
// Returns TRUE if the tag is a text modifier, i.e. its descriptor carries a
// dwTextFlag greater than zero. A NULL descriptor yields FALSE.
BOOL COXHTMLParser::IsTextModifier(TagDescriptor* pTag)
{
    return (pTag != NULL) && (pTag->dwTextFlag > 0);
}
// Returns TRUE if the tag has an optional end tag (eg P, LI etc), i.e. every
// TAG_OPTEND bit is set in the descriptor's dwTagFlag.
// A NULL descriptor yields FALSE.
BOOL COXHTMLParser::IsOptionalEndTag(TagDescriptor* pTag)
{
    return (pTag != NULL) &&
           ((pTag->dwTagFlag & COXHTMLParser::TAG_OPTEND) == COXHTMLParser::TAG_OPTEND);
}
// Returns TRUE if the tag is a special section tag (BODY and HEAD), i.e.
// every TAG_SECTION bit is set in the descriptor's dwTagFlag.
// A NULL descriptor yields FALSE.
BOOL COXHTMLParser::IsSectionTag(TagDescriptor* pTag)
{
    return (pTag != NULL) &&
           ((pTag->dwTagFlag & COXHTMLParser::TAG_SECTION) == COXHTMLParser::TAG_SECTION);
}
// Returns TRUE if the tag ends paragraphs (eg P, TABLE etc), i.e. every
// TAG_BLOCK bit is set in the descriptor's dwTagFlag.
// A NULL descriptor yields FALSE.
BOOL COXHTMLParser::IsBlockTag(TagDescriptor* pTag)
{
    return (pTag != NULL) &&
           ((pTag->dwTagFlag & COXHTMLParser::TAG_BLOCK) == COXHTMLParser::TAG_BLOCK);
}
// Returns TRUE if every TAG_HEAD bit is set in the descriptor's dwTagFlag.
// A NULL descriptor yields FALSE.
BOOL COXHTMLParser::IsHeadTag(TagDescriptor* pTag)
{
    return (pTag != NULL) &&
           ((pTag->dwTagFlag & COXHTMLParser::TAG_HEAD) == COXHTMLParser::TAG_HEAD);
}
// Returns TRUE if every TAG_CANCENTER bit is set in the descriptor's
// dwTagFlag. A NULL descriptor yields FALSE.
BOOL COXHTMLParser::CanCenter(TagDescriptor* pTag)
{
    return (pTag != NULL) &&
           ((pTag->dwTagFlag & COXHTMLParser::TAG_CANCENTER) == COXHTMLParser::TAG_CANCENTER);
}
// Looks up szTag in m_HTMLTagTable and returns the stored TagDescriptor, or
// NULL if the table has no entry for that tag.
//
// NOTE(review): an earlier comment here said this "MUST be rewritten to use
// hash tables to get some speed"; whether m_HTMLTagTable already is one
// cannot be seen from this function - confirm before acting on it.
TagDescriptor* COXHTMLParser::GetTagDescriptor(LPCTSTR szTag) const
{
    DWORD_PTR dwEntry = 0;
    if (m_HTMLTagTable.Lookup(szTag, dwEntry))
        return (TagDescriptor*) dwEntry;    // the table stores descriptor pointers as DWORD_PTR

    return NULL;
}
// Returns TRUE if szTag equals (ignoring case) any of the names in the table.
template <size_t N>
static BOOL MatchesAnyTag(LPCTSTR szTag, const LPCTSTR (&aNames)[N])
{
    for (size_t i = 0; i < N; i++)
    {
        if (_tcsicmp(szTag, aNames[i]) == 0)
            return TRUE;
    }
    return FALSE;
}

// Return TRUE if
//  a) szCurrentTag has an optional end tag, and szNewTag specifies a new element, or
//  b) szNewTag is NULL (or empty) and szCurrentTag has an optional end tag
// Returning TRUE means that an end tag should be inserted for szCurrentTag, and that
// szNewTag represents a new sibling element. Returning FALSE means that szNewTag
// represents a new child element of szCurrentTag.
//
// Since this function may be called when either a new start or a new end tag has
// been found, NewTagIsEndTag specifies whether szNewTag is an end tag (TRUE)
// or a start tag (FALSE).
//
// FALSE is also returned when szCurrentTag is NULL or empty, or when either tag
// has no descriptor in the tag table.
BOOL COXHTMLParser::IsEndTagMissing(LPCTSTR szCurrentTag, LPCTSTR szNewTag,
                                    BOOL NewTagIsEndTag)
{
    if (!szCurrentTag || *szCurrentTag == 0)
        return FALSE;

    const bool bHaveNewTag = (szNewTag != NULL && *szNewTag != 0);

    // Main HTML tag can only be left off when there is nothing else after it
    if (_tcsicmp(szCurrentTag, TEXT("HTML")) == 0)
        return !bHaveNewTag;

    TagDescriptor* pCurrentDesc = GetTagDescriptor(szCurrentTag);
    if (!pCurrentDesc)
        return FALSE;

    TagDescriptor* pNewDesc = NULL;
    if (bHaveNewTag)
    {
        pNewDesc = GetTagDescriptor(szNewTag);
        if (!pNewDesc)
            return FALSE;
    }

    // Deal with main sections (HEAD, BODY) first
    if (IsSectionTag(pCurrentDesc))
        return (pNewDesc ? IsSectionTag(pNewDesc) : TRUE);

    // Check that the tag we are now dealing with has an optional end.
    if (!IsOptionalEndTag(pCurrentDesc))
        return FALSE;

    // We have an optional end tag - if no more data then everything is fine
    if (!bHaveNewTag)
        return TRUE;

    // Text modifiers do not mean a new element
    if (IsTextModifier(pNewDesc))
        return FALSE;

    // Certain optionally ended tags can only be ended with certain other tags
    if (_tcsicmp(szCurrentTag, TEXT("P")) == 0)
        return IsBlockTag(pNewDesc);

    if (_tcsicmp(szCurrentTag, TEXT("LI")) == 0)
    {
        static const LPCTSTR aListTags[] = { TEXT("UL"), TEXT("OL") };
        return MatchesAnyTag(szNewTag, aListTags);
    }

    if (_tcsicmp(szCurrentTag, TEXT("TR")) == 0)
    {
        LPCTSTR szTerminator = NewTagIsEndTag ? TEXT("TABLE") : TEXT("TR");
        return (_tcsicmp(szNewTag, szTerminator) == 0);
    }

    if (_tcsicmp(szCurrentTag, TEXT("TD")) == 0)
    {
        static const LPCTSTR aEndTags[]   = { TEXT("TR"), TEXT("TABLE") };
        static const LPCTSTR aStartTags[] = { TEXT("TD"), TEXT("TR") };
        return NewTagIsEndTag ? MatchesAnyTag(szNewTag, aEndTags)
                              : MatchesAnyTag(szNewTag, aStartTags);
    }

    static const LPCTSTR aDefinitionTags[] = { TEXT("DT"), TEXT("DD") };
    if (MatchesAnyTag(szCurrentTag, aDefinitionTags))
    {
        // An end tag may additionally be the enclosing DL
        return MatchesAnyTag(szNewTag, aDefinitionTags) ||
               (NewTagIsEndTag && _tcsicmp(szNewTag, TEXT("DL")) == 0);
    }

    static const LPCTSTR aTableSections[] = { TEXT("THEAD"), TEXT("TFOOT"), TEXT("TBODY") };
    if (MatchesAnyTag(szCurrentTag, aTableSections))
    {
        // An end tag may additionally be the enclosing TABLE
        return MatchesAnyTag(szNewTag, aTableSections) ||
               (NewTagIsEndTag && _tcsicmp(szNewTag, TEXT("TABLE")) == 0);
    }

    if (_tcsicmp(szCurrentTag, TEXT("COLGROUP")) == 0)
    {
        static const LPCTSTR aColGroupEnders[] =
        {
            TEXT("COLGROUP"), TEXT("TR"), TEXT("TD"), TEXT("THEAD"), TEXT("TFOOT")
        };
        // An end tag may additionally be the enclosing TABLE
        return MatchesAnyTag(szNewTag, aColGroupEnders) ||
               (NewTagIsEndTag && _tcsicmp(szNewTag, TEXT("TABLE")) == 0);
    }

    // The current element is optionally ended, followed by a new tag that is not a
    // text modifier. By default we close it off and start a new sibling element
    return TRUE;
}
// Writes every attribute of pElement to hFile as ` name` or ` name="value"`.
//
// The `="value"` part is written only when the attribute's string value is
// non-NULL and non-empty. NULL attribute slots are skipped.
// Returns FALSE as soon as a WriteFile call fails, TRUE otherwise.
//
// NOTE(review): each piece is formatted into a 512 byte buffer before being
// written; how UTBStr::sprintf treats longer names/values is not visible
// here - confirm.
BOOL COXHTMLParser::WriteAttributes(HANDLE hFile, COXParserElement* pElement)
{
    USES_CONVERSION;
    static char buffer[512];
    static DWORD nCount;

    for (int nIndex = 0; nIndex < pElement->NumAttributes(); nIndex++)
    {
        COXAttribute* pAttr = pElement->Attribute(nIndex);
        if (pAttr == NULL)
            continue;

        // " name"
        UTBStr::sprintf(buffer, 512, " %s", T2A((LPTSTR) pAttr->GetName()));
        const BOOL bNameWritten =
            ::WriteFile(hFile, buffer, PtrToUlong(strlen(buffer)), &nCount, NULL);
        if (!bNameWritten)
            return FALSE;

        // Attributes without a value are written as a bare name
        LPCTSTR szValue = pAttr->GetStringValue();
        if (szValue == NULL || *szValue == 0)
            continue;

        // ="value"
        UTBStr::sprintf(buffer, 512, "=\"%s\"", T2A((LPTSTR) pAttr->GetStringValue()));
        const BOOL bValueWritten =
            ::WriteFile(hFile, buffer, PtrToUlong(strlen(buffer)), &nCount, NULL);
        if (!bValueWritten)
            return FALSE;
    }

    return TRUE;
}
// Writes pElement to hFile as HTML: the start tag with its attributes, every
// child object (via WriteObject with nLevel + 1), and - unless the tag is an
// empty tag - the matching end tag "</name>".
//
// Block, head and section tags, and "br", are preceded by a CR/LF; their end
// tag (when one is written) is followed by a CR/LF as well.
//
// Returns FALSE if pElement is not an ELEMENT or if a WriteFile call made
// directly by this function fails; TRUE otherwise.
BOOL COXHTMLParser::WriteElement(HANDLE hFile, COXParserElement* pElement, int nLevel)
{
    USES_CONVERSION;
    static char buffer[512];
    static DWORD nCount;

    if (pElement->GetType() != COXParserObject::ELEMENT)
        return FALSE;

    BOOL bSeparateLine = ( IsBlockTag(pElement->GetName()) ||
                           IsHeadTag(pElement->GetName()) ||
                           IsSectionTag(pElement->GetName()) ||
                           pElement->IsName(TEXT("br")) );

    if (bSeparateLine)
    {
        if (!::WriteFile(hFile, "\r\n", 2, &nCount, NULL))
            return FALSE;
    }

    // Start tag: "<name", attributes, ">"
    UTBStr::sprintf(buffer, 512, "<%s", T2A((LPTSTR)pElement->GetName()));
    if (!::WriteFile(hFile, buffer, PtrToUlong(strlen(buffer)), &nCount, NULL))
        return FALSE;

    if (!WriteAttributes(hFile, pElement))
        return FALSE;

    if (!::WriteFile(hFile, ">", 1, &nCount, NULL))
        return FALSE;

    // Children. Their results are not checked here (unchanged behaviour).
    for (int i = 0; i < pElement->NumObjects(); i++)
        WriteObject(hFile, pElement->Object(i), nLevel+1);

    //if (!WriteTabs(hFile, nLevel))
    //    return FALSE;

    if (!IsEmptyTag(pElement->GetName()))
    {
        // End tag. The format must start with "</"; without it the output
        // was "name>", which is not a valid HTML end tag.
        UTBStr::sprintf(buffer, 512, "</%s>", T2A((LPTSTR)pElement->GetName()));
        if (!::WriteFile(hFile, buffer, PtrToUlong(strlen(buffer)), &nCount, NULL))
            return FALSE;

        if (bSeparateLine)
        {
            if (!::WriteFile(hFile, "\r\n", 2, &nCount, NULL))
                return FALSE;
        }
    }

    return TRUE;
}
// Writes a PLAINTEXT object to hFile after passing its text through
// EncodeText().
//
// If the object's parent is a block tag, a section tag or "br", the text is
// surrounded by CR/LF pairs. nLevel is not used.
//
// Returns FALSE if pObject is not PLAINTEXT, if the encoded text is empty
// (in that case a leading CR/LF may already have been written), or if a
// WriteFile call fails; TRUE otherwise.
BOOL COXHTMLParser::WriteText(HANDLE hFile, COXParserObject* pObject, int nLevel)
{
    USES_CONVERSION;
    UNUSED_ALWAYS(nLevel);

    if (pObject->GetType() != COXParserObject::PLAINTEXT)
        return FALSE;

    BOOL bSeparateLine = FALSE;
    if (pObject->GetParent())
    {
        bSeparateLine = ( IsBlockTag(pObject->GetParent()->GetName()) ||
                          IsSectionTag(pObject->GetParent()->GetName()) ||
                          pObject->GetParent()->IsName(TEXT("br")) );
    }

    DWORD nCount = 0;
    if (bSeparateLine)
    {
        if (!::WriteFile(hFile, "\r\n", 2, &nCount, NULL))
            return FALSE;
    }

    COXQuickString str = EncodeText(pObject->GetText());
    if (str.IsEmpty())
        return FALSE;

    // WriteFile takes a byte count. The converted text can be longer in bytes
    // than str is in characters (multi-byte code pages in Unicode builds), so
    // measure the converted string instead of using str.GetLength().
    LPCSTR szAnsiText = T2A((LPTSTR)str.GetString());
    if (!szAnsiText)
        return FALSE;
    if (!::WriteFile(hFile, szAnsiText, PtrToUlong(strlen(szAnsiText)), &nCount, NULL))
        return FALSE;

    if (bSeparateLine)
    {
        if (!::WriteFile(hFile, "\r\n", 2, &nCount, NULL))
            return FALSE;
    }

    return TRUE;
}
// Returns a description for nErrorCode. WARNING_UNKNOWN_TAG is translated
// here; every other code is passed on to COXParser::TranslateErrorCode().
LPCTSTR COXHTMLParser::TranslateErrorCode(int nErrorCode)
{
    if (nErrorCode == WARNING_UNKNOWN_TAG)
        return TEXT("Unknown tag found");

    return COXParser::TranslateErrorCode(nErrorCode);
}