583 lines
21 KiB
C++
583 lines
21 KiB
C++
// ==========================================================================
|
|
// Class Specification : COXCsvFile
|
|
// ==========================================================================
|
|
|
|
// Header file : OXCsvFile.h
|
|
|
|
// Version: 9.3
|
|
|
|
// This software along with its related components, documentation and files ("The Libraries")
|
|
// is © 1994-2007 The Code Project (1612916 Ontario Limited) and use of The Libraries is
|
|
// governed by a software license agreement ("Agreement"). Copies of the Agreement are
|
|
// available at The Code Project (www.codeproject.com), as part of the package you downloaded
|
|
// to obtain this file, or directly from our office. For a copy of the license governing
|
|
// this software, you may contact us at legalaffairs@codeproject.com, or by calling 416-849-8900.
|
|
|
|
// //////////////////////////////////////////////////////////////////////////
|
|
|
|
/////////////////////////////////////////////////////////////////////////////
|
|
/*
|
|
|
|
DESCRIPTION
|
|
|
|
|
|
Hierarchy:
|
|
CObject
|
|
CFile
|
|
CStdioFile
|
|
COXCsvFile
|
|
|
|
COXCsvFile extends the MFC class CStdioFile by adding functions for
|
|
processing the lines of a CStdioFile as rows in a comma delimited data
|
|
file. Each column, or field, of a row can be accessed independently.
|
|
When reading the data from the fields, the field name is used to determine
|
|
which field on a row to access. This allows a programmer to process
|
|
the fields irregardless of their order within the comma delimited file.
|
|
One rule for these files is that the fields have to be named. Preferably
|
|
the names of the fields will be given by the first row in the file when
|
|
it is being read in or alternately a programmer can assign them at run-time.
|
|
|
|
|
|
The reading/writing information from/to CSV file can be described in the
|
|
following steps.
|
|
|
|
|
|
Reading from CSV file.
|
|
|
|
1) Instantiate COXCsvFile by specifying path to the CSV file or pointer to
|
|
already opened FILE object as a parameter in the object's constructor
|
|
|
|
2) If the CSV file contains the name of columns as one of its line (usually
|
|
it is the first line unless you are trying to read CSV file with multiple
|
|
tables) then initialize internal array of columns name using the following
|
|
function:
|
|
|
|
BOOL GetColumns();
|
|
|
|
After columns are initialized you might retrieve the number of columns found
|
|
in the CSV using:
|
|
|
|
int GetColumnCount() const;
|
|
|
|
|
|
If the CSV file doesn't contain the name of columns then you have to
|
|
specify them explicitely using:
|
|
|
|
void SetColumns();
|
|
|
|
3) After column info has been gathered/set we can read the contents of the CSV
|
|
file row by row using the following function:
|
|
|
|
BOOL ReadLine();
|
|
|
|
If this function succeed then we can retrieve data in particular column using
|
|
set of ReadColumn() functions.
|
|
|
|
Note, that COXCsvFile class allows you to read CSV files with multiple
|
|
tables. Refer to the CsvTest sample in the .\samples\database\CsvTest
|
|
subdirectory for an example of reading CSV files with multiple tables.
|
|
|
|
|
|
|
|
Writing to CSV file.
|
|
|
|
1) Instantiate COXCsvFile by specifying path to the CSV file or pointer to
|
|
already opened FILE object as a parameter in the object's constructor
|
|
|
|
2) If you want the resulting CSV file to contain the name of columns as one of
|
|
its line (usually it is the first line unless you are trying to write CSV file
|
|
with multiple tables) then initialize internal array of columns name using the
|
|
following function:
|
|
|
|
void SetColumns();
|
|
|
|
Then you must call the following function in order to physically write the
|
|
headers in to the file:
|
|
|
|
void WriteHeaders();
|
|
|
|
In the case you don't want to include headers you have to call the following
|
|
function that will prepare the CSV file for writing data:
|
|
|
|
void Initialize();
|
|
|
|
3) After the column info has been specified we can populate the CSV file with data.
|
|
You can set data to particular column in the current row using set of
|
|
WriteColumn() functions.
|
|
|
|
When you set data to every column you can call WriteLine() function in order to
|
|
save the current row to the CSV file.
|
|
|
|
Note, that COXCsvFile class allows you to write CSV files with multiple
|
|
tables. Refer to the CsvTest sample in the .\samples\database\CsvTest
|
|
subdirectory for an example of writing CSV files with multiple tables.
|
|
|
|
|
|
Refer to the COXCsvFile class reference for full list of public functions.
|
|
|
|
|
|
Dependency:
|
|
#include "OXCsvFile.h"
|
|
|
|
|
|
Source code files:
|
|
"OXCsvFile.cpp"
|
|
|
|
|
|
|
|
*/
|
|
|
|
#if !defined(__CSVFILE_H__)
|
|
#define __CSVFILE_H__
|
|
|
|
#if _MSC_VER > 1000
|
|
#pragma once
|
|
#endif // _MSC_VER > 1000
|
|
|
|
#include "OXDllExt.h"
|
|
|
|
#ifndef __AFXTEMPL_H__
|
|
#include <afxtempl.h>
|
|
#define __AFXTEMPL_H__
|
|
#endif
|
|
|
|
|
|
|
|
class OX_CLASS_DECL COXCsvFile : public CStdioFile
|
|
{
|
|
DECLARE_DYNAMIC(COXCsvFile)
|
|
|
|
public:
|
|
enum Errors { errNone=0, errBadColumnIndex, errColName,
|
|
errNumericValue, errNotInSet, errIncompleteLine,
|
|
errTooManyColumns };
|
|
|
|
enum Types { tDefault=0, tByte, tShort, tLong, tFloat, tDouble, tString };
|
|
|
|
protected:
|
|
|
|
// class COXColumnData
|
|
// Define the data that the programmer has defined for each column, as
|
|
// he is looking for them.
|
|
class OX_CLASS_DECL COXColumnData
|
|
{
|
|
public:
|
|
// the name the developer uses to access this column
|
|
CString m_sName;
|
|
// the text representation of the data for this column
|
|
CString m_strData;
|
|
// the expected data type for this column
|
|
Types m_nType;
|
|
// TRUE if this string is to be quoted when written out
|
|
BOOL m_bQuote;
|
|
|
|
// --- In:
|
|
// --- Out:
|
|
// --- Returns:
|
|
// --- Effect: Initialize this object for use. Default contructor
|
|
// for the object that will contain the information for each
|
|
// column/field of data in a row of the CSV file.
|
|
COXColumnData()
|
|
{
|
|
m_nType=tDefault;
|
|
m_bQuote=FALSE;
|
|
}
|
|
|
|
// --- In: columnData - constant reference to the column data
|
|
// object being copied
|
|
// --- Out:
|
|
// --- Returns:
|
|
// --- Effect: Copies the values from columnData to this COXColumnData object
|
|
COXColumnData(const COXColumnData& columnData)
|
|
: m_sName(columnData.m_sName), m_strData(columnData.m_strData),
|
|
m_nType(columnData.m_nType), m_bQuote(columnData.m_bQuote)
|
|
{
|
|
}
|
|
};
|
|
|
|
// class COXDataArray
|
|
// Wrapper for a CArray containing COXColumnData objects.
|
|
class OX_CLASS_DECL COXDataArray : public CArray<COXColumnData, COXColumnData&>
|
|
{
|
|
};
|
|
|
|
|
|
protected:
|
|
// number of columns in the file
|
|
int m_nColumns;
|
|
// The data for the columns in this file
|
|
COXDataArray m_arrColumns;
|
|
// the last error that was encountered
|
|
mutable Errors m_nLastError;
|
|
// text describing each error
|
|
static LPCTSTR m_pstrErrorMsgs[];
|
|
// TRUE if the last line read in was empty
|
|
BOOL m_bLineEmpty;
|
|
// the character that separates fields in the file
|
|
TCHAR m_tcFieldDelim;
|
|
// the character that deliniates a text string in the file
|
|
TCHAR m_tcStringDelim;
|
|
|
|
// --- In: lpszName - the name of a column that we are looking for.
|
|
// --- Out:
|
|
// --- Returns: An integer index of the column within the string arrays, or -1 if
|
|
// the specified name was not found.
|
|
// --- Effect: Converts the name of a column into the index of that column
|
|
// in the string arrays
|
|
int FindColumn(LPCTSTR lpszName) const;
|
|
|
|
// --- In: nError - the error number indicating what has gone wrong.
|
|
// --- Out:
|
|
// --- Returns:
|
|
// --- Effect: Sets the number of the last error encountered in this file.
|
|
inline void SetError(Errors nError) const { m_nLastError=nError; };
|
|
|
|
public:
|
|
//
|
|
// Constructors and Destructors
|
|
//
|
|
|
|
// --- In:
|
|
// --- Out:
|
|
// --- Returns:
|
|
// --- Effect: Initializes a COXCsvFile object. Default constructor.
|
|
COXCsvFile();
|
|
|
|
// --- In: pFile - pointer to an already opened FILE object
|
|
// --- Out:
|
|
// --- Returns:
|
|
// --- Effect: Setups the COXCsvFile object to use the passed open file handle
|
|
// for all reading and writing operations. Note, there is no
|
|
// checks are done on file mode.
|
|
COXCsvFile(FILE *pFile);
|
|
|
|
// --- In: lpszFileName - filename of the CSV file to open
|
|
// nOpenFlags - bit flags used to control how the file is
|
|
// opened. These flags are defined in the
|
|
// CFile constructor documentation.
|
|
// --- Out:
|
|
// --- Returns:
|
|
// --- Effect: Setups this COXCsvFile object to operate on the specified file,
|
|
// opening that file for reading/writing, as specified.
|
|
// Throws CFileException when an error occurres while trying to open
|
|
// the specified file.
|
|
COXCsvFile(LPCTSTR lpszFileName, UINT nOpenFlags);
|
|
|
|
#ifdef _DEBUG
|
|
virtual void AssertValid() const;
|
|
virtual void Dump(CDumpContext& dc) const;
|
|
#endif
|
|
|
|
// --- In:
|
|
// --- Out:
|
|
// --- Returns: An integer indicating how many columns are in the table
|
|
// being processed.
|
|
// --- Effect: Retrieves the number of columns in the table currently
|
|
// being processed
|
|
inline int GetColumnCount() const { return m_nColumns; }
|
|
|
|
// --- In: tcDelim - the character to be used to delimit fields
|
|
// within a row.
|
|
// --- Out:
|
|
// --- Returns:
|
|
// --- Effect: Set what character will be used to indicate when one field
|
|
// has ended and another has begun. The default for this character
|
|
// is the comma (,)
|
|
inline void SetFieldDelimiter(TCHAR tcDelim) { m_tcFieldDelim=tcDelim; };
|
|
|
|
// --- In: tcDelim - the character to be used to delimit strings.
|
|
// --- Out:
|
|
// --- Returns:
|
|
// --- Effect: Sets what character will be used to mark the beginning and
|
|
// ending of a text string within a field. To delimit a string
|
|
// within a field, this must be the first character in the field.
|
|
// In order to include the string delimiter within the body of a
|
|
// field, it must be escaped by enclosing the whole field as a
|
|
// string, and replacing the occurrance(s) of the delimiter in
|
|
// the field with two copies of the delimiter. This is handled
|
|
// automatically by the COXCsvFile class when reading and writing
|
|
// CSV files. The default for this character is the double quote (").
|
|
inline void SetStringDelimiter(TCHAR tcDelim) { m_tcStringDelim=tcDelim; };
|
|
|
|
// --- In:
|
|
// --- Out:
|
|
// --- Returns: The ID of the last error encountered on this file
|
|
// --- Effect: Retrieves the code for the last error that occured when
|
|
// working with this file. These error codes are limited to
|
|
// errors within the code that processes the comma delimited files,
|
|
// and not general I/O errors.
|
|
inline Errors GetLastError() const { return m_nLastError; };
|
|
|
|
// --- In:
|
|
// --- Out:
|
|
// --- Returns: The description of the last error encountered in this file
|
|
// --- Effect: Retrieves the string describing the last error that was
|
|
// encountered while processing this file.
|
|
inline LPCTSTR GetLastErrorMsg() const { return GetErrorMsg(GetLastError()); };
|
|
|
|
// --- In: nError - the error code that a textual description is
|
|
// desired for.
|
|
// --- Out:
|
|
// --- Returns: The description of the specified error.
|
|
// --- Effect: Retrieves the string describing the error code that is passed in.
|
|
inline LPCTSTR GetErrorMsg(Errors nError) const { return m_pstrErrorMsgs[nError]; };
|
|
|
|
|
|
|
|
//
|
|
// Column management functions
|
|
//
|
|
|
|
// --- In: nIndex - the index of the column to assign this
|
|
// information to.
|
|
// lpszName - the name of the column. This will be the name
|
|
// written out by the WriteHeaders() funtion when
|
|
// writing a CSV file.
|
|
// nType - enumerated value indicating the type of data that
|
|
// is expected for this column.
|
|
// bQuote - TRUE if the string value is to be quoted on output.
|
|
// This value defaults to FALSE
|
|
// --- Out:
|
|
// --- Returns:
|
|
// --- Effect: Sets the description of a column to the file being processed
|
|
// by this object. The specified column will have a name associated
|
|
// with it, as well as optional aliases to look for when reading
|
|
// the file in, and the type of data expected in that column.
|
|
void SetColumnInfo(int nIndex, LPCTSTR lpszName, Types nType, BOOL bQuote=FALSE);
|
|
|
|
// --- In: sName - the name associated with the specified aliases.
|
|
// arrAliases - a CStringArray containing the list of aliases
|
|
// that are possible for this column.
|
|
// lpstrAliases- an array of sting pointers containing the list
|
|
// of aliases that are possible for this column.
|
|
// The last pointer in this array must be a NULL
|
|
// pointer to mark the end of the list.
|
|
// --- Out:
|
|
// --- Returns: TRUE if the name was found, or one of the aliases was found
|
|
// and its name was changed. FALSE if neither the name nor any of
|
|
// the aliases was found.
|
|
// --- Effect: Assigns a set of alternative names that may be used to
|
|
// define a column in a CSV file. This function should be called
|
|
// after GetColumns() when reading in a CSV file. This will scan
|
|
// the headers read in, checking first for a column that matches
|
|
// sName parameter. If one is found, nothing more is done.
|
|
// If not, it than searches for a column that matches one of the
|
|
// aliases. If one is found, that column's internal name will be
|
|
// changed to sName for future reference.
|
|
BOOL SetAliases(const CString& sName, const CStringArray& arrAliases);
|
|
BOOL SetAliases(const CString& sName, LPCTSTR lpstrAliases[]);
|
|
|
|
|
|
// --- In: nColumns - the number of columns that will be used in
|
|
// this file. If -1 specified then the number
|
|
// of columns will be calculated on the base
|
|
// of the file contents
|
|
// --- Out:
|
|
// --- Returns:
|
|
// --- Effect: Prepares this CSV file object for reading/writing a new table
|
|
// of values from/to the file. This would be used when column
|
|
// headers are not used. This function will clear the column
|
|
// information and data arrays
|
|
void Initialize(int nColumns=-1);
|
|
|
|
|
|
// --- In: arrColumns - a CStringArray containing the names of the
|
|
// individual columns, in order.
|
|
// lpstrColumns- an array of string pointers containing the
|
|
// names of the individual columns, in order. The
|
|
// last element of the array must be a NULL pointer.
|
|
// --- Out:
|
|
// --- Returns:
|
|
// --- Effect: Sets the column names for the current CSV file. The names
|
|
// will be used when reading/writing records from/to the
|
|
// COXCsvFile object.
|
|
void SetColumns(const CStringArray& arrColumns);
|
|
void SetColumns(LPCTSTR lpstrColumns[]);
|
|
|
|
|
|
// --- In:
|
|
// --- Out:
|
|
// --- Returns:
|
|
// --- Effect: Writes out a line to the file containing the current set of
|
|
// column names, all delimited as strings.
|
|
void WriteHeaders();
|
|
|
|
|
|
// --- In: nNumExpected - the number of columns expected in the file.
|
|
// --- Out:
|
|
// --- Returns: TRUE if no errors occured while reading in the column headers,
|
|
// FALSE indicates that an error occured, and GetLastError() should
|
|
// be called to determine what that error was.
|
|
// --- Effect: Retrieves the column names from the file being read in by
|
|
// reading the next line from the file, and parse out the names
|
|
// of the columns from that line.
|
|
BOOL GetColumns(int nNumExpected);
|
|
|
|
|
|
|
|
//
|
|
// output functions
|
|
//
|
|
|
|
// --- In: nColumn - the number of the column to assign the
|
|
// specified data to.
|
|
// lpszText - the text to assign to this column.
|
|
// bQuote - TRUE if the text string is to be quoted on
|
|
// output.
|
|
// --- Out:
|
|
// --- Returns: TRUE if the specified column was found, FALSE if the column
|
|
// was not found.
|
|
// --- Effect: Assigns a text string to a column in the current row when
|
|
// writing out to a CSV file. The text in this column will be
|
|
// quoted if either the quote parameter is TRUE, or if either
|
|
// one of the delimiter characters is found in the text of the
|
|
// string. The second version of this function will use
|
|
// column info that previously set using SetColumnInfo() function
|
|
// in order to retrieve the bQuote flag.
|
|
BOOL WriteColumn(int nColumn, LPCTSTR lpszText, BOOL bQuote);
|
|
BOOL WriteColumn(int nColumn, LPCTSTR lpszText);
|
|
|
|
// --- In: nColumn - the number of the column to assign the
|
|
// specified data to.
|
|
// ucData - the byte sized numeric value to be written out
|
|
// unData - the word sized numeric value to be written out.
|
|
// ulData - the unsigned long sized numeric value to be
|
|
// written out.
|
|
// nData - the short integer value to be written out.
|
|
// lData - the long integer value to be written out.
|
|
// fData - the floating point value to be written out.
|
|
// dData - the double floating point value to be written out.
|
|
// bHex - TRUE if this value is to be written out in
|
|
// hexidecimal format.
|
|
// --- Out:
|
|
// --- Returns: TRUE if the specified column was found, FALSE if the column
|
|
// was not found.
|
|
// --- Effect: Assigns a numeric value (in different formats) to column.
|
|
// The value will be written out as either a decimal or hexidecimal
|
|
// number
|
|
BOOL WriteColumn(int nColumn, unsigned char ucData, BOOL bHex=FALSE);
|
|
BOOL WriteColumn(int nColumn, unsigned short unData, BOOL bHex=FALSE);
|
|
BOOL WriteColumn(int nColumn, unsigned long ulData, BOOL bHex=FALSE);
|
|
BOOL WriteColumn(int nColumn, short nData, BOOL bHex=FALSE);
|
|
BOOL WriteColumn(int nColumn, long lData, BOOL bHex=FALSE);
|
|
BOOL WriteColumn(int nColumn, float fData);
|
|
BOOL WriteColumn(int nColumn, double dData);
|
|
|
|
|
|
// --- In:
|
|
// --- Out:
|
|
// --- Returns:
|
|
// --- Effect: Writes out the current set of values for the current line to
|
|
// the CSV file. After writing out the values, the column values
|
|
// stored in the class will be reset to empty strings in preperation
|
|
// for the next line of data.
|
|
void WriteLine();
|
|
|
|
|
|
|
|
//
|
|
// input functions
|
|
//
|
|
|
|
// --- In:
|
|
// --- Out:
|
|
// --- Returns: TRUE if the line was read in, and no errors occurred
|
|
// --- Effect: Reads in the next line of text from the CSV file, and break
|
|
// it into individual columns in the COXCsvFile::m_arrColumns array.
|
|
// If more columns of data are encountered in the current line
|
|
// than we are looknig for (as specified by the call to
|
|
// ReadColumns() function), a errTooManyColumns error will be
|
|
// generated. The data read will still be available, and
|
|
// a programmer is free to ignore this error.
|
|
// If the line ends with a quoted string, but the closing quote
|
|
// is not encountered, more lines will be read from the file and
|
|
// appended to this line until the closing quote is found. This
|
|
// allows for fields that contain line breaks, which is allowed
|
|
// within many spreadsheet and database products that process CSV
|
|
// files. After the line is successfully read in breaks it down into
|
|
// the individual field values.
|
|
BOOL ReadLine();
|
|
|
|
|
|
// --- In:
|
|
// --- Out:
|
|
// --- Returns: TRUE if the just read line was empty.
|
|
// --- Effect: Indicates if the line that was just read contained nothing
|
|
// but whitespace.
|
|
inline BOOL IsLineEmpty() const { return m_bLineEmpty; };
|
|
|
|
|
|
// --- In: lpszName - the name of the column to get the data from
|
|
// nColumn - the index of the column to get the data from
|
|
// lpstrSet - the array of string pointers defining the names
|
|
// of the elements in the set to scan for a match to.
|
|
// The last element of this array must be a NULL.
|
|
// --- Out: strText - the CString object to return the text string in
|
|
// ucData - the byte variable the result is to be returned in
|
|
// unData - the word variable the result is to be returned in
|
|
// nDataIndex - the integer variable the index of the element
|
|
// in lpstrSet array that matches the data in the
|
|
// specified column is to be returned in
|
|
// nData - the short integer variable the result is to be
|
|
// returned in
|
|
// lData - the long integer variable the result is to be
|
|
// returned in
|
|
// fData - the floating point variable the result is to be
|
|
// returned in
|
|
// dData - the double floating point variable the result
|
|
// is to be returned in
|
|
// --- Returns: TRUE if the specified column was found, FALSE if the column
|
|
// was not found.
|
|
// --- Effect: Retrieves the data from the specified column in the current line
|
|
inline BOOL ReadColumn(LPCTSTR lpszName, CString& strText)
|
|
{
|
|
int nIndex=FindColumn(lpszName);
|
|
return ((nIndex>=0) ? ReadColumn(nIndex,strText) : FALSE);
|
|
}
|
|
BOOL ReadColumn(int nColumn, CString& strText);
|
|
inline BOOL ReadColumn(LPCTSTR lpszName, unsigned char& ucData)
|
|
{
|
|
int nIndex=FindColumn(lpszName);
|
|
return ((nIndex>=0) ? ReadColumn(nIndex,ucData) : FALSE);
|
|
}
|
|
BOOL ReadColumn(int nColumn, unsigned char& ucData);
|
|
inline BOOL ReadColumn(LPCTSTR lpszName, unsigned short& unData)
|
|
{
|
|
int nIndex=FindColumn(lpszName);
|
|
return ((nIndex>=0) ? ReadColumn(nIndex,unData) : FALSE);
|
|
}
|
|
BOOL ReadColumn(int nColumn, unsigned short& unData);
|
|
inline BOOL ReadColumn(LPCTSTR lpszName, int& nDataIndex, LPCTSTR lpstrSet[])
|
|
{
|
|
int nIndex=FindColumn(lpszName);
|
|
return ((nIndex>=0) ? ReadColumn(nIndex,nDataIndex,lpstrSet) : FALSE);
|
|
}
|
|
BOOL ReadColumn(int nColumn, int& nDataIndex, LPCTSTR lpstrSet[]);
|
|
inline BOOL ReadColumn(LPCTSTR lpszName, short& nData)
|
|
{
|
|
int nIndex=FindColumn(lpszName);
|
|
return ((nIndex>=0) ? ReadColumn(nIndex,nData) : FALSE);
|
|
}
|
|
BOOL ReadColumn(int nColumn, short& nData);
|
|
inline BOOL ReadColumn(LPCTSTR lpszName, long& lData)
|
|
{
|
|
int nIndex=FindColumn(lpszName);
|
|
return ((nIndex>=0) ? ReadColumn(nIndex,lData) : FALSE);
|
|
}
|
|
BOOL ReadColumn(int nColumn, long& lData);
|
|
inline BOOL ReadColumn(LPCTSTR lpszName, float& fData)
|
|
{
|
|
int nIndex=FindColumn(lpszName);
|
|
return ((nIndex>=0) ? ReadColumn(nIndex,fData) : FALSE);
|
|
}
|
|
BOOL ReadColumn(int nColumn, float& fData);
|
|
inline BOOL ReadColumn(LPCTSTR lpszName, double& dData)
|
|
{
|
|
int nIndex=FindColumn(lpszName);
|
|
return ((nIndex>=0) ? ReadColumn(nIndex,dData) : FALSE);
|
|
}
|
|
BOOL ReadColumn(int nColumn, double& dData);
|
|
};
|
|
|
|
#endif
|