427 lines
12 KiB
C++
427 lines
12 KiB
C++
/*
|
|
* Copyright (c) 2019 Lawrence Livermore National Laboratory
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License version 2 as
|
|
* published by the Free Software Foundation;
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program; if not, write to the Free Software
|
|
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
*
|
|
* Author: Mathew Bielejeski <bielejeski1@llnl.gov>
|
|
*/
|
|
|
|
#ifndef NS3_CSV_READER_H_
|
|
#define NS3_CSV_READER_H_
|
|
|
|
#include <cstddef>
#include <cstdint>
#include <fstream>
#include <istream>
#include <string>
#include <tuple>
#include <utility>
#include <vector>
|
|
|
|
/**
|
|
* \file
|
|
* \ingroup core
|
|
* \ingroup csvreader
|
|
*
|
|
* ns3::CsvReader declaration
|
|
*
|
|
*/
|
|
namespace ns3
|
|
{
|
|
|
|
/**
|
|
* \ingroup core
|
|
* \defgroup csvreader CSV File Reader
|
|
*
|
|
* A way to extract data from simple csv files.
|
|
*/
|
|
|
|
/**
|
|
* \ingroup csvreader
|
|
*
|
|
* Provides functions for parsing and extracting data from
|
|
* Comma Separated Value (CSV) formatted text files.
|
|
* This parser is somewhat more relaxed than \RFC{4180};
|
|
* see below for a list of the differences.
|
|
* In particular it is possible to set the delimiting character at construction,
|
|
* enabling parsing of tab-delimited streams or other formats with delimiters.
|
|
*
|
|
* \note Excel may generate "CSV" files with either ',' or ';' delimiter
|
|
* depending on the locale: if ',' is the decimal mark then ';' is the list
|
|
* separator and used to read/write "CSV" files.
|
|
*
|
|
* To use this facility, construct a CsvReader from either a file path
|
|
* or \c std::istream, then FetchNextRow(), and finally GetValue()
|
|
* to extract specific values from the row.
|
|
*
|
|
* For example:
|
|
* \code
|
|
* CsvReader csv (filePath);
|
|
* while (csv.FetchNextRow ())
|
|
* {
|
|
* // Ignore blank lines
|
|
* if (csv.IsBlankRow ())
|
|
* {
|
|
* continue;
|
|
* }
|
|
*
|
|
* // Expecting three values
|
|
* double x, y, z;
|
|
* bool ok = csv.GetValue (0, x);
|
|
* ok &= csv.GetValue (1, y);
|
|
* ok &= csv.GetValue (2, z);
|
|
* if (!ok)
|
|
* {
|
|
* // Handle error, then
|
|
* continue;
|
|
* }
|
|
*
|
|
* // Do something with values
|
|
*
|
|
* } // while FetchNextRow
|
|
* \endcode
|
|
*
|
|
* As another example, supposing we need a vector from each row,
|
|
* the middle of the previous example would become:
|
|
* \code
|
|
* std::vector<double> v (n);
|
|
* bool ok = true;
|
|
* for (std::size_t i = 0; i < v.size (); ++i)
|
|
* {
|
|
* ok &= csv.GetValue (i, v[i]);
|
|
* }
|
|
* if (!ok) ...
|
|
* \endcode
|
|
*
|
|
*
|
|
* File Format
|
|
* ===========
|
|
*
|
|
* This parser implements \RFC{4180}, but with several restrictions removed;
|
|
* see below for differences. All the formatting features described next
|
|
* are illustrated in the examples which follow.
|
|
*
|
|
* Comments
|
|
* --------
|
|
*
|
|
* The hash character (#) is used to indicate the start of a comment. Comments
|
|
* are not parsed by the reader. Comments are treated as either an empty column
|
|
* or part of an existing column depending on where the comment is located.
|
|
* Comments that are found at the end of a line containing data are ignored.
|
|
*
|
|
* 1,2 # This comment ignored, leaving two data columns
|
|
*
|
|
* Lines that contain a comment and no data are treated as rows with a single
|
|
* empty column, meaning that ColumnCount will return 1 and
|
|
* GetValue() will return an empty string.
|
|
*
|
|
* # This row treated as a single empty column, returning an empty string.
|
|
* "" # So is this
|
|
*
|
|
* IsBlankRow() will return \c true in either of these cases.
|
|
*
|
|
* Quoted Columns
|
|
* --------------
|
|
*
|
|
* Columns with string data which contain the delimiter character or
|
|
* the hash character can be wrapped in double quotes to prevent CsvReader
|
|
* from treating them as special characters.
|
|
*
|
|
* 3,string without delimiter,"String with comma ',' delimiter"
|
|
*
|
|
* Double quotes can be escaped
|
|
* by doubling up the quotes inside a quoted field. See example 6 below for
|
|
* a demonstration.
|
|
*
|
|
* Whitespace
|
|
* ----------
|
|
*
|
|
* Leading and trailing whitespace are ignored by the reader and are not
|
|
* stored in the column data.
|
|
*
|
|
* 4,5 , 6 # Columns contain '4', '5', '6'
|
|
*
|
|
* If leading or trailing whitespace are important
|
|
* for a column, wrap the column in double quotes as discussed above.
|
|
*
|
|
* 7,"8 "," 9" # Columns contain '7', '8 ', ' 9'
|
|
*
|
|
* Trailing Delimiter
|
|
* ------------------
|
|
*
|
|
* Trailing delimiters are ignored; they do _not_ result in an empty column.
|
|
*
|
|
*
|
|
* Differences from RFC 4180
|
|
* -------------------------
|
|
* Section 2.1
|
|
* - Line break can be LF or CRLF
|
|
*
|
|
* Section 2.3
|
|
* - Non-parsed lines are allowed anywhere, not just as a header.
|
|
* - Lines do not all have to contain the same number of fields.
|
|
*
|
|
* Section 2.4
|
|
* - Characters other than comma can be used to separate fields.
|
|
* - Lines do not all have to contain the same number of fields.
|
|
* - Leading/trailing spaces are stripped from the field
|
|
* unless the whitespace is wrapped in double quotes.
|
|
* - A trailing delimiter on a line is not an error.
|
|
*
|
|
* Section 2.6
|
|
* - Quoted fields cannot contain line breaks
|
|
*
|
|
* Examples
|
|
* --------
|
|
* \par Example 1: Basic
|
|
* \code
|
|
* # Column 1: Product
|
|
* # Column 2: Price
|
|
* widget, 12.5
|
|
* \endcode
|
|
*
|
|
* \par Example 2: Comment at end of line
|
|
* \code
|
|
* # Column 1: Product
|
|
* # Column 2: Price
|
|
* broken widget, 12.5 # this widget is broken
|
|
* \endcode
|
|
*
|
|
* \par Example 3: Delimiter in double quotes
|
|
* \code
|
|
* # Column 1: Product
|
|
* # Column 2: Price
|
|
* # Column 3: Count
|
|
* # Column 4: Date
|
|
* widget, 12.5, 100, "November 6, 2018"
|
|
* \endcode
|
|
*
|
|
* \par Example 4: Hash character in double quotes
|
|
* \code
|
|
* # Column 1: Key
|
|
* # Column 2: Value
|
|
* # Column 3: Description
|
|
* count, 5, "# of widgets currently in stock"
|
|
* \endcode
|
|
*
|
|
* \par Example 5: Extra whitespace
|
|
* \code
|
|
* # Column 1: Key
|
|
* # Column 2: Value
|
|
* # Column 3: Description
|
|
* count , 5 ,"# of widgets in stock"
|
|
* \endcode
|
|
*
|
|
* \par Example 6: Escaped quotes
|
|
* \code
|
|
* # Column 1: Key
|
|
* # Column 2: Description
|
|
* # The value returned for Column 2 will be: String with "embedded" quotes
|
|
* foo, "String with ""embedded"" quotes"
|
|
* \endcode
|
|
*/
|
|
class CsvReader
{
  public:
    /**
     * Constructor
     *
     * Opens the file specified in the filepath argument and
     * reads data from it.
     *
     * \param filepath Path to a file containing CSV data.
     * \param delimiter Character used to separate fields in the data file.
     */
    CsvReader(const std::string& filepath, char delimiter = ',');

    /**
     * Constructor
     *
     * Reads csv data from the supplied input stream.
     *
     * \param stream Input stream containing csv data.
     * \param delimiter Character used to separate fields in the data stream.
     */
    CsvReader(std::istream& stream, char delimiter = ',');

    /**
     * Destructor
     */
    virtual ~CsvReader();

    /**
     * Returns the number of columns in the csv data.
     *
     * GetValue() treats any index greater than or equal to this count
     * as out of range. Presumably this is the number of fields extracted
     * from the most recently fetched row (confirm against the
     * implementation).
     *
     * \return Number of columns
     */
    std::size_t ColumnCount() const;

    /**
     * The number of lines that have been read.
     *
     * \return The number of lines that have been read.
     */
    std::size_t RowNumber() const;

    /**
     * Returns the delimiter character specified during object construction.
     *
     * \return Character used as the column separator.
     */
    char Delimiter() const;

    /**
     * Reads one line from the input until a new line is encountered.
     * The read data is stored in a cache which is accessed by the
     * GetValue functions to extract fields from the data.
     *
     * \return \c true if a line was read successfully or \c false if the
     * read failed or reached the end of the file.
     */
    bool FetchNextRow();

    /**
     * Attempt to convert from the string data in the specified column
     * to the specified data type.
     *
     * \tparam T The data type of the output variable.
     *
     * \param [in] columnIndex Index of the column to fetch.
     * \param [out] value Location where the converted data will be stored.
     *
     * \return \c true if the specified column has data and the data
     * was converted to the specified data type.
     */
    template <class T>
    bool GetValue(std::size_t columnIndex, T& value) const;

    /**
     * Check if the current row is blank.
     * A blank row can consist of any combination of
     *
     * - Whitespace
     * - Comment
     * - Quoted empty string `""`
     *
     * \returns \c true if the input row is a blank line.
     */
    bool IsBlankRow() const;

  private:
    /**
     * Attempt to convert the supplied string into the specified type.
     *
     * One overload exists for each supported destination type; GetValue()
     * selects the overload from the type of its output argument, after it
     * has verified that the requested column exists.
     *
     * \param [in] input String value to be converted.
     * \param [out] value Location where the converted value will be stored.
     *
     * \return \c true if the conversion succeeded, \c false otherwise.
     */
    /** @{ */
    bool GetValueAs(std::string input, double& value) const;

    bool GetValueAs(std::string input, float& value) const;

    bool GetValueAs(std::string input, signed char& value) const;

    bool GetValueAs(std::string input, short& value) const;

    bool GetValueAs(std::string input, int& value) const;

    bool GetValueAs(std::string input, long& value) const;

    bool GetValueAs(std::string input, long long& value) const;

    bool GetValueAs(std::string input, std::string& value) const;

    bool GetValueAs(std::string input, unsigned char& value) const;

    bool GetValueAs(std::string input, unsigned short& value) const;

    bool GetValueAs(std::string input, unsigned int& value) const;

    bool GetValueAs(std::string input, unsigned long& value) const;

    bool GetValueAs(std::string input, unsigned long long& value) const;
    /** @} */

    /**
     * Returns \c true if the supplied character matches the delimiter.
     *
     * \param c Character to check.
     * \return \c true if \pname{c} is the delimiter character,
     *         \c false otherwise.
     */
    bool IsDelimiter(char c) const;

    /**
     * Scans the string and splits it into individual columns based on the delimiter.
     *
     * \param [in] line String containing delimiter separated data.
     */
    void ParseLine(const std::string& line);

    /**
     * Extracts the data for one column in a csv row.
     *
     * \param begin Iterator to the first character in the row.
     * \param end Iterator to the last character in the row.
     *        NOTE(review): presumably a past-the-end iterator, as is
     *        conventional for iterator pairs; confirm against the
     *        implementation.
     * \return A tuple containing the content of the column and an iterator
     *         pointing to the position in the row where the column ended.
     */
    std::tuple<std::string, std::string::const_iterator> ParseColumn(
        std::string::const_iterator begin,
        std::string::const_iterator end);

    /**
     * Container of CSV data.  Each entry represents one field in a row
     * of data.  The fields are stored in the same order that they are
     * encountered in the CSV data.
     */
    using Columns = std::vector<std::string>;

    char m_delimiter;           //!< Character used to separate fields.
    std::size_t m_rowsRead;     //!< Number of lines processed.
    Columns m_columns;          //!< Fields extracted from the current line.
    bool m_blankRow;            //!< Line contains no data (blank line or comment only).
    std::ifstream m_fileStream; //!< File stream containing the data.

    /**
     * Pointer to the input stream containing the data.
     */
    std::istream* m_stream;

}; // class CsvReader
|
|
|
|
/****************************************************
|
|
* Template implementations.
|
|
***************************************************/
|
|
|
|
template <class T>
|
|
bool
|
|
CsvReader::GetValue(std::size_t columnIndex, T& value) const
|
|
{
|
|
if (columnIndex >= ColumnCount())
|
|
{
|
|
return false;
|
|
}
|
|
|
|
std::string cell = m_columns[columnIndex];
|
|
|
|
return GetValueAs(std::move(cell), value);
|
|
}
|
|
|
|
} // namespace ns3
|
|
|
|
#endif // NS3_CSV_READER_H_
|