All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
KeyedCSVparser.h
Go to the documentation of this file.
1 /**
2  * @file icaruscode/Decode/DecoderTools/details/KeyedCSVparser.h
3  * @brief Simple parser for comma-separated text.
4  * @author Gianluca Petrillo (petrillo@slac.stanford.edu)
5  * @date May 9, 2021
6  * @see icaruscode/Decode/DecoderTools/details/KeyedCSVparser.cxx
7  */
8 
9 #ifndef ICARUSCODE_DECODE_DECODERTOOLS_DETAILS_KEYEDCVSPARSER_H
10 #define ICARUSCODE_DECODE_DECODERTOOLS_DETAILS_KEYEDCVSPARSER_H
11 
12 // ICARUS libraries
14 
15 // C++ standard libraries
16 #include <iosfwd> // std::ostream
17 #include <string_view>
18 #include <vector>
19 #include <string>
20 #include <optional>
21 #include <regex>
22 #include <initializer_list>
23 #include <stdexcept> // std::runtime_error
24 #include <utility> // std::move(), std::pair
25 #include <limits>
26 #include <type_traits> // std::is_constructible_v, std::is_arithmetic_v, ...
27 #include <charconv> // std::from_chars()
28 #include <cstddef> // std::size_t
29 
30 
31 // -----------------------------------------------------------------------------
32 namespace icarus::details { class KeyedCSVparser; }
33 /**
34  * @class icarus::details::KeyedCSVparser
35  * @brief Parser to fill a `KeyValuesData` structure out of a character buffer.
36  *
37  * It currently supports only single-line buffers.
38  *
39  * The parser operates one "line" at a time, returning a `KeyValuesData` with
40  * the values assigned to each detected key. No data type is implied: all
41  * elements are treated as strings, either a key or a value.
42  * The parser separates the elements according to a separator, strips them of
43  * leading and trailing spaces, then it decides whether each element is a value
44  * to be assigned to the last key found, or a new key.
45  * Keys are elements that have letters in them, values are anything else.
46  * This simple (and arguable) criterion can be broken with specific parser
47  * configuration: a pattern can be specified that when matched to an element
48  * will make it a key; the pattern can also set the number of values that key
49  * will require.
50  *
51  * For example:
52  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~{.cpp}
53  * icarus::details::KeyedCSVparser parser;
54  * parser.addPatterns({
55  * { "TriggerType", 1U } // expect one value (even if contains letters)
56  * , { "TriggerWindows", 1U } // expect one value (even if contains letters)
57  * , { "TPChitTimes", icarus::details::KeyedCSVparser::FixedSize }
58  * // the first value is an integer, count of how many other values
59  * });
60  *
61  * icarus::KeyValuesData data = parser(
62  * "TriggerType, S5, Triggers, TriggerWindows, 0C0B,"
63  * " TPChits, 12, 130, 0, 0, TPChitTimes, 3, -1.1, -0.3, 0.1, PMThits, 8"
64  * );
65  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
66  * will return `data` with 6 items.
67  */
69 
70  public:
71 
73 
74  /// Base of all errors by KeyedCSVparser.
77  struct ParserError; ///< Generic error: base of all errors by KeyedCSVparser.
78  struct InvalidFormat; ///< Parsing format is not understood.
79  /// Expected number of values is missing.
81  struct MissingValues; ///< Expected values are missing.
82 
83  /// Mnemonic size value used in `addPattern()` calls.
84  static constexpr unsigned int FixedSize
85  = std::numeric_limits<unsigned int>::max();
86  /// Mnemonic size value used in `addPattern()` calls.
87  static constexpr unsigned int DynamicSize = FixedSize - 1U;
88 
89 
90  /// Constructor: specifies the separator character.
91  KeyedCSVparser(char sep = ','): fSep(sep) {}
92 
93  //@{
94  /// Parses the buffer `s` and returns a data structure with the content.
95  ParsedData_t parse(std::string_view const& s) const;
96  ParsedData_t parse(std::string const& s) const;
97  template <typename BIter, typename EIter>
98  ParsedData_t parse(BIter b, EIter e) const;
99 
100  ParsedData_t operator() (std::string_view const& s) const { return parse(s); }
101  ParsedData_t operator() (std::string const& s) const { return parse(s); }
102  template <typename BIter, typename EIter>
103  ParsedData_t operator() (BIter b, EIter e) const { return parse(b, e); }
104  //@}
105 
106  //@{
107  /// Parses the buffer `s` and fills `data` with it.
108  void parse(std::string_view const& s, ParsedData_t& data) const;
109  //@}
110 
111  /**
112  * @name Known patterns
113  *
114  * The parser normally treats as a value everything that does not start with a
115  * letter.
116  * Known patterns may override this behaviour: if a token matches a known
117  * pattern, it is considered a key and it is possible to specify the expected
118  * number of values.
119  *
120  * The number of values can be:
121  * * a number: exactly that number of values are required; an exception will
122  * be thrown if not enough tokens are available;
123  * * `FixedSize`: the next token must be a non-negative integer specifying how
124  * many other values to add (read this with `Item::getSizedVector()`);
125  * an exception will be thrown if not enough tokens are available;
126  * * `DynamicSize`: the standard algorithm is used and values are added as
127  * long as they don't look like keys; the token matching the pattern is
128  * interpreted as a key though.
129  *
130  * Patterns are considered in the order they were added.
131  */
132  /// @{
133 
134  //@{
135  /**
136  * @brief Adds a single known pattern.
137  * @param pattern the regular expression matching the key for this pattern
138  * @param values the number of values for this pattern
139  * @return this parser (`addPattern()` calls may be chained)
140  */
141  KeyedCSVparser& addPattern(std::regex pattern, unsigned int values)
142  { fPatterns.emplace_back(std::move(pattern), values); return *this; }
143  KeyedCSVparser& addPattern(std::string const& pattern, unsigned int values)
144  { return addPattern(std::regex{ pattern }, values); }
145  //@}
146 
147  //@{
148  /**
149  * @brief Adds known patterns.
150  * @param patterns sequence of patterns to be added
151  * @return this parser (`addPatterns()` calls may be chained)
152  *
153  * Each pattern is a pair key regex/number of values, like in `addPattern()`.
154  */
156  (std::initializer_list<std::pair<std::regex, unsigned int>> patterns);
158  (std::initializer_list<std::pair<std::string, unsigned int>> patterns);
159  //@}
160 
161  /// @}
162 
163  private:
164  using Buffer_t = std::string_view;
165  using SubBuffer_t = std::string_view;
166 
167  char const fSep = ','; ///< Character used as token separator.
168 
169  /// List of known patterns for matching keys, and how many values they hold.
170  std::vector<std::pair<std::regex, unsigned int>> fPatterns;
171 
172  /// Returns the length of the next token, up to the next separator (excluded).
173  std::size_t findTokenLength(Buffer_t const& buffer) const noexcept;
174 
175  /// Returns the value of the next token, stripped.
176  SubBuffer_t peekToken(Buffer_t const& buffer) const noexcept;
177 
178  /// Extracts the next token from the `buffer` and returns its value, stripped.
179  SubBuffer_t extractToken(Buffer_t& buffer) const noexcept;
180 
181  /// Is content of `buffer` a key (as opposed to a value)?
182  bool isKey(SubBuffer_t const& buffer) const noexcept;
183 
184 
185  template <typename String>
186  static Buffer_t makeBuffer(String const& s) noexcept;
187 
188  static Buffer_t& moveBufferHead(Buffer_t& buffer, std::size_t size) noexcept;
189 
190  static SubBuffer_t strip(SubBuffer_t s) noexcept;
191  static SubBuffer_t stripLeft(SubBuffer_t s) noexcept;
192  static SubBuffer_t stripRight(SubBuffer_t s) noexcept;
193  static SubBuffer_t stripRightChar(SubBuffer_t s, char c) noexcept;
194 
195  template <char... Chars>
196  static SubBuffer_t stripRightChars(SubBuffer_t s) noexcept;
197 
198 }; // icarus::details::KeyedCSVparser
199 
200 
201 
202 // -----------------------------------------------------------------------------
203 // --- Exception class definitions
204 // -----------------------------------------------------------------------------
206 
207  ParserError(std::string msg): Error(std::move(msg)) {}
208 
209 }; // icarus::details::KeyedCSVparser::ParserError
210 
211 
212 // -----------------------------------------------------------------------------
214 
215  InvalidFormat(std::string const& msg): ParserError("Format error: " + msg) {}
216 
217 }; // icarus::details::KeyedCSVparser::InvalidFormat
218 
219 
220 // -----------------------------------------------------------------------------
222 
223  MissingValues(std::string const& key, unsigned int values)
224  : ErrorOnKey(key,
225  "data ended while expecting " + std::to_string(values) + " more values"
226  )
227  {}
228 
229 }; // icarus::details::KeyedCSVparser::MissingValues
230 
231 
232 // -----------------------------------------------------------------------------
233 // --- Inline implementation
234 // -----------------------------------------------------------------------------
236  (std::string_view const& s) const -> ParsedData_t
237  { ParsedData_t data; parse(s, data); return data; }
238 
239 
240 // -----------------------------------------------------------------------------
241 // --- Template implementation
242 // -----------------------------------------------------------------------------
243 template <typename BIter, typename EIter>
244 auto icarus::details::KeyedCSVparser::parse(BIter b, EIter e) const
245  -> ParsedData_t
246  { return parse(std::string_view{ &*b, std::distance(b, e) }); }
247 
248 
249 // -----------------------------------------------------------------------------
250 
251 
252 #endif // ICARUSCODE_DECODE_DECODERTOOLS_DETAILS_KEYEDCVSPARSER_H
static constexpr unsigned int FixedSize
Expected values are missing.
Parser to fill a KeyValuesData structure out of a character buffer.
KeyedCSVparser(char sep= ',')
Constructor: specifies the separator character.
double std(const std::vector< short > &wf, const double ped_mean, size_t start, size_t nsample)
Definition: UtilFunc.cxx:42
bool isKey(SubBuffer_t const &buffer) const noexcept
Is content of buffer a key (as opposed to a value)?
static SubBuffer_t stripRightChars(SubBuffer_t s) noexcept
ParsedData_t operator()(std::string_view const &s) const
std::size_t size(FixedBins< T, C > const &) noexcept
Definition: FixedBins.h:561
static SubBuffer_t stripRight(SubBuffer_t s) noexcept
char const fSep
Character used as token separator.
KeyedCSVparser & addPattern(std::regex pattern, unsigned int values)
Adds a single known pattern.
Simple parsed data format.
ParsedData_t parse(std::string_view const &s) const
Parses the buffer s and returns a data structure with the content.
static SubBuffer_t stripRightChar(SubBuffer_t s, char c) noexcept
double distance(geo::Point_t const &point, CathodeDesc_t const &cathode)
Returns the distance of a point from the cathode.
static Buffer_t & moveBufferHead(Buffer_t &buffer, std::size_t size) noexcept
static constexpr unsigned int DynamicSize
Mnemonic size value used in addPattern() calls.
Collection of items with key/values structure.
std::size_t findTokenLength(Buffer_t const &buffer) const noexcept
Returns the length of the next token, up to the next separator (excluded).
KeyedCSVparser & addPatterns(std::initializer_list< std::pair< std::regex, unsigned int >> patterns)
Adds known patterns.
std::vector< std::pair< std::regex, unsigned int > > fPatterns
List of known patterns for matching keys, and how many values they hold.
static SubBuffer_t stripLeft(SubBuffer_t s) noexcept
SubBuffer_t extractToken(Buffer_t &buffer) const noexcept
Extracts the next token from the buffer and returns its value, stripped.
std::string to_string(WindowPattern const &pattern)
then echo File list $list not found else cat $list while read file do echo $file sed s
Definition: file_to_url.sh:60
static SubBuffer_t strip(SubBuffer_t s) noexcept
KeyedCSVparser & addPattern(std::string const &pattern, unsigned int values)
icarus::KeyValuesData ParsedData_t
do i e
SubBuffer_t peekToken(Buffer_t const &buffer) const noexcept
Returns the value of the next token, stripped.
MissingValues(std::string const &key, unsigned int values)
static Buffer_t makeBuffer(String const &s) noexcept