All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
KeyedCSVparser.cxx
Go to the documentation of this file.
1 /**
2  * @file icaruscode/Decode/DecoderTools/details/KeyedCSVparser.cxx
3  * @brief Simple parser for comma-separated text (implementation).
4  * @author Gianluca Petrillo (petrillo@slac.stanford.edu)
5  * @date May 9, 2021
6  * @see icaruscode/Decode/DecoderTools/details/KeyedCSVparser.h
7  */
8 
9 // library header
11 
// C++ standard libraries
#include <charconv> // std::from_chars()
#include <limits> // std::numeric_limits
#include <ostream>
#include <system_error> // std::errc
#include <cassert>
#include <cctype> // std::isspace(), std::isalpha()

16 
17 
18 // -----------------------------------------------------------------------------
19 // --- icarus::details::KeyedCSVparser
20 // -----------------------------------------------------------------------------
22  (std::string_view const& s, ParsedData_t& data) const
23 {
24 
25  auto stream = s;
26 
27  ParsedData_t::Item* currentItem = nullptr;
28 
29  // this many tokens will be assigned to the current key:
30  int forcedValues = -1; // 0 would force the first entry to be a key
31 
32  while (!stream.empty()) {
33 
34  auto const token = extractToken(stream);
35 
36  std::string tokenStr { cbegin(token), cend(token) };
37 
38  bool bKey = false;
39  do {
40 
41  // if there are values pending, this is not a key, period;
42  // if all required values have been assigned, the next token is a key.
43  if (forcedValues >= 0) {
44  bKey = (forcedValues == 0); // if no more forced values, next is key
45  --forcedValues;
46  // if we know the token is a key, we still need to check for matching
47  // patterns to assign required values
48  if (!bKey) break;
49  }
50 
51  // the token may still be a key (if `bKey` is true, it is for sure: we can
52  // decide that a non-key (!bKey) is actually a key, but not the opposite)
53  for (auto const& [ pattern, values ]: fPatterns) {
54  if (!std::regex_match(begin(token), end(token), pattern)) continue;
55  bKey = true; // matching a pattern implies this is a key
56  std::string const& key = tokenStr;
57  // how many values to expect:
58  switch (values) {
59  case FixedSize: // read the next token immediately as fixed size
60  {
61  if (stream.empty()) throw MissingSize(key);
62 
63  auto const sizeToken = peekToken(stream);
64  if (empty(sizeToken)) throw MissingSize(key);
65 
66  // the value is loaded in `forcedValues` and already excludes
67  // the size token just read
68  char const *b = begin(sizeToken), *e = end(sizeToken);
69  if (std::from_chars(b, e, forcedValues).ptr != e)
70  throw MissingSize(key, std::string{ sizeToken });
71 
72  ++forcedValues; // the size will be forced in the values anyway
73 
74  } // FixedSize
75  break;
76  case DynamicSize:
77  // nothing to do, the normal algorithm rules will follow
78  break;
79  default:
80  forcedValues = values;
81  break;
82  } // switch
83  break;
84  } // for pattern
85  if (bKey) break;
86 
87  // let the "standard" pattern decide
88  bKey = isKey(token);
89 
90  } while (false);
91 
92  if (bKey) currentItem = &(data.makeItem(std::move(tokenStr)));
93  else {
94  if (!currentItem) {
95  throw InvalidFormat(
96  "values started without a key ('" + tokenStr + "' is not a valid key)."
97  );
98  }
99  currentItem->addValue(std::move(tokenStr));
100  }
101 
102  } // while
103 
104  if (forcedValues > 0) {
105  assert(currentItem);
106  throw MissingValues(currentItem->key(), forcedValues);
107  }
108 
109 } // icarus::KeyedCSVparser::parse()
110 
111 
112 // -----------------------------------------------------------------------------
114  (std::initializer_list<std::pair<std::regex, unsigned int>> patterns)
115  -> KeyedCSVparser&
116 {
117  for (auto& pattern: patterns) fPatterns.emplace_back(std::move(pattern));
118  return *this;
119 } // icarus::details::KeyedCSVparser::addPatterns()
120 
121 
122 // -----------------------------------------------------------------------------
124  (std::initializer_list<std::pair<std::string, unsigned int>> patterns)
125  -> KeyedCSVparser&
126 {
127  for (auto& pattern: patterns)
128  fPatterns.emplace_back(std::regex{ pattern.first }, pattern.second);
129  return *this;
130 } // icarus::details::KeyedCSVparser::addPatterns()
131 
132 
133 // -----------------------------------------------------------------------------
135  (std::string const& s) const -> ParsedData_t
136  { return parse(std::string_view{ s.data(), s.size() }); }
137 
138 
139 // -----------------------------------------------------------------------------
141  (Buffer_t const& buffer) const noexcept
142 {
143 
144  auto const start = cbegin(buffer), bend = cend(buffer);
145  auto finish = start;
146  while (finish != bend) {
147  if (*finish == fSep) break;
148  ++finish;
149  } // for
150 
151  return std::distance(start, finish);
152 } // icarus::details::KeyedCSVparser::findTokenLength()
153 
154 
155 // -----------------------------------------------------------------------------
157  (Buffer_t const& buffer) const noexcept -> SubBuffer_t
158 {
159  return strip({ cbegin(buffer), findTokenLength(buffer) });
160 } // icarus::details::KeyedCSVparser::peekToken()
161 
162 
163 // -----------------------------------------------------------------------------
165  (Buffer_t& buffer) const noexcept -> SubBuffer_t
166 {
167 #if 1
168  auto const start = cbegin(buffer), bend = cend(buffer);
169  std::size_t const length = findTokenLength(buffer);
170  moveBufferHead(buffer, length + ((start + length == bend)? 0: 1));
171  return strip({ start, length });
172 #else
173 
174  auto const start = cbegin(buffer), bend = cend(buffer);
175  auto finish = start;
176  while (finish != bend) {
177  if (*finish == fSep) break;
178  ++finish;
179  } // for
180 
181  // update the start of the buffer
182  std::size_t const tokenLength = std::distance(start, finish);
183  moveBufferHead(buffer, tokenLength + ((finish == bend)? 0: 1));
184 
185  return strip({ start, tokenLength });
186 #endif
187 } // icarus::details::KeyedCSVparser::extractToken()
188 
189 
190 // -----------------------------------------------------------------------------
192  (SubBuffer_t const& buffer) const noexcept
193 {
194 
195  return !buffer.empty() && std::isalpha(buffer.front());
196 
197 } // icarus::details::KeyedCSVparser::isKey()
198 
199 
200 // -----------------------------------------------------------------------------
201 template <typename String>
202 auto icarus::details::KeyedCSVparser::makeBuffer(String const& s) noexcept
203  -> Buffer_t
204  { return { data(s), size(s) }; } // C++20: use begin/end constructor
205 
206 
207 
208 // -----------------------------------------------------------------------------
210  (Buffer_t& buffer, std::size_t size) noexcept -> Buffer_t&
211 {
212 
213  size = std::min(size, buffer.size());
214  return buffer = { buffer.data() + size, buffer.size() - size };
215 
216 } // details::KeyedCSVparser::eatBufferHead()
217 
218 
219 // -----------------------------------------------------------------------------
221  -> SubBuffer_t
222  { return stripRight(stripLeft(stripRightChars<'\n', '\r', '\0'>(s))); }
223 
224 
225 // -----------------------------------------------------------------------------
227  -> SubBuffer_t
228 {
229 
230  while (!s.empty()) {
231  if (!std::isspace(s.front())) break;
232  s.remove_prefix(1);
233  }
234  return s;
235 
236 } // icarus::details::KeyedCSVparser::stripLeft()
237 
238 
239 // -----------------------------------------------------------------------------
241  -> SubBuffer_t
242 {
243 
244  while (!s.empty()) {
245  if (!std::isspace(s.back())) break;
246  s.remove_suffix(1);
247  }
248  return s;
249 
250 } // icarus::details::KeyedCSVparser::stripRight()
251 
252 
253 // -----------------------------------------------------------------------------
255  (SubBuffer_t s, char c) noexcept -> SubBuffer_t
256 {
257 
258  while (!s.empty()) {
259  if (s.back() != c) break;
260  s.remove_suffix(1);
261  }
262  return s;
263 
264 } // icarus::details::KeyedCSVparser::stripRightChar()
265 
266 
267 // ----------------------------------------------------------------------------
268 template <char... Chars>
270  (SubBuffer_t s) noexcept -> SubBuffer_t
271 {
272  while (true) {
273  auto ns = s;
274  for (char c: { Chars... }) ns = stripRightChar(ns, c);
275  if (ns == s) return ns;
276  s = ns;
277  } // while(true)
278 
279 } // icarus::details::KeyedCSVparser::stripRightChars()
280 
281 
282 // -----------------------------------------------------------------------------
283 
Parser to fill a KeyValuesData structure out of a character buffer.
bool isKey(SubBuffer_t const &buffer) const noexcept
Is content of buffer a key (as opposed to a value)?
static SubBuffer_t stripRightChars(SubBuffer_t s) noexcept
std::size_t size(FixedBins< T, C > const &) noexcept
Definition: FixedBins.h:561
auto cbegin(FixedBins< T, C > const &) noexcept
Definition: FixedBins.h:567
static SubBuffer_t stripRight(SubBuffer_t s) noexcept
Simple parser for comma-separated text.
auto cend(FixedBins< T, C > const &) noexcept
Definition: FixedBins.h:579
Representation of a single item of data: a key and several values.
ParsedData_t parse(std::string_view const &s) const
Parses the buffer s and returns a data structure with the content.
static SubBuffer_t stripRightChar(SubBuffer_t s, char c) noexcept
double distance(geo::Point_t const &point, CathodeDesc_t const &cathode)
Returns the distance of a point from the cathode.
static Buffer_t & moveBufferHead(Buffer_t &buffer, std::size_t size) noexcept
Collection of items with key/values structure.
auto end(FixedBins< T, C > const &) noexcept
Definition: FixedBins.h:585
std::size_t findTokenLength(Buffer_t const &buffer) const noexcept
Returns the length of the next token, up to the next separator (excluded).
KeyedCSVparser & addPatterns(std::initializer_list< std::pair< std::regex, unsigned int >> patterns)
Adds known patterns.
auto begin(FixedBins< T, C > const &) noexcept
Definition: FixedBins.h:573
static SubBuffer_t stripLeft(SubBuffer_t s) noexcept
SubBuffer_t extractToken(Buffer_t &buffer) const noexcept
Extracts the next token from the buffer and returns its value, stripped.
then echo File list $list not found else cat $list while read file do echo $file sed s
Definition: file_to_url.sh:60
static SubBuffer_t strip(SubBuffer_t s) noexcept
do i e
Item & makeItem(std::string key)
Creates and registers a new item with the specified key.
SubBuffer_t peekToken(Buffer_t const &buffer) const noexcept
Returns the value of the next token, stripped.
static Buffer_t makeBuffer(String const &s) noexcept
bool empty(FixedBins< T, C > const &) noexcept
Definition: FixedBins.h:555