All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
KeyValueParser.cxx
Go to the documentation of this file.
1 /**
2  * @file icaruscode/PMT/Algorithms/KeyValueParser.cxx
3  * @brief Simple parser for "key: value" text.
4  * @author Gianluca Petrillo (petrillo@slac.stanford.edu)
5  * @date May 13, 2022
6  * @see icaruscode/PMT/Algorithms/KeyValueParser.h
7  */
8 
9 // library header
11 
12 // C++ standard libraries
13 #include <algorithm> // std::copy(), std::sort()
14 #include <istream>
15 #include <sstream>
16 #include <initializer_list>
17 #include <iterator> // std::advance(), std::next(), std::move_iterator()
18 #include <string>
19 #include <utility> // std::move()
20 #include <cstdint> // std::size_t
21 
22 
23 // -----------------------------------------------------------------------------
24 namespace {
25 
/**
 * @brief Returns the `keys` as strings, sorted longest first.
 * @tparam Keys type of each key; a `std::string` must be constructible from it
 *              (e.g. `std::string`, `std::string_view`, `const char*`)
 * @param keys the keys to be copied and sorted
 * @return a vector with a `std::string` copy of each key, ordered by
 *         decreasing length; keys of equal length are in lexicographic order
 *
 * Code that scans the returned collection in order and stops at the first
 * match will test a longer key before any shorter key that is a prefix of it.
 */
template <typename Keys>
std::vector<std::string> sortKeys(std::initializer_list<Keys> keys) {

  std::vector<std::string> sorted;
  sorted.reserve(keys.size());
  // explicit construction: also accepts key types (like `std::string_view`)
  // whose conversion to `std::string` is not implicit
  for (Keys const& key: keys) sorted.emplace_back(key);

  // longest first; lexicographic order breaks the ties
  auto const longestFirst = [](std::string const& a, std::string const& b)
    {
      return (a.length() != b.length())? (a.length() > b.length()): (a < b);
    };
  std::sort(sorted.begin(), sorted.end(), longestFirst);

  return sorted;
} // sortKeys()
43 
44 } // local namespace
45 
46 
47 // -----------------------------------------------------------------------------
48 // --- icarus::details::KeyValueParser
49 // -----------------------------------------------------------------------------
52 
53 
54 // -----------------------------------------------------------------------------
55  /// Creates a parser with the specified parsing parameters.
57  FormatParams_t formatParams /* = DefaultFormatParameters */,
59  /* = icarus::ParsingToolkit::DefaultParameters */
60  )
61  : fPTK{ std::move(parserParams) }
62  , fFmt{ std::move(formatParams) }
63  , fKeys{ sortKeys({ fFmt.newKey, fFmt.addKey }) }
64 {}
65 
66 
67 // -----------------------------------------------------------------------------
68 auto icarus::details::KeyValueParser::parse(std::istream& s) const
69  -> ParsedData_t
70 {
71 
72  ParsedData_t data;
73 
74  unsigned int iSrcLine = 0U; // count of lines in the source
75  while (s) {
76 
77  auto [ line, nMultiLines ] = fPTK.readMultiline(s);
78  iSrcLine += nMultiLines;
79 
80  if (line.empty()) continue;
81 
82  std::vector<std::string_view> tokens = fPTK.splitWords(line);
83 
84  fPTK.removeCommentLine(tokens);
85  if (tokens.empty()) continue;
86 
87  keyType const kType = highlightSeparator(tokens);
88  if (kType == keyType::unsupported) {
89  auto iKey = fKeys.cbegin(), kend = fKeys.cend();
90  std::string l { "'" + *iKey + "'" };
91  while (++iKey != kend) l += ", '" + *iKey + "'";
92  throw ParserError
93  { iSrcLine - nMultiLines + 1, line, "no key separator (" + l + ")" };
94  }
95  assert(tokens.size() >= 2U);
96 
97  std::vector<std::string> words
98  = fPTK.removeEscapes(fPTK.removeQuotations(tokens));
99 
100  auto iWord = words.begin();
101  ParsedData_t::Item& item = data.makeOrFetchItem(std::move(*iWord));
102  std::advance(iWord, 2); // skip to after the separator
103 
104  switch (kType) {
105  case keyType::create:
106  item.clear();
107  [[fallthrough]];
108  case keyType::add:
109  item.addValues
110  (std::move_iterator{ iWord }, std::move_iterator{ words.end() });
111  break;
112  default:
113  throw ParserError{ iSrcLine - nMultiLines + 1, line,
114  "LOGIC ERROR: '" + std::string{ tokens[1] }
115  + "' should have been a key separator"
116  };
117  } // switch
118 
119  } // while
120 
121  return data;
122 
123 } // icarus::KeyValueParser::parse()
124 
125 
126 // -----------------------------------------------------------------------------
128  (std::string const& s) const -> ParsedData_t
129  { return parse(std::istringstream{ s }); }
130 
131 
132 // -----------------------------------------------------------------------------
134  (std::vector<std::string_view>& tokens) const -> keyType
135 {
136 
137  // need to find the separator in the unquoted, unescaped parts of first token,
138  // or the separator must be the start of the second token
139 
140  if (tokens.empty()) return keyType::unsupported;
141 
142  //
143  // separator in the first word
144  //
145  std::string_view const firstKey
146  = fPTK.findFirstUnquoted(tokens.front(), fKeys);
147 
148  if (!firstKey.empty()) {
149  auto const [ pre, sep, post ] = fPTK.splitOn(tokens.front(), firstKey);
150  // if pre is empty, it's still considered a (empty) key (questionable...)
151  // if post is empty, we omit it
152  tokens.front() = sep;
153  if (!post.empty()) tokens.insert(std::next(tokens.begin()), post);
154  tokens.insert(tokens.begin(), pre);
155  return keySepType(sep);
156  }
157 
158  //
159  // separator is the second word start
160  //
161  if (tokens.size() < 2U)
162  return keyType::unsupported; // ah, actually: no, it wasn't.
163 
164  for (std::string const& sep: fKeys) {
165 
166  if (tokens[1].compare(0, sep.length(), sep) != 0) continue;
167 
168  // so now we have [0] key [1] sep[+first value] [2...] values;
169  // we just need to insert the "first value", if any, as its own token;
170  // we also take care to leave the views pointing to the token rather than to
171  // a member of 'fKeys'
172  if (!tokens[1].empty()) {
173  std::string_view firstValue = tokens[1];
174  firstValue.remove_prefix(sep.length());
175  tokens.insert(std::next(tokens.begin(), 2), firstValue);
176  tokens[1].remove_suffix(firstValue.length());
177  }
178 
179  return keySepType(sep);
180  } // for
181 
182  //
183  // no separator at all
184  //
185  return keyType::unsupported;
186 
187 } // icarus::details::KeyValueParser::highlightSeparator()
188 
189 
190 // -----------------------------------------------------------------------------
191 template <typename Key>
192 auto icarus::details::KeyValueParser::keySepType(Key const& key) const
193  -> keyType
194 {
195  if (key == fFmt.newKey) return keyType::create;
196  if (key == fFmt.addKey) return keyType::add;
197  return keyType::unsupported;
198 } // icarus::details::KeyValueParser::keySepType()
199 
200 
201 // -----------------------------------------------------------------------------
203  (unsigned int iLine, std::string const& line, std::string const& msg)
204  : Error{
205  "KeyValueParser::ParserError on line " + std::to_string(iLine)
206  + " ('" + line + "'): " + msg
207  }
208  {}
209 
210 // -----------------------------------------------------------------------------
211 
Simple parser for "key: value" text.
std::size_t size(FixedBins< T, C > const &) noexcept
Definition: FixedBins.h:561
KeyValueParser(FormatParams_t formatParams=DefaultFormatParameters, icarus::ParsingToolkit::Params_t parserParams=icarus::ParsingToolkit::DefaultParameters)
Creates a parser with the specified parsing parameters.
All parsing parameters.
keyType keySepType(Key const &key) const
Returns the type of key.
process_name gaushit a
static const FormatParams_t DefaultFormatParameters
keyType highlightSeparator(std::vector< std::string_view > &tokens) const
Modifies tokens placing the key/value separator in its own token.
auto end(FixedBins< T, C > const &) noexcept
Definition: FixedBins.h:585
auto begin(FixedBins< T, C > const &) noexcept
Definition: FixedBins.h:573
std::string to_string(WindowPattern const &pattern)
then echo File list $list not found else cat $list while read file do echo $file sed s
Definition: file_to_url.sh:60
ParsedData_t parse(std::istream &stream) const
Parses the stream and returns a data structure with the content.
T copy(T const &v)
ParserError(unsigned int iLine, std::string const &line, std::string const &msg)
bool empty(FixedBins< T, C > const &) noexcept
Definition: FixedBins.h:555