All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ParsingToolkit_test.cc
Go to the documentation of this file.
1 /**
2  * @file ParsingToolkit_test.cc
3  * @brief Unit test for utilities in `ParsingToolkit.h`
4  * @date May 13, 2022
5  * @author Gianluca Petrillo (petrillo@slac.stanford.edu)
6  * @see icaruscode/PMT/Algorithms/ParsingToolkit.h
7  *
8  */
9 
10 // ICARUS libraries
12 
13 // Boost libraries
14 #define BOOST_TEST_MODULE ( ParsingToolkit_test )
15 #include <boost/test/unit_test.hpp>
16 
17 // C/C++ standard libraries
18 #include <tuple> // std::tie()
19 #include <vector>
20 #include <string_view>
21 #include <string>
22 #include <sstream>
23 #include <cassert>
24 
25 
26 // -----------------------------------------------------------------------------
27 // --- implementation detail tests
28 // -----------------------------------------------------------------------------
30 
31  icarus::ParsingToolkit const tk; // default configuration
32 
33  std::string const s { R"(\a\\a\\\a\\\\a\\\\\a\\\\\\a\\\)" };
34  auto const b = s.begin();
35  auto it = b;
36 
37  BOOST_TEST(!tk.isCharacterEscaped(b, it++)); // \ !
38  BOOST_TEST( tk.isCharacterEscaped(b, it++)); // \a !
39  BOOST_TEST(!tk.isCharacterEscaped(b, it++)); // \a\ !
40  BOOST_TEST( tk.isCharacterEscaped(b, it++)); // \a\\ !
41  BOOST_TEST(!tk.isCharacterEscaped(b, it++)); // \a\\a !
42  BOOST_TEST(!tk.isCharacterEscaped(b, it++)); // \a\\a\ !
43  BOOST_TEST( tk.isCharacterEscaped(b, it++)); // \a\\a\\ !
44  BOOST_TEST(!tk.isCharacterEscaped(b, it++)); // \a\\a\\\ !
45  BOOST_TEST( tk.isCharacterEscaped(b, it++)); // \a\\a\\\a !
46  BOOST_TEST(!tk.isCharacterEscaped(b, it++)); // \a\\a\\\a\ !
47  BOOST_TEST( tk.isCharacterEscaped(b, it++)); // \a\\a\\\a\\ !
48  BOOST_TEST(!tk.isCharacterEscaped(b, it++)); // \a\\a\\\a\\\ !
49  BOOST_TEST( tk.isCharacterEscaped(b, it++)); // \a\\a\\\a\\\\ !
50  BOOST_TEST(!tk.isCharacterEscaped(b, it++)); // \a\\a\\\a\\\\a !
51  BOOST_TEST(!tk.isCharacterEscaped(b, it++)); // \a\\a\\\a\\\\a\ !
52  BOOST_TEST( tk.isCharacterEscaped(b, it++)); // \a\\a\\\a\\\\a\\ !
53  BOOST_TEST(!tk.isCharacterEscaped(b, it++)); // \a\\a\\\a\\\\a\\\ !
54  BOOST_TEST( tk.isCharacterEscaped(b, it++)); // \a\\a\\\a\\\\a\\\\ !
55  BOOST_TEST(!tk.isCharacterEscaped(b, it++)); // \a\\a\\\a\\\\a\\\\\ !
56  BOOST_TEST( tk.isCharacterEscaped(b, it++)); // \a\\a\\\a\\\\a\\\\\a !
57  BOOST_TEST(!tk.isCharacterEscaped(b, it++)); // \a\\a\\\a\\\\a\\\\\a\ !
58  BOOST_TEST( tk.isCharacterEscaped(b, it++)); // \a\\a\\\a\\\\a\\\\\a\\ !
59  BOOST_TEST(!tk.isCharacterEscaped(b, it++)); // \a\\a\\\a\\\\a\\\\\a\\\ !
60  BOOST_TEST( tk.isCharacterEscaped(b, it++)); // \a\\a\\\a\\\\a\\\\\a\\\\ !
61  BOOST_TEST(!tk.isCharacterEscaped(b, it++)); // \a\\a\\\a\\\\a\\\\\a\\\\\ !
62  BOOST_TEST( tk.isCharacterEscaped(b, it++)); // \a\\a\\\a\\\\a\\\\\a\\\\\\ !
63  BOOST_TEST(!tk.isCharacterEscaped(b, it++)); // \a\\a\\\a\\\\a\\\\\a\\\\\\a !
64  BOOST_TEST(!tk.isCharacterEscaped(b, it++)); // \a\\a\\\a\\\\a\\\\\a\\\\\\a\ !
65  BOOST_TEST( tk.isCharacterEscaped(b, it++)); // \a\\a\\\a\\\\a\\\\\a\\\\\\a\\ !
66  BOOST_TEST(!tk.isCharacterEscaped(b, it++)); // \a\\a\\\a\\\\a\\\\\a\\\\\\a\\\ !
67  assert(it == s.end());
68  BOOST_TEST( tk.isCharacterEscaped(b, it));
69 
70 } // isCharacterEscaped_test()
71 
72 
73 // -----------------------------------------------------------------------------
75 
76  using namespace std::string_literals;
77  using namespace std::string_view_literals;
78  icarus::ParsingToolkit tk; // default configuration
79 
80  BOOST_TEST(tk.removeTrailingBlanks(""s) == ""sv);
81 
82  BOOST_TEST(tk.removeTrailingBlanks("a b c "s) == "a b c "sv);
83 
84  BOOST_TEST(tk.removeTrailingBlanks(" \\ a b c "s) == "\\ a b c "sv);
85 
86  BOOST_TEST(tk.removeTrailingBlanks(" \t a b c "s) == "a b c "sv);
87 
88 } // removeTrailingBlanks_test()
89 
90 
91 // -----------------------------------------------------------------------------
93 
94  using namespace std::string_literals;
95  using namespace std::string_view_literals;
96  icarus::ParsingToolkit tk; // default configuration
97 
98  std::string_view s;
99  BOOST_TEST(std::distance(s.cbegin(), tk.findNextBlank(s)) == 0);
100 
101  s = "a b c "sv;
102  BOOST_TEST(std::distance(s.cbegin(), tk.findNextBlank(s)) == 1);
103 
104  s = "ab\\ cd e "sv;
105  BOOST_TEST(std::distance(s.cbegin(), tk.findNextBlank(s)) == 6);
106 
107  s = "a "sv;
108  BOOST_TEST(std::distance(s.cbegin(), tk.findNextBlank(s)) == 1);
109 
110 } // findNextBlank_test()
111 
112 
113 // -----------------------------------------------------------------------------
115 
116  using namespace std::string_literals;
117 
118  std::vector const lines {
119  "This line is not empty. The previous and the next two are."s
120  , "This is a multiline line of one, "s
121  , "two, "s
122  , "three lines."s
123  , R"(This "is a" "multi\"quoted\" strange)"s
124  , R"( line")"s
125  , "This is a normal line, but its not terminated end gets to: EOF"s
126  };
127 
128  std::istringstream stream {
129  "\n" // <--
130  + lines[0] + "\n" // <--
131  + "\n" // <--
132  + "\n" // <--
133  + lines[1] + "\\\n" // -.
134  + lines[2] + "\\\n" // |<--
135  + lines[3] + "\n" // -'
136  + lines[4] + "\n" // -.___
137  + lines[5] + "\n" // -'
138  + lines[6] // <--
139  };
140 
141  icarus::ParsingToolkit tk; // default configuration
142 
143  assert(stream);
144 
145  auto [ line, nPieces ] = tk.readMultiline(stream);
146  BOOST_TEST(line == ""s);
147  BOOST_TEST(nPieces == 1U);
148  BOOST_TEST(bool(stream));
149 
150  std::tie(line, nPieces) = tk.readMultiline(stream);
151  BOOST_TEST(line == lines[0]);
152  BOOST_TEST(nPieces == 1U);
153  BOOST_TEST(bool(stream));
154 
155  std::tie(line, nPieces) = tk.readMultiline(stream);
156  BOOST_TEST(line == "");
157  BOOST_TEST(nPieces == 1U);
158  BOOST_TEST(bool(stream));
159 
160  std::tie(line, nPieces) = tk.readMultiline(stream);
161  BOOST_TEST(line == "");
162  BOOST_TEST(nPieces == 1U);
163  BOOST_TEST(bool(stream));
164 
165  std::tie(line, nPieces) = tk.readMultiline(stream);
166  BOOST_TEST(line == lines[1] + lines[2] + lines[3]);
167  BOOST_TEST(nPieces == 3U);
168  BOOST_TEST(bool(stream));
169 
170  std::tie(line, nPieces) = tk.readMultiline(stream);
171  BOOST_TEST(line == lines[4] + "\n"s + lines[5]);
172  BOOST_TEST(nPieces == 2U);
173  BOOST_TEST(bool(stream));
174 
175  std::tie(line, nPieces) = tk.readMultiline(stream);
176  BOOST_TEST(line == lines[6]);
177  BOOST_TEST(nPieces == 1U);
178  BOOST_TEST(bool(stream));
179 
180  std::tie(line, nPieces) = tk.readMultiline(stream);
181  BOOST_TEST(line.empty());
182  BOOST_TEST(nPieces == 0U);
183  BOOST_TEST(!stream);
184 
185 } // readMultiline_test()
186 
187 
188 void readMultiline_endOfInput_test() {
189 
190  using namespace std::string_literals;
191 
192  icarus::ParsingToolkit tk; // default configuration
193 
194  std::istringstream stream;
195  auto const refillStream = [&stream](std::string s) -> std::istringstream&
196  { stream.clear(); stream.str(std::move(s)); return stream; };
197 
198  refillStream(
199  R"()"
200  );
201  auto [ line, nPieces ] = tk.readMultiline(stream);
202  BOOST_TEST(line == "");
203  BOOST_TEST(nPieces == 0U);
204 
205  refillStream(
206  R"(The end.)" "\n"
207  );
208  std::tie(line, nPieces) = tk.readMultiline(stream);
209  BOOST_TEST(line == "The end.");
210  BOOST_TEST(nPieces == 1U);
211  std::tie(line, nPieces) = tk.readMultiline(stream);
212  BOOST_TEST(line == "");
213  BOOST_TEST(nPieces == 0U);
214 
215  refillStream(
216  R"(Unfinished business)"
217  );
218  std::tie(line, nPieces) = tk.readMultiline(stream);
219  BOOST_TEST(line == "Unfinished business");
220  BOOST_TEST(nPieces == 1U);
221 
222  refillStream(
223  R"(Unfinished business\)"
224  );
225  std::tie(line, nPieces) = tk.readMultiline(stream);
226  BOOST_TEST(line == "Unfinished business");
227  BOOST_TEST(nPieces == 1U);
228 
229  refillStream(
230  R"(Unfinished "business)"
231  );
232  std::tie(line, nPieces) = tk.readMultiline(stream);
233  BOOST_TEST(line == R"(Unfinished "business)");
234  BOOST_TEST(nPieces == 1U);
235 
236  refillStream(
237  R"(Unfinished\)"
238  "\n"
239  R"( business)"
240  );
241  std::tie(line, nPieces) = tk.readMultiline(stream);
242  BOOST_TEST(line == "Unfinished business");
243  BOOST_TEST(nPieces == 2U);
244 
245  refillStream(
246  R"("Unfinished)"
247  "\n"
248  R"( business")"
249  );
250  std::tie(line, nPieces) = tk.readMultiline(stream);
251  BOOST_TEST(line == "\"Unfinished\n business\""s);
252  BOOST_TEST(nPieces == 2U);
253 
254  refillStream(
255  R"("Unfinished)"
256  "\n"
257  R"( business)"
258  );
259  std::tie(line, nPieces) = tk.readMultiline(stream);
260  BOOST_TEST(line == "\"Unfinished\n business");
261  BOOST_TEST(nPieces == 2U);
262 
263 } // readMultiline_endOfInput_test()
264 
265 
266 // -----------------------------------------------------------------------------
267 void splitWords_test() {
268 
269  using namespace std::string_literals;
270 
271  using Words_t = std::vector<std::string>;
272 
273  icarus::ParsingToolkit tk; // default configuration
274 
275  {
276  std::string const s { R"(a b c)" };
277  auto const& words = tk.splitWords(s);
278  Words_t const expected { R"(a)"s, R"(b)"s, R"(c)"s };
279  BOOST_CHECK_EQUAL_COLLECTIONS
280  (words.begin(), words.end(), expected.begin(), expected.end());
281  }
282 
283  {
284  std::string const s { R"( a "bb" c )" };
285  auto const& words = tk.splitWords(s);
286  Words_t const expected { R"(a)"s, R"("bb")"s, R"(c)"s };
287  BOOST_CHECK_EQUAL_COLLECTIONS
288  (words.begin(), words.end(), expected.begin(), expected.end());
289  }
290 
291  {
292  std::string const s { R"(a "b c")" };
293  auto const& words = tk.splitWords(s);
294  Words_t const expected { R"(a)"s, R"("b c")"s };
295  BOOST_CHECK_EQUAL_COLLECTIONS
296  (words.begin(), words.end(), expected.begin(), expected.end());
297  }
298 
299  {
300  std::string const s { R"("")" };
301  auto const& words = tk.splitWords(s);
302  Words_t const expected { R"("")"s };
303  BOOST_CHECK_EQUAL_COLLECTIONS
304  (words.begin(), words.end(), expected.begin(), expected.end());
305  }
306 
307  {
308  std::string const s { R"(a\ b c)" };
309  auto const& words = tk.splitWords(s);
310  Words_t const expected { R"(a\ b)"s, R"(c)"s };
311  BOOST_CHECK_EQUAL_COLLECTIONS
312  (words.begin(), words.end(), expected.begin(), expected.end());
313  }
314 
315  {
316  std::string const s { R"(a b" c")" };
317  auto const& words = tk.splitWords(s);
318  Words_t const expected { R"(a)"s, R"(b" c")"s };
319  BOOST_CHECK_EQUAL_COLLECTIONS
320  (words.begin(), words.end(), expected.begin(), expected.end());
321  }
322 
323  {
324  std::string const s { R"("a "b \"c d"")" };
325  auto const& words = tk.splitWords(s);
326  Words_t const expected { R"("a "b)"s, R"(\"c)"s, R"(d"")"s };
327  BOOST_CHECK_EQUAL_COLLECTIONS
328  (words.begin(), words.end(), expected.begin(), expected.end());
329  }
330 
331 
332 } // splitWords_test()
333 
334 
335 // -----------------------------------------------------------------------------
336 void findFirstUnescaped_test() {
337 
338  using namespace std::string_view_literals;
339 
340  icarus::ParsingToolkit const tk; // default configuration
341 
342  {
343  std::string_view const sv { R"()" };
344  std::string_view const key = tk.findFirstUnescaped(sv, { ":+", ":" });
345  BOOST_TEST(key == "");
346  BOOST_TEST(std::distance(sv.begin(), key.begin()) == 0);
347  }
348 
349  {
350  std::string_view const sv { R"(a :: b)" };
351  std::string_view const key = tk.findFirstUnescaped(sv, { ":+", ":" });
352  BOOST_TEST(key == ":");
353  BOOST_TEST(std::distance(sv.begin(), key.begin()) == 2);
354  }
355 
356  {
357  std::string_view const sv { R"(a ::+ b)" };
358  std::string_view const key = tk.findFirstUnescaped(sv, { ":+", ":" });
359  BOOST_TEST(key == ":");
360  BOOST_TEST(std::distance(sv.begin(), key.begin()) == 2);
361  }
362 
363  {
364  std::string_view const sv { R"(a \::+ b)" };
365  std::string_view const key = tk.findFirstUnescaped(sv, { ":+", ":" });
366  BOOST_TEST(key == ":+");
367  BOOST_TEST(std::distance(sv.begin(), key.begin()) == 4);
368  }
369 
370  {
371  std::string_view const sv { R"(a\ ::+ b)" };
372  std::string_view const key = tk.findFirstUnescaped(sv, { ":+", ":" });
373  BOOST_TEST(key == ":");
374  BOOST_TEST(std::distance(sv.begin(), key.begin()) == 3);
375  }
376 
377  {
378  std::string_view const sv { R"(a :+)" };
379  std::string_view const key = tk.findFirstUnescaped(sv, { ":+", ":" });
380  BOOST_TEST(key == ":+");
381  BOOST_TEST(std::distance(sv.begin(), key.begin()) == 2);
382  }
383 
384  {
385  std::string_view const sv { R"(a \:+)" };
386  std::string_view const key = tk.findFirstUnescaped(sv, { ":+", ":" });
387  BOOST_TEST(key == "");
388  BOOST_TEST(std::distance(sv.begin(), key.begin()) == 5);
389  }
390 
391  {
392  std::string_view const sv { R"(:+ b)" };
393  std::string_view const key = tk.findFirstUnescaped(sv, { ":+", ":" });
394  BOOST_TEST(key == ":+");
395  BOOST_TEST(std::distance(sv.begin(), key.begin()) == 0);
396  }
397 
398  {
399  std::string_view const sv { R"(nope)" };
400  std::string_view const key = tk.findFirstUnescaped(sv, { ":+", ":" });
401  BOOST_TEST(key == "");
402  BOOST_TEST(std::distance(sv.begin(), key.begin()) == 4);
403  }
404 
405 
406 } // findFirstUnescaped_test()
407 
408 
409 // -----------------------------------------------------------------------------
410 void findFirstUnquoted_test() {
411 
412  using namespace std::string_view_literals;
413 
414  icarus::ParsingToolkit const tk; // default configuration
415 
416  auto const findAndSplit = [tk](std::string_view sv)
417  { return tk.splitOn(sv, tk.findFirstUnquoted(sv, { ":+", ":" })); };
418 
419  {
420  auto const [ pre, sep, post ] = findAndSplit(""sv);
421  BOOST_TEST(pre == R"()"sv);
422  BOOST_TEST(sep == R"()"sv);
423  BOOST_TEST(post == R"()"sv);
424  }
425 
426  {
427  auto const [ pre, sep, post ] = findAndSplit(R"(a:+b)"sv);
428  BOOST_TEST(pre == R"(a)"sv);
429  BOOST_TEST(sep == R"(:+)"sv);
430  BOOST_TEST(post == R"(b)"sv);
431  }
432 
433  {
434  auto const [ pre, sep, post ] = findAndSplit(R"(a::b)"sv);
435  BOOST_TEST(pre == R"(a)"sv);
436  BOOST_TEST(sep == R"(:)"sv);
437  BOOST_TEST(post == R"(:b)"sv);
438  }
439 
440  {
441  auto const [ pre, sep, post ] = findAndSplit(R"(a\:b)"sv);
442  BOOST_TEST(pre == R"(a\:b)"sv);
443  BOOST_TEST(sep == R"()"sv);
444  BOOST_TEST(post == R"()"sv);
445  }
446 
447  {
448  auto const [ pre, sep, post ] = findAndSplit(R"(a :+ b)"sv);
449  BOOST_TEST(pre == R"(a )"sv);
450  BOOST_TEST(sep == R"(:+)"sv);
451  BOOST_TEST(post == R"( b)"sv);
452  }
453 
454  {
455  auto const [ pre, sep, post ] = findAndSplit(R"(a\ :+ b)"sv);
456  BOOST_TEST(pre == R"(a\ )"sv);
457  BOOST_TEST(sep == R"(:+)"sv);
458  BOOST_TEST(post == R"( b)"sv);
459  }
460 
461  {
462  auto const [ pre, sep, post ] = findAndSplit(R"(a\ \::+ b)"sv);
463  BOOST_TEST(pre == R"(a\ \:)"sv);
464  BOOST_TEST(sep == R"(:+)"sv);
465  BOOST_TEST(post == R"( b)"sv);
466  }
467 
468  {
469  auto const [ pre, sep, post ] = findAndSplit(R"(a\ \:+ b)"sv);
470  BOOST_TEST(pre == R"(a\ \:+ b)"sv);
471  BOOST_TEST(sep == R"()"sv);
472  BOOST_TEST(post == R"()"sv);
473  }
474 
475  {
476  auto const [ pre, sep, post ] = findAndSplit(R"(a\ :+\ b)"sv);
477  BOOST_TEST(pre == R"(a\ )"sv);
478  BOOST_TEST(sep == R"(:+)"sv);
479  BOOST_TEST(post == R"(\ b)"sv);
480  }
481 
482  {
483  auto const [ pre, sep, post ] = findAndSplit(R"("a":+"b")"sv);
484  BOOST_TEST(pre == R"("a")"sv);
485  BOOST_TEST(sep == R"(:+)"sv);
486  BOOST_TEST(post == R"("b")"sv);
487  }
488 
489  {
490  auto const [ pre, sep, post ] = findAndSplit(R"("a:":+"b")"sv);
491  BOOST_TEST(pre == R"("a:")"sv);
492  BOOST_TEST(sep == R"(:+)"sv);
493  BOOST_TEST(post == R"("b")"sv);
494  }
495 
496  {
497  auto const [ pre, sep, post ] = findAndSplit(R"(a:":+"b)"sv);
498  BOOST_TEST(pre == R"(a)"sv);
499  BOOST_TEST(sep == R"(:)"sv);
500  BOOST_TEST(post == R"(":+"b)"sv);
501  }
502 
503  {
504  auto const [ pre, sep, post ] = findAndSplit(R"("a:"aa:+b"b)"sv);
505  BOOST_TEST(pre == R"("a:"aa)"sv);
506  BOOST_TEST(sep == R"(:+)"sv);
507  BOOST_TEST(post == R"(b"b)"sv);
508  }
509 
510 } // findFirstUnquoted_test()
511 
512 
513 // -----------------------------------------------------------------------------
514 void removeCommentLine_test() {
515 
516  using namespace std::string_view_literals;
517 
518  icarus::ParsingToolkit const tk; // default configuration
519 
520  std::vector<std::string_view> words, expected;
521  BOOST_TEST(
522  std::distance(words.begin(), tk.findCommentWord(words.begin(), words.end()))
523  == 0
524  );
525  BOOST_CHECK_EQUAL_COLLECTIONS
526  (words.begin(), words.end(), expected.begin(), expected.end());
527 
528 
529  words = { " One"sv, "Two#"sv, "Th#ree"sv, " #Four"sv, "Five"sv, "Six"sv };
530  expected = words;
531  BOOST_TEST(
532  std::distance(words.begin(), tk.findCommentWord(words.begin(), words.end()))
533  == 6
534  );
535  tk.removeCommentLine(words);
536  BOOST_CHECK_EQUAL_COLLECTIONS
537  (words.begin(), words.end(), expected.begin(), expected.end());
538 
539 
540  words = { "#"sv, "a"sv, "long"sv, "comment"sv };
541  expected = {};
542  BOOST_TEST(
543  std::distance(words.begin(), tk.findCommentWord(words.begin(), words.end()))
544  == 0
545  );
546  tk.removeCommentLine(words);
547  BOOST_CHECK_EQUAL_COLLECTIONS
548  (words.begin(), words.end(), expected.begin(), expected.end());
549 
550 
551  words = { "#acompactcomment"sv, "!"sv };
552  expected = {};
553  BOOST_TEST(
554  std::distance(words.begin(), tk.findCommentWord(words.begin(), words.end()))
555  == 0
556  );
557  tk.removeCommentLine(words);
558  BOOST_CHECK_EQUAL_COLLECTIONS
559  (words.begin(), words.end(), expected.begin(), expected.end());
560 
561 
562  words = { " One"sv, "Two#"sv, "Th#ree"sv, " #Four"sv, "#Five"sv, "Six"sv };
563  expected = { " One"sv, "Two#"sv, "Th#ree"sv, " #Four"sv };
564  BOOST_TEST(
565  std::distance(words.begin(), tk.findCommentWord(words.begin(), words.end()))
566  == 4
567  );
568  tk.removeCommentLine(words);
569  BOOST_CHECK_EQUAL_COLLECTIONS
570  (words.begin(), words.end(), expected.begin(), expected.end());
571 
572 
573  words = { " One"sv, "Two#"sv, "Th#ree"sv, "\\#Four"sv, "#"sv, "Six"sv };
574  expected = { " One"sv, "Two#"sv, "Th#ree"sv, "\\#Four"sv };
575  BOOST_TEST(
576  std::distance(words.begin(), tk.findCommentWord(words.begin(), words.end()))
577  == 4
578  );
579  tk.removeCommentLine(words);
580  BOOST_CHECK_EQUAL_COLLECTIONS
581  (words.begin(), words.end(), expected.begin(), expected.end());
582 
583 
584 } // removeCommentLine_test()
585 
586 
587 // -----------------------------------------------------------------------------
588 void findQuotationStart_test() {
589 
590  using namespace std::string_literals;
591 
592  std::string_view sv {
593  R"(No quote. Still no \"quote\". "a" "b c" 'd' "e 'f' g" "h 'i" "j' k" "unfinished 'm')"
594  };
595 
596  icarus::ParsingToolkit tk; // default configuration
597 
598  std::string_view qsv;
599  icarus::ParsingToolkit::QuotSpec_t const* qptr = nullptr;
600 
601  std::tie(qsv, qptr) = tk.findQuotationStart(sv);
602  BOOST_TEST_REQUIRE(qsv == R"("a" "b c" 'd' "e 'f' g" "h 'i" "j' k" "unfinished 'm')");
603  BOOST_TEST_REQUIRE(qptr != nullptr);
604  BOOST_TEST_REQUIRE(qptr->first == "\""s);
605 
606  sv = qsv;
607  sv.remove_prefix(3);
608  std::tie(qsv, qptr) = tk.findQuotationStart(sv);
609  BOOST_TEST_REQUIRE(qsv == R"("b c" 'd' "e 'f' g" "h 'i" "j' k" "unfinished 'm')");
610  BOOST_TEST_REQUIRE(qptr != nullptr);
611  BOOST_TEST_REQUIRE(qptr->first == "\""s);
612 
613  sv = qsv;
614  sv.remove_prefix(5);
615  std::tie(qsv, qptr) = tk.findQuotationStart(sv);
616  BOOST_TEST_REQUIRE(qsv == R"('d' "e 'f' g" "h 'i" "j' k" "unfinished 'm')");
617  BOOST_TEST_REQUIRE(qptr != nullptr);
618  BOOST_TEST_REQUIRE(qptr->first == "'"s);
619 
620  sv = qsv;
621  sv.remove_prefix(3);
622  std::tie(qsv, qptr) = tk.findQuotationStart(sv);
623  BOOST_TEST_REQUIRE(qsv == R"("e 'f' g" "h 'i" "j' k" "unfinished 'm')");
624  BOOST_TEST_REQUIRE(qptr != nullptr);
625  BOOST_TEST_REQUIRE(qptr->first == "\""s);
626 
627  sv = qsv;
628  sv.remove_prefix(9);
629  std::tie(qsv, qptr) = tk.findQuotationStart(sv);
630  BOOST_TEST_REQUIRE(qsv == R"("h 'i" "j' k" "unfinished 'm')");
631  BOOST_TEST_REQUIRE(qptr != nullptr);
632  BOOST_TEST_REQUIRE(qptr->first == "\""s);
633 
634  sv = qsv;
635  sv.remove_prefix(6);
636  std::tie(qsv, qptr) = tk.findQuotationStart(sv);
637  BOOST_TEST_REQUIRE(qsv == R"("j' k" "unfinished 'm')");
638  BOOST_TEST_REQUIRE(qptr != nullptr);
639  BOOST_TEST_REQUIRE(qptr->first == "\""s);
640 
641  sv = qsv;
642  sv.remove_prefix(6);
643  std::tie(qsv, qptr) = tk.findQuotationStart(sv);
644  BOOST_TEST_REQUIRE(qsv == R"("unfinished 'm')");
645  BOOST_TEST_REQUIRE(qptr != nullptr);
646  BOOST_TEST_REQUIRE(qptr->first == "\""s);
647 
648  sv = qsv;
649  sv.remove_prefix(1);
650  std::tie(qsv, qptr) = tk.findQuotationStart(sv);
651  BOOST_TEST_REQUIRE(qsv == R"('m')");
652  BOOST_TEST_REQUIRE(qptr != nullptr);
653  BOOST_TEST_REQUIRE(qptr->first == "'"s);
654 
655  sv = qsv;
656  sv.remove_prefix(3);
657  std::tie(qsv, qptr) = tk.findQuotationStart(sv);
658  BOOST_TEST_REQUIRE(qsv.empty());
659  BOOST_TEST_REQUIRE(qptr == nullptr);
660 
661 } // findQuotationStart_test()
662 
663 
664 void findQuotationStart_noquote_test() {
665 
666  icarus::ParsingToolkit tk; // default configuration
667 
668  std::string_view sv { "No quotes at all." };
669  auto const [ qsv, qptr ] = tk.findQuotationStart(sv);
670  BOOST_TEST(qsv.empty());
671  BOOST_TEST(qptr == nullptr);
672 
673  // special feature: in case there is no match, we still point to the end
674  BOOST_TEST((qsv.begin() == sv.end()));
675 
676 } // findQuotationStart_noquote_test()
677 
678 
679 // -----------------------------------------------------------------------------
680 void findQuotationEnd_test() {
681 
682  using namespace std::string_literals;
683  using namespace std::string_view_literals;
684 
685  icarus::ParsingToolkit tk; // default configuration
686 
687  BOOST_TEST(tk.findQuotationEnd(R"('already'!)", "'"s) == "'already'!"sv);
688  BOOST_TEST(tk.findQuotationEnd(R"(already'!)", "'"s) == "'!"sv);
689  BOOST_TEST(tk.findQuotationEnd(R"(\'already\''!)", "'"s) == "'!"sv);
690  BOOST_TEST(tk.findQuotationEnd(R"(Nope.)", "'"s).empty());
691 
692 } // findQuotationEnd_test()
693 
694 
695 void findQuotationEnd_noquote_test() {
696 
697  icarus::ParsingToolkit tk; // default configuration
698 
699  std::string_view const sv { "No end quotes at all." };
700 
701  // the test string should have no quotes at all:
702  BOOST_TEST_REQUIRE(tk.findQuotationStart(sv).second == nullptr);
703 
704  std::string const& firstEndQuote = tk.params().quotes.front().second; // any
705 
706  std::string_view const qsv = tk.findQuotationEnd(sv, firstEndQuote);
707  BOOST_TEST(qsv.empty());
708 
709  // special feature: in case there is no match, we still point to the end
710  BOOST_TEST((qsv.begin() == sv.end()));
711 
712 } // findQuotationEnd_noquote_test()
713 
714 
715 // -----------------------------------------------------------------------------
716 void isQuotationUnclosed_test() {
717 
718  using namespace std::string_literals;
719 
720  std::string_view sv {
721  R"(No quote. Still no \"quote\". "a" "b c" 'd' "e 'f' g" "h 'i" "j' k" "unfinished 'm'".)"
722  };
723 
724  icarus::ParsingToolkit tk; // default configuration
725 
726  BOOST_TEST(!tk.isQuotationUnclosed(sv));
727  sv.remove_suffix(1);
728  BOOST_TEST(!tk.isQuotationUnclosed(sv));
729  sv.remove_suffix(1);
730  BOOST_TEST( tk.isQuotationUnclosed(sv));
731  sv.remove_suffix(1);
732  BOOST_TEST( tk.isQuotationUnclosed(sv));
733  sv.remove_suffix(1);
734  BOOST_TEST( tk.isQuotationUnclosed(sv));
735  sv.remove_suffix(1);
736  BOOST_TEST( tk.isQuotationUnclosed(sv));
737 
738 } // isQuotationUnclosed_test()
739 
740 
741 // -----------------------------------------------------------------------------
742 void splitOn_test() {
743 
744  std::string_view const sv { "aa:bbb" };
745  // key must always be a substring of sv (not checked)
746  {
747  std::string_view const key = sv.substr(2,1);
748  BOOST_TEST_REQUIRE(key == ":");
749  auto const [ pre, sep, post ] = icarus::ParsingToolkit::splitOn(sv, key);
750  BOOST_TEST(sep == key);
751  BOOST_TEST(pre == "aa");
752  BOOST_TEST(post == "bbb");
753  }
754  {
755  std::string_view const key = sv.substr(0,2);
756  BOOST_TEST_REQUIRE(key == "aa");
757  auto const [ pre, sep, post ] = icarus::ParsingToolkit::splitOn(sv, key);
758  BOOST_TEST(sep == key);
759  BOOST_TEST(pre == "");
760  BOOST_TEST(post == ":bbb");
761  }
762 
763  {
764  std::string_view const key = sv.substr(4, 1);
765  BOOST_TEST_REQUIRE(key == "b");
766  auto const [ pre, sep, post ] = icarus::ParsingToolkit::splitOn(sv, key);
767  BOOST_TEST(sep == key);
768  BOOST_TEST(pre == "aa:b");
769  BOOST_TEST(post == "b");
770  }
771 
772  {
773  std::string_view const key
774  = icarus::ParsingToolkit::make_view(sv.end(), sv.end());
775  BOOST_TEST_REQUIRE(key == "");
776  auto const [ pre, sep, post ] = icarus::ParsingToolkit::splitOn(sv, key);
777  BOOST_TEST(sep == key);
778  BOOST_TEST(pre == sv);
779  BOOST_TEST(post == "");
780  }
781 
782  {
783  std::string_view const key
784  = icarus::ParsingToolkit::make_view(sv.begin(), sv.begin());
785  BOOST_TEST_REQUIRE(key == "");
786  auto const [ pre, sep, post ] = icarus::ParsingToolkit::splitOn(sv, key);
787  BOOST_TEST(sep == key);
788  BOOST_TEST(pre == "");
789  BOOST_TEST(post == sv);
790  }
791 
792  {
793  std::string_view const key = sv;
794  auto const [ pre, sep, post ] = icarus::ParsingToolkit::splitOn(sv, key);
795  BOOST_TEST(sep == key);
796  BOOST_TEST(pre == "");
797  BOOST_TEST(post == "");
798  }
799 
800 } // splitOn_test()
801 
802 
803 // -----------------------------------------------------------------------------
804 void removeEscapes_test() {
805 
806  icarus::ParsingToolkit tk; // default configuration
807 
808  std::string s { R"()" };
809 
810  BOOST_TEST(tk.removeWordEscapes(R"()") == R"()");
811 
812  BOOST_TEST(tk.removeWordEscapes(R"(\a)") == R"(a)");
813  BOOST_TEST(tk.removeWordEscapes(R"(\a\h\a)") == R"(aha)");
814  BOOST_TEST(tk.removeWordEscapes(R"(\a\\h\a)") == R"(a\ha)");
815  BOOST_TEST(tk.removeWordEscapes(R"(aha)") == R"(aha)");
816  BOOST_TEST(tk.removeWordEscapes(R"(aha\)") == R"(aha\)");
817 
818 } // removeEscapes_test()
819 
820 
821 // -----------------------------------------------------------------------------
822 void removeEscapesDocumentation_test() {
823 
824  // "\\\\a" -> "\\a" -> "\a" -> "a" -> "a"
825 
826  icarus::ParsingToolkit tk; // default configuration
827 
828  std::string s { R"(\\\\a)" };
829 
830  s = tk.removeWordEscapes(s);
831  BOOST_TEST(s == R"(\\a)");
832 
833  s = tk.removeWordEscapes(s);
834  BOOST_TEST(s == R"(\a)");
835 
836  s = tk.removeWordEscapes(s);
837  BOOST_TEST(s == R"(a)");
838 
839  s = tk.removeWordEscapes(s);
840  BOOST_TEST(s == R"(a)");
841 
842 } // removeEscapesDocumentation_test()
843 
844 
845 // -----------------------------------------------------------------------------
846 void removeQuotations_test() {
847 
848  using namespace std::string_literals;
849 
850  icarus::ParsingToolkit tk; // default configuration
851 
852  BOOST_TEST(tk.removeWordQuotations(R"()") == R"()");
853 
854  std::string s = tk.removeWordQuotations(R"("a'b"c'b"a)");
855  BOOST_TEST(s == R"(a'bc'b"a)");
856 
857  s = tk.removeWordQuotations(s);
858  BOOST_TEST(s == R"(abcb"a)");
859 
860  s = tk.removeWordQuotations(s);
861  BOOST_TEST(s == R"(abcb"a)");
862 
863 
864  s = tk.removeWordQuotations(R"("a'b""c'b"a)");
865  BOOST_TEST(s == R"(a'bc'ba)");
866 
867 
868  s = tk.removeWordQuotations(R"("a'b\"c'b"a)");
869  BOOST_TEST(s == R"(a'b\"c'ba)");
870 
871  s = tk.removeWordQuotations(s);
872  BOOST_TEST(s == R"(ab\"cba)");
873 
874  s = tk.removeWordQuotations(s);
875  BOOST_TEST(s == R"(ab\"cba)");
876 
877 } // removeQuotations_test()
878 
879 
880 void removeQuotationsDocumentation_test() {
881 
882  using namespace std::string_literals;
883 
884  icarus::ParsingToolkit tk; // default configuration
885  /*
886  * example, `a1 << "b1 << 'c1 << " or " << c2' << b2" << a2` will become first
887  * `a1 << b1 << 'c1 << " or " << c2' << b2 << a2`, then
888  * `a1 << b1 << c1 << " or " << c2 << b2 << a2`, and eventually
889  * `a1 << b1 << c1 << or << c2 << b2 << a2`).
890  */
891 
892  std::string s { R"(a1 << "b1 << 'c1 << " or " << c2' << b2" << a2)" };
893 
894  s = tk.removeWordQuotations(s);
895  BOOST_TEST(s == R"(a1 << b1 << 'c1 << or << c2' << b2 << a2)");
896 
897  s = tk.removeWordQuotations(s);
898  BOOST_TEST(s == R"(a1 << b1 << c1 << or << c2 << b2 << a2)");
899 
900  s = tk.removeWordQuotations(s);
901  BOOST_TEST(s == R"(a1 << b1 << c1 << or << c2 << b2 << a2)");
902 
903 } // removeQuotationsDocumentation_test()
904 
905 
906 // -----------------------------------------------------------------------------
907 // BEGIN Test cases -----------------------------------------------------------
908 // -----------------------------------------------------------------------------
909 BOOST_AUTO_TEST_CASE(isCharacterEscaped_testcase) {
910 
912 
913 } // BOOST_AUTO_TEST_CASE(isCharacterEscaped_testcase)
914 
915 
916 BOOST_AUTO_TEST_CASE(findNextCharacter_testcase) {
917 
919 
920 } // BOOST_AUTO_TEST_CASE(findNextCharacter_testcase)
921 
922 
923 BOOST_AUTO_TEST_CASE(removeTrailingCharacters_testcase) {
924 
926 
927 } // BOOST_AUTO_TEST_CASE(removeTrailingCharacters_testcase)
928 
929 
930 BOOST_AUTO_TEST_CASE(readMultiline_testcase) {
931 
933  readMultiline_endOfInput_test();
934 
935 } // BOOST_AUTO_TEST_CASE(readMultiline_testcase)
936 
937 
938 BOOST_AUTO_TEST_CASE(splitWords_testcase) {
939 
940  splitWords_test();
941 
942 } // BOOST_AUTO_TEST_CASE(splitWords_testcase)
943 
944 
945 BOOST_AUTO_TEST_CASE(findFirstUnescaped_testcase) {
946 
947  findFirstUnescaped_test();
948 
949 } // BOOST_AUTO_TEST_CASE(findFirstUnescaped_testcase)
950 
951 
952 BOOST_AUTO_TEST_CASE(findFirstUnquoted_testcase) {
953 
954  findFirstUnquoted_test();
955 
956 } // BOOST_AUTO_TEST_CASE(findFirstUnquoted_testcase)
957 
958 
959 BOOST_AUTO_TEST_CASE(removeCommentLine_testcase) {
960 
961  removeCommentLine_test();
962 
963 } // BOOST_AUTO_TEST_CASE(removeCommentLine_testcase)
964 
965 
966 BOOST_AUTO_TEST_CASE(findQuotationStart_testcase) {
967 
968  findQuotationStart_test();
969  findQuotationStart_noquote_test();
970 
971 } // BOOST_AUTO_TEST_CASE(findQuotationStart_testcase)
972 
973 
974 BOOST_AUTO_TEST_CASE(findQuotationEnd_testcase) {
975 
976  findQuotationEnd_test();
977  findQuotationEnd_noquote_test();
978 
979 } // BOOST_AUTO_TEST_CASE(findQuotationEnd_testcase)
980 
981 
982 BOOST_AUTO_TEST_CASE(isQuotationUnclosed_testcase) {
983 
984  isQuotationUnclosed_test();
985 
986 } // BOOST_AUTO_TEST_CASE(isQuotationUnclosed_testcase)
987 
988 
989 BOOST_AUTO_TEST_CASE(splitOn_testcase) {
990 
991  splitOn_test();
992 
993 } // BOOST_AUTO_TEST_CASE(splitOn_testcase)
994 
995 
996 BOOST_AUTO_TEST_CASE(removeEscapes_testcase) {
997 
998  removeEscapes_test();
999  removeEscapesDocumentation_test();
1000 
1001 } // BOOST_AUTO_TEST_CASE(removeEscapes_testcase)
1002 
1003 
1004 BOOST_AUTO_TEST_CASE(removeQuotations_testcase) {
1005 
1006  removeQuotations_test();
1007  removeQuotationsDocumentation_test();
1008 
1009 } // BOOST_AUTO_TEST_CASE(removeQuotations_testcase)
1010 
1011 
1012 // -----------------------------------------------------------------------------
1013 // END Test cases -------------------------------------------------------------
1014 // -----------------------------------------------------------------------------
BOOST_AUTO_TEST_CASE(AllTests)
std::pair< std::string_view, QuotSpec_t const * > findQuotationStart(std::string_view sv) const
Finds the start of the next quotation in sv.
std::string removeWordEscapes(std::string &&w) const
Returns a copy of w with all escape characters removed.
bool isQuotationUnclosed(std::string_view sv) const
Returns if the sequence sv has unclosed quotation at its end.
static SplitView_t splitOn(std::string_view sv, std::string_view sep)
Splits the view sv in three: before sep, sep and after sep.
void isCharacterEscaped_test()
std::vector< QuotSpec_t > quotes
List of matching start and end of quote.
bool isCharacterEscaped(BIter begin, BIter itCh) const
Returns whether the character pointed by itCh is escaped or not.
std::string_view findFirstUnescaped(std::string_view sv, BIter beginKey, EIter endKey) const
Finds the first of the specified keys in sv.
auto vector(Vector const &v)
Returns a manipulator which will print the specified array.
Definition: DumpUtils.h:265
double distance(geo::Point_t const &point, CathodeDesc_t const &cathode)
Returns the distance of a point from the cathode.
static std::string_view make_view(std::string const &s)
Creates a std::string_view from an entire string s.
std::pair< std::string, std::string > QuotSpec_t
Specification of quotation: opening and closing.
Utilities for text parsing.
void findNextBlank_test()
void removeTrailingBlanks_test()
Params_t const & params() const noexcept
Returns the current parameters of parsing.
std::string_view findQuotationEnd(std::string_view sv, std::string const &quotEnd) const
Finds the quotation end in sv.
Simple text parsing utilities.
then echo File list $list not found else cat $list while read file do echo $file sed s
Definition: file_to_url.sh:60
void readMultiline_test()
void removeCommentLine(std::vector< WordType > &words) const
Removes all the words from the one starting with a comment marker.
std::string removeWordQuotations(std::string &&w) const
Returns a copy of w with no quotation starts and ends.