Orcus
yaml_parser.hpp
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
6  */
7 
8 #ifndef INCLUDED_ORCUS_YAML_PARSER_HPP
9 #define INCLUDED_ORCUS_YAML_PARSER_HPP
10 
11 #include "orcus/yaml_parser_base.hpp"
12 #include "orcus/parser_global.hpp"
13 
14 namespace orcus {
15 
/**
 * Handler whose callbacks all do nothing.
 *
 * yaml_parser calls these member functions on its handler while it walks the
 * input, so any type exposing the same set of member functions can be used
 * as the parser's handler type.
 */
class yaml_handler
{
public:
    /** Called at the very start of yaml_parser::parse(). */
    void begin_parse() {}

    /** Called as the last step of yaml_parser::parse(). */
    void end_parse() {}

    /** Called when a document begins. */
    void begin_document() {}

    /** Called when the document ends. */
    void end_document() {}

    /** Called when a sequence begins. */
    void begin_sequence() {}

    /** Called when a sequence ends. */
    void end_sequence() {}

    /** Called when a map begins. */
    void begin_map() {}

    /** Called right before the key of a map entry is reported. */
    void begin_map_key() {}

    /** Called right after the key of a map entry has been reported. */
    void end_map_key() {}

    /** Called when a map ends. */
    void end_map() {}

    /**
     * Called when a string value is encountered.
     *
     * @param value the string value.
     */
    void string([[maybe_unused]] std::string_view value) {}

    /**
     * Called when a numeric value is encountered.
     *
     * @param val the numeric value.
     */
    void number([[maybe_unused]] double val) {}

    /** Called when a boolean 'true' keyword is encountered. */
    void boolean_true() {}

    /** Called when a boolean 'false' keyword is encountered. */
    void boolean_false() {}

    /**
     * Called when a null keyword is encountered, or when a map key or a
     * sequence element turns out to have no value.
     */
    void null() {}
};
108 
117 template<typename HandlerT>
119 {
120 public:
121  typedef HandlerT handler_type;
122 
123  yaml_parser(std::string_view content, handler_type& hdl);
124 
125  void parse();
126 
127 private:
128  size_t end_scope();
129  void check_or_begin_document();
130  void check_or_begin_map();
131  void check_or_begin_sequence();
132  void parse_value(const char* p, size_t len);
133  void push_value(const char* p, size_t len);
134  void parse_line(const char* p, size_t len);
135  void parse_map_key(const char* p, size_t len);
136 
137  void handler_begin_parse();
138  void handler_end_parse();
139  void handler_begin_document();
140  void handler_end_document();
141  void handler_begin_sequence();
142  void handler_end_sequence();
143  void handler_begin_map();
144  void handler_end_map();
145  void handler_begin_map_key();
146  void handler_end_map_key();
147  void handler_string(const char* p, size_t n);
148  void handler_number(double val);
149  void handler_boolean_true();
150  void handler_boolean_false();
151  void handler_null();
152 
153 private:
154  handler_type& m_handler;
155 };
156 
157 template<typename _Handler>
159 {
160  push_parse_token(yaml::detail::parse_token_t::begin_parse);
161  m_handler.begin_parse();
162 }
163 
164 template<typename _Handler>
165 void yaml_parser<_Handler>::handler_end_parse()
166 {
167  push_parse_token(yaml::detail::parse_token_t::end_parse);
168  m_handler.end_parse();
169 }
170 
171 template<typename _Handler>
172 void yaml_parser<_Handler>::handler_begin_document()
173 {
174  push_parse_token(yaml::detail::parse_token_t::begin_document);
175  m_handler.begin_document();
176 }
177 
178 template<typename _Handler>
179 void yaml_parser<_Handler>::handler_end_document()
180 {
181  push_parse_token(yaml::detail::parse_token_t::end_document);
182  m_handler.end_document();
183 }
184 
185 template<typename _Handler>
186 void yaml_parser<_Handler>::handler_begin_sequence()
187 {
188  push_parse_token(yaml::detail::parse_token_t::begin_sequence);
189  m_handler.begin_sequence();
190 }
191 
192 template<typename _Handler>
193 void yaml_parser<_Handler>::handler_end_sequence()
194 {
195  push_parse_token(yaml::detail::parse_token_t::end_sequence);
196  m_handler.end_sequence();
197 }
198 
199 template<typename _Handler>
200 void yaml_parser<_Handler>::handler_begin_map()
201 {
202  push_parse_token(yaml::detail::parse_token_t::begin_map);
203  m_handler.begin_map();
204 }
205 
206 template<typename _Handler>
207 void yaml_parser<_Handler>::handler_end_map()
208 {
209  push_parse_token(yaml::detail::parse_token_t::end_map);
210  m_handler.end_map();
211 }
212 
213 template<typename _Handler>
214 void yaml_parser<_Handler>::handler_begin_map_key()
215 {
216  push_parse_token(yaml::detail::parse_token_t::begin_map_key);
217  m_handler.begin_map_key();
218 }
219 
220 template<typename _Handler>
221 void yaml_parser<_Handler>::handler_end_map_key()
222 {
223  push_parse_token(yaml::detail::parse_token_t::end_map_key);
224  m_handler.end_map_key();
225 }
226 
227 template<typename _Handler>
228 void yaml_parser<_Handler>::handler_string(const char* p, size_t n)
229 {
230  push_parse_token(yaml::detail::parse_token_t::string);
231  m_handler.string({p, n});
232 }
233 
234 template<typename _Handler>
235 void yaml_parser<_Handler>::handler_number(double val)
236 {
237  push_parse_token(yaml::detail::parse_token_t::number);
238  m_handler.number(val);
239 }
240 
241 template<typename _Handler>
242 void yaml_parser<_Handler>::handler_boolean_true()
243 {
244  push_parse_token(yaml::detail::parse_token_t::boolean_true);
245  m_handler.boolean_true();
246 }
247 
248 template<typename _Handler>
249 void yaml_parser<_Handler>::handler_boolean_false()
250 {
251  push_parse_token(yaml::detail::parse_token_t::boolean_false);
252  m_handler.boolean_false();
253 }
254 
255 template<typename _Handler>
256 void yaml_parser<_Handler>::handler_null()
257 {
258  push_parse_token(yaml::detail::parse_token_t::null);
259  m_handler.null();
260 }
261 
262 template<typename _Handler>
263 yaml_parser<_Handler>::yaml_parser(std::string_view content, handler_type& hdl) :
264  yaml::parser_base(content), m_handler(hdl) {}
265 
266 template<typename _Handler>
267 void yaml_parser<_Handler>::parse()
268 {
269  handler_begin_parse();
270 
271  while (has_char())
272  {
273  reset_on_new_line();
274 
275  size_t indent = parse_indent();
276  if (indent == parse_indent_end_of_stream)
277  break;
278 
279  if (indent == parse_indent_blank_line)
280  continue;
281 
282  size_t cur_scope = get_scope();
283 
284  if (cur_scope <= indent)
285  {
286  if (in_literal_block())
287  {
288  handle_line_in_literal(indent);
289  continue;
290  }
291 
292  if (has_line_buffer())
293  {
294  // This line is part of multi-line string. Push the line to the
295  // buffer as-is.
296  handle_line_in_multi_line_string();
297  continue;
298  }
299  }
300 
301  if (cur_scope == scope_empty)
302  {
303  if (indent > 0)
304  throw parse_error(
305  "first node of the document should not be indented.", offset());
306 
307  push_scope(indent);
308  }
309  else if (indent > cur_scope)
310  {
311  push_scope(indent);
312  }
313  else if (indent < cur_scope)
314  {
315  // Current indent is less than the current scope level.
316  do
317  {
318  cur_scope = end_scope();
319  if (cur_scope < indent)
320  throw parse_error("parse: invalid indent level.", offset());
321  }
322  while (indent < cur_scope);
323  }
324 
325  // Parse the rest of the line.
326  std::string_view line = parse_to_end_of_line();
327  line = trim(line);
328 
329  assert(!line.empty());
330  parse_line(line.data(), line.size());
331  }
332 
333  // End all remaining scopes.
334  size_t cur_scope = get_scope();
335  while (cur_scope != scope_empty)
336  cur_scope = end_scope();
337 
338  if (get_doc_hash())
339  handler_end_document();
340 
341  handler_end_parse();
342 }
343 
344 template<typename _Handler>
345 size_t yaml_parser<_Handler>::end_scope()
346 {
347  switch (get_scope_type())
348  {
349  case yaml::detail::scope_t::map:
350  {
351  if (get_last_parse_token() == yaml::detail::parse_token_t::end_map_key)
352  handler_null();
353 
354  handler_end_map();
355  break;
356  }
357  case yaml::detail::scope_t::sequence:
358  {
359  if (get_last_parse_token() == yaml::detail::parse_token_t::begin_sequence_element)
360  handler_null();
361 
362  handler_end_sequence();
363  break;
364  }
365  case yaml::detail::scope_t::multi_line_string:
366  {
367  std::string_view merged = merge_line_buffer();
368  handler_string(merged.data(), merged.size());
369  break;
370  }
371  default:
372  {
373  if (has_line_buffer())
374  {
375  assert(get_line_buffer_count() == 1);
376  std::string_view line = pop_line_front();
377  parse_value(line.data(), line.size());
378  }
379  }
380  }
381  return pop_scope();
382 }
383 
384 template<typename _Handler>
385 void yaml_parser<_Handler>::check_or_begin_document()
386 {
387  if (!get_doc_hash())
388  {
389  set_doc_hash(mp_char);
390  handler_begin_document();
391  }
392 }
393 
394 template<typename _Handler>
395 void yaml_parser<_Handler>::check_or_begin_map()
396 {
397  switch (get_scope_type())
398  {
399  case yaml::detail::scope_t::unset:
400  {
401  check_or_begin_document();
402  set_scope_type(yaml::detail::scope_t::map);
403  handler_begin_map();
404  break;
405  }
406  case yaml::detail::scope_t::map:
407  {
408  if (get_last_parse_token() == yaml::detail::parse_token_t::end_map_key)
409  handler_null();
410  break;
411  }
412  default:
413  ;
414  }
415 }
416 
417 template<typename _Handler>
418 void yaml_parser<_Handler>::check_or_begin_sequence()
419 {
420  switch (get_scope_type())
421  {
422  case yaml::detail::scope_t::unset:
423  {
424  check_or_begin_document();
425  set_scope_type(yaml::detail::scope_t::sequence);
426  handler_begin_sequence();
427  break;
428  }
429  case yaml::detail::scope_t::sequence:
430  {
431  if (get_last_parse_token() == yaml::detail::parse_token_t::begin_sequence_element)
432  handler_null();
433  break;
434  }
435  default:
436  ;
437  }
438 
439  push_parse_token(yaml::detail::parse_token_t::begin_sequence_element);
440 }
441 
442 template<typename _Handler>
443 void yaml_parser<_Handler>::parse_value(const char* p, size_t len)
444 {
445  check_or_begin_document();
446 
447  const char* p0 = p;
448  const char* p_end = p + len;
449  double val;
450  p = parse_numeric(p, p_end, val);
451  if (p == p_end)
452  {
453  handler_number(val);
454  return;
455  }
456 
457  yaml::detail::keyword_t kw = parse_keyword(p0, len);
458 
459  if (kw != yaml::detail::keyword_t::unknown)
460  {
461  switch (kw)
462  {
463  case yaml::detail::keyword_t::null:
464  handler_null();
465  break;
466  case yaml::detail::keyword_t::boolean_true:
467  handler_boolean_true();
468  break;
469  case yaml::detail::keyword_t::boolean_false:
470  handler_boolean_false();
471  break;
472  default:
473  ;
474  }
475 
476  return;
477  }
478 
479  // Failed to parse it as a number or a keyword. It must be a string.
480  handler_string(p0, len);
481 }
482 
483 template<typename _Handler>
484 void yaml_parser<_Handler>::push_value(const char* p, size_t len)
485 {
486  check_or_begin_document();
487 
488  if (has_line_buffer() && get_scope_type() == yaml::detail::scope_t::unset)
489  set_scope_type(yaml::detail::scope_t::multi_line_string);
490 
491  push_line_back(p, len);
492 }
493 
494 template<typename _Handler>
495 void yaml_parser<_Handler>::parse_line(const char* p, size_t len)
496 {
497  const char* p_end = p + len;
498  const char* p0 = p; // Save the original head position.
499 
500  if (*p == '-')
501  {
502  ++p;
503  if (p == p_end)
504  {
505  // List item start.
506  check_or_begin_sequence();
507  return;
508  }
509 
510  switch (*p)
511  {
512  case '-':
513  {
514  // start of a document
515  ++p;
516  if (p == p_end)
517  throw parse_error("parse_line: line ended with '--'.", offset_last_char_of_line());
518 
519  if (*p != '-')
520  parse_error::throw_with(
521  "parse_line: '-' expected but '", *p, "' found.",
522  offset_last_char_of_line() - std::ptrdiff_t(p_end-p));
523 
524  ++p; // Skip the '-'.
525  set_doc_hash(p);
526  handler_begin_document();
527  clear_scopes();
528 
529  if (p != p_end)
530  {
531  skip_blanks(p, p_end-p);
532 
533  // Whatever comes after '---' is equivalent of first node.
534  assert(p != p_end);
535  push_scope(0);
536  parse_line(p, p_end-p);
537  }
538  return;
539  }
540  case ' ':
541  {
542  check_or_begin_sequence();
543 
544  // list item start with inline first item content.
545  ++p;
546  if (p == p_end)
547  throw parse_error(
548  "parse_line: list item expected, but the line ended prematurely.",
549  offset_last_char_of_line() - std::ptrdiff_t(p_end-p));
550 
551  skip_blanks(p, p_end-p);
552 
553  size_t scope_width = get_scope() + (p-p0);
554  push_scope(scope_width);
555  parse_line(p, p_end-p);
556  return;
557  }
558  default:
559  // It is none of the above.
560  p = p0;
561  }
562 
563  }
564 
565  if (get_scope_type() == yaml::detail::scope_t::sequence)
566  parse_error::throw_with(
567  "'-' was expected for a sequence element, but '", *p, "' was found.",
568  offset_last_char_of_line()-len+1);
569 
570  // If the line doesn't start with a "- ", it must be a dictionary key.
571  parse_map_key(p, len);
572 }
573 
574 template<typename _Handler>
575 void yaml_parser<_Handler>::parse_map_key(const char* p, size_t len)
576 {
577  const char* p_end = p + len;
578  const char* p0 = p; // Save the original head position.
579 
580  switch (*p)
581  {
582  case '"':
583  {
584  std::string_view quoted_str = parse_double_quoted_string_value(p, len);
585 
586  if (p == p_end)
587  {
588  handler_string(quoted_str.data(), quoted_str.size());
589  return;
590  }
591 
592  skip_blanks(p, p_end-p);
593 
594  if (*p != ':')
595  throw parse_error(
596  "parse_map_key: ':' is expected after the quoted string key.",
597  offset() - std::ptrdiff_t(p_end-p+1));
598 
599  check_or_begin_map();
600  handler_begin_map_key();
601  handler_string(quoted_str.data(), quoted_str.size());
602  handler_end_map_key();
603 
604  ++p; // skip the ':'.
605  if (p == p_end)
606  return;
607 
608  // Skip all white spaces.
609  skip_blanks(p, p_end-p);
610  }
611  break;
612  case '\'':
613  {
614  std::string_view quoted_str = parse_single_quoted_string_value(p, len);
615 
616  if (p == p_end)
617  {
618  handler_string(quoted_str.data(), quoted_str.size());
619  return;
620  }
621 
622  skip_blanks(p, p_end-p);
623 
624  if (*p != ':')
625  throw parse_error(
626  "parse_map_key: ':' is expected after the quoted string key.",
627  offset() - std::ptrdiff_t(p_end-p+1));
628 
629  check_or_begin_map();
630  handler_begin_map_key();
631  handler_string(quoted_str.data(), quoted_str.size());
632  handler_end_map_key();
633 
634  ++p; // skip the ':'.
635  if (p == p_end)
636  return;
637 
638  skip_blanks(p, p_end-p);
639  }
640  break;
641  default:
642  {
643  key_value kv = parse_key_value(p, p_end-p);
644 
645  if (kv.key.empty())
646  {
647  // No map key found.
648  if (*p == '|')
649  {
650  start_literal_block();
651  return;
652  }
653 
654  push_value(p, len);
655  return;
656  }
657 
658  check_or_begin_map();
659  handler_begin_map_key();
660  parse_value(kv.key.data(), kv.key.size());
661  handler_end_map_key();
662 
663  if (kv.value.empty())
664  return;
665 
666  p = kv.value.data();
667  }
668  }
669 
670  if (*p == '|')
671  {
672  start_literal_block();
673  return;
674  }
675 
676  // inline map item.
677  if (*p == '-')
678  throw parse_error(
679  "parse_map_key: sequence entry is not allowed as an inline map item.",
680  offset() - std::ptrdiff_t(p_end-p+1));
681 
682  size_t scope_width = get_scope() + (p-p0);
683  push_scope(scope_width);
684  parse_line(p, p_end-p);
685 }
686 
687 }
688 
689 #endif
690 
691 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
Definition: yaml_parser_base.hpp:66
Definition: yaml_parser.hpp:21
void end_map()
Definition: yaml_parser.hpp:71
void begin_parse()
Definition: yaml_parser.hpp:26
void end_sequence()
Definition: yaml_parser.hpp:51
void end_parse()
Definition: yaml_parser.hpp:31
void boolean_true()
Definition: yaml_parser.hpp:96
void begin_map_key()
Definition: yaml_parser.hpp:61
void boolean_false()
Definition: yaml_parser.hpp:101
void begin_map()
Definition: yaml_parser.hpp:56
void number(double val)
Definition: yaml_parser.hpp:88
void end_document()
Definition: yaml_parser.hpp:41
void begin_document()
Definition: yaml_parser.hpp:36
void string(std::string_view value)
Definition: yaml_parser.hpp:78
void begin_sequence()
Definition: yaml_parser.hpp:46
void end_map_key()
Definition: yaml_parser.hpp:66
Definition: yaml_parser.hpp:119