Orcus
yaml_parser.hpp
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
6  */
7 
8 #ifndef INCLUDED_ORCUS_YAML_PARSER_HPP
9 #define INCLUDED_ORCUS_YAML_PARSER_HPP
10 
11 #include "orcus/yaml_parser_base.hpp"
12 #include "orcus/parser_global.hpp"
13 
14 namespace orcus {
15 
17 {
18 public:
22  void begin_parse() {}
23 
27  void end_parse() {}
28 
32  void begin_document() {}
33 
37  void end_document() {}
38 
42  void begin_sequence() {}
43 
47  void end_sequence() {}
48 
52  void begin_map() {}
53 
57  void begin_map_key() {}
58 
62  void end_map_key() {}
63 
67  void end_map() {}
68 
75  void string(const char* p, size_t n)
76  {
77  (void)p; (void)n;
78  }
79 
85  void number(double val)
86  {
87  (void)val;
88  }
89 
93  void boolean_true() {}
94 
98  void boolean_false() {}
99 
103  void null() {}
104 };
105 
106 template<typename _Handler>
108 {
109 public:
110  typedef _Handler handler_type;
111 
112  yaml_parser(const char* p, size_t n, handler_type& hdl);
113 
114  void parse();
115 
116 private:
117  size_t end_scope();
118  void check_or_begin_document();
119  void check_or_begin_map();
120  void check_or_begin_sequence();
121  void parse_value(const char* p, size_t len);
122  void push_value(const char* p, size_t len);
123  void parse_line(const char* p, size_t len);
124  void parse_map_key(const char* p, size_t len);
125 
126  void handler_begin_parse();
127  void handler_end_parse();
128  void handler_begin_document();
129  void handler_end_document();
130  void handler_begin_sequence();
131  void handler_end_sequence();
132  void handler_begin_map();
133  void handler_end_map();
134  void handler_begin_map_key();
135  void handler_end_map_key();
136  void handler_string(const char* p, size_t n);
137  void handler_number(double val);
138  void handler_boolean_true();
139  void handler_boolean_false();
140  void handler_null();
141 
142 private:
143  handler_type& m_handler;
144 };
145 
146 template<typename _Handler>
148 {
149  push_parse_token(yaml::detail::parse_token_t::begin_parse);
150  m_handler.begin_parse();
151 }
152 
153 template<typename _Handler>
154 void yaml_parser<_Handler>::handler_end_parse()
155 {
156  push_parse_token(yaml::detail::parse_token_t::end_parse);
157  m_handler.end_parse();
158 }
159 
160 template<typename _Handler>
161 void yaml_parser<_Handler>::handler_begin_document()
162 {
163  push_parse_token(yaml::detail::parse_token_t::begin_document);
164  m_handler.begin_document();
165 }
166 
167 template<typename _Handler>
168 void yaml_parser<_Handler>::handler_end_document()
169 {
170  push_parse_token(yaml::detail::parse_token_t::end_document);
171  m_handler.end_document();
172 }
173 
174 template<typename _Handler>
175 void yaml_parser<_Handler>::handler_begin_sequence()
176 {
177  push_parse_token(yaml::detail::parse_token_t::begin_sequence);
178  m_handler.begin_sequence();
179 }
180 
181 template<typename _Handler>
182 void yaml_parser<_Handler>::handler_end_sequence()
183 {
184  push_parse_token(yaml::detail::parse_token_t::end_sequence);
185  m_handler.end_sequence();
186 }
187 
188 template<typename _Handler>
189 void yaml_parser<_Handler>::handler_begin_map()
190 {
191  push_parse_token(yaml::detail::parse_token_t::begin_map);
192  m_handler.begin_map();
193 }
194 
195 template<typename _Handler>
196 void yaml_parser<_Handler>::handler_end_map()
197 {
198  push_parse_token(yaml::detail::parse_token_t::end_map);
199  m_handler.end_map();
200 }
201 
202 template<typename _Handler>
203 void yaml_parser<_Handler>::handler_begin_map_key()
204 {
205  push_parse_token(yaml::detail::parse_token_t::begin_map_key);
206  m_handler.begin_map_key();
207 }
208 
209 template<typename _Handler>
210 void yaml_parser<_Handler>::handler_end_map_key()
211 {
212  push_parse_token(yaml::detail::parse_token_t::end_map_key);
213  m_handler.end_map_key();
214 }
215 
216 template<typename _Handler>
217 void yaml_parser<_Handler>::handler_string(const char* p, size_t n)
218 {
219  push_parse_token(yaml::detail::parse_token_t::string);
220  m_handler.string(p, n);
221 }
222 
223 template<typename _Handler>
224 void yaml_parser<_Handler>::handler_number(double val)
225 {
226  push_parse_token(yaml::detail::parse_token_t::number);
227  m_handler.number(val);
228 }
229 
230 template<typename _Handler>
231 void yaml_parser<_Handler>::handler_boolean_true()
232 {
233  push_parse_token(yaml::detail::parse_token_t::boolean_true);
234  m_handler.boolean_true();
235 }
236 
237 template<typename _Handler>
238 void yaml_parser<_Handler>::handler_boolean_false()
239 {
240  push_parse_token(yaml::detail::parse_token_t::boolean_false);
241  m_handler.boolean_false();
242 }
243 
244 template<typename _Handler>
245 void yaml_parser<_Handler>::handler_null()
246 {
247  push_parse_token(yaml::detail::parse_token_t::null);
248  m_handler.null();
249 }
250 
251 template<typename _Handler>
252 yaml_parser<_Handler>::yaml_parser(const char* p, size_t n, handler_type& hdl) :
253  yaml::parser_base(p, n), m_handler(hdl) {}
254 
255 template<typename _Handler>
256 void yaml_parser<_Handler>::parse()
257 {
258  handler_begin_parse();
259 
260  while (has_char())
261  {
262  reset_on_new_line();
263 
264  size_t indent = parse_indent();
265  if (indent == parse_indent_end_of_stream)
266  break;
267 
268  if (indent == parse_indent_blank_line)
269  continue;
270 
271  size_t cur_scope = get_scope();
272 
273  if (cur_scope <= indent)
274  {
275  if (in_literal_block())
276  {
277  handle_line_in_literal(indent);
278  continue;
279  }
280 
281  if (has_line_buffer())
282  {
283  // This line is part of multi-line string. Push the line to the
284  // buffer as-is.
285  handle_line_in_multi_line_string();
286  continue;
287  }
288  }
289 
290  if (cur_scope == scope_empty)
291  {
292  if (indent > 0)
293  throw yaml::parse_error(
294  "first node of the document should not be indented.", offset());
295 
296  push_scope(indent);
297  }
298  else if (indent > cur_scope)
299  {
300  push_scope(indent);
301  }
302  else if (indent < cur_scope)
303  {
304  // Current indent is less than the current scope level.
305  do
306  {
307  cur_scope = end_scope();
308  if (cur_scope < indent)
309  throw yaml::parse_error("parse: invalid indent level.", offset());
310  }
311  while (indent < cur_scope);
312  }
313 
314  // Parse the rest of the line.
315  pstring line = parse_to_end_of_line();
316  line = line.trim();
317 
318  assert(!line.empty());
319  parse_line(line.get(), line.size());
320  }
321 
322  // End all remaining scopes.
323  size_t cur_scope = get_scope();
324  while (cur_scope != scope_empty)
325  cur_scope = end_scope();
326 
327  if (get_doc_hash())
328  handler_end_document();
329 
330  handler_end_parse();
331 }
332 
333 template<typename _Handler>
334 size_t yaml_parser<_Handler>::end_scope()
335 {
336  switch (get_scope_type())
337  {
338  case yaml::detail::scope_t::map:
339  {
340  if (get_last_parse_token() == yaml::detail::parse_token_t::end_map_key)
341  handler_null();
342 
343  handler_end_map();
344  break;
345  }
346  case yaml::detail::scope_t::sequence:
347  {
348  if (get_last_parse_token() == yaml::detail::parse_token_t::begin_sequence_element)
349  handler_null();
350 
351  handler_end_sequence();
352  break;
353  }
354  case yaml::detail::scope_t::multi_line_string:
355  {
356  pstring merged = merge_line_buffer();
357  handler_string(merged.get(), merged.size());
358  break;
359  }
360  default:
361  {
362  if (has_line_buffer())
363  {
364  assert(get_line_buffer_count() == 1);
365  pstring line = pop_line_front();
366  parse_value(line.get(), line.size());
367  }
368  }
369  }
370  return pop_scope();
371 }
372 
373 template<typename _Handler>
374 void yaml_parser<_Handler>::check_or_begin_document()
375 {
376  if (!get_doc_hash())
377  {
378  set_doc_hash(mp_char);
379  handler_begin_document();
380  }
381 }
382 
383 template<typename _Handler>
384 void yaml_parser<_Handler>::check_or_begin_map()
385 {
386  switch (get_scope_type())
387  {
388  case yaml::detail::scope_t::unset:
389  {
390  check_or_begin_document();
391  set_scope_type(yaml::detail::scope_t::map);
392  handler_begin_map();
393  break;
394  }
395  case yaml::detail::scope_t::map:
396  {
397  if (get_last_parse_token() == yaml::detail::parse_token_t::end_map_key)
398  handler_null();
399  break;
400  }
401  default:
402  ;
403  }
404 }
405 
406 template<typename _Handler>
407 void yaml_parser<_Handler>::check_or_begin_sequence()
408 {
409  switch (get_scope_type())
410  {
411  case yaml::detail::scope_t::unset:
412  {
413  check_or_begin_document();
414  set_scope_type(yaml::detail::scope_t::sequence);
415  handler_begin_sequence();
416  break;
417  }
418  case yaml::detail::scope_t::sequence:
419  {
420  if (get_last_parse_token() == yaml::detail::parse_token_t::begin_sequence_element)
421  handler_null();
422  break;
423  }
424  default:
425  ;
426  }
427 
428  push_parse_token(yaml::detail::parse_token_t::begin_sequence_element);
429 }
430 
431 template<typename _Handler>
432 void yaml_parser<_Handler>::parse_value(const char* p, size_t len)
433 {
434  check_or_begin_document();
435 
436  const char* p0 = p;
437  const char* p_end = p + len;
438  double val = parse_numeric(p, len);
439  if (p == p_end)
440  {
441  handler_number(val);
442  return;
443  }
444 
445  yaml::detail::keyword_t kw = parse_keyword(p0, len);
446 
447  if (kw != yaml::detail::keyword_t::unknown)
448  {
449  switch (kw)
450  {
451  case yaml::detail::keyword_t::null:
452  handler_null();
453  break;
454  case yaml::detail::keyword_t::boolean_true:
455  handler_boolean_true();
456  break;
457  case yaml::detail::keyword_t::boolean_false:
458  handler_boolean_false();
459  break;
460  default:
461  ;
462  }
463 
464  return;
465  }
466 
467  // Failed to parse it as a number or a keyword. It must be a string.
468  handler_string(p0, len);
469 }
470 
471 template<typename _Handler>
472 void yaml_parser<_Handler>::push_value(const char* p, size_t len)
473 {
474  check_or_begin_document();
475 
476  if (has_line_buffer() && get_scope_type() == yaml::detail::scope_t::unset)
477  set_scope_type(yaml::detail::scope_t::multi_line_string);
478 
479  push_line_back(p, len);
480 }
481 
482 template<typename _Handler>
483 void yaml_parser<_Handler>::parse_line(const char* p, size_t len)
484 {
485  const char* p_end = p + len;
486  const char* p0 = p; // Save the original head position.
487 
488  if (*p == '-')
489  {
490  ++p;
491  if (p == p_end)
492  {
493  // List item start.
494  check_or_begin_sequence();
495  return;
496  }
497 
498  switch (*p)
499  {
500  case '-':
501  {
502  // start of a document
503  ++p;
504  if (p == p_end)
505  throw yaml::parse_error("parse_line: line ended with '--'.", offset_last_char_of_line());
506 
507  if (*p != '-')
508  yaml::parse_error::throw_with(
509  "parse_line: '-' expected but '", *p, "' found.",
510  offset_last_char_of_line() - std::ptrdiff_t(p_end-p));
511 
512  ++p; // Skip the '-'.
513  set_doc_hash(p);
514  handler_begin_document();
515  clear_scopes();
516 
517  if (p != p_end)
518  {
519  skip_blanks(p, p_end-p);
520 
521  // Whatever comes after '---' is equivalent of first node.
522  assert(p != p_end);
523  push_scope(0);
524  parse_line(p, p_end-p);
525  }
526  return;
527  }
528  case ' ':
529  {
530  check_or_begin_sequence();
531 
532  // list item start with inline first item content.
533  ++p;
534  if (p == p_end)
535  throw yaml::parse_error(
536  "parse_line: list item expected, but the line ended prematurely.",
537  offset_last_char_of_line() - std::ptrdiff_t(p_end-p));
538 
539  skip_blanks(p, p_end-p);
540 
541  size_t scope_width = get_scope() + (p-p0);
542  push_scope(scope_width);
543  parse_line(p, p_end-p);
544  return;
545  }
546  default:
547  // It is none of the above.
548  p = p0;
549  }
550 
551  }
552 
553  if (get_scope_type() == yaml::detail::scope_t::sequence)
554  yaml::parse_error::throw_with(
555  "'-' was expected for a sequence element, but '", *p, "' was found.",
556  offset_last_char_of_line()-len+1);
557 
558  // If the line doesn't start with a "- ", it must be a dictionary key.
559  parse_map_key(p, len);
560 }
561 
562 template<typename _Handler>
563 void yaml_parser<_Handler>::parse_map_key(const char* p, size_t len)
564 {
565  const char* p_end = p + len;
566  const char* p0 = p; // Save the original head position.
567 
568  switch (*p)
569  {
570  case '"':
571  {
572  pstring quoted_str = parse_double_quoted_string_value(p, len);
573 
574  if (p == p_end)
575  {
576  handler_string(quoted_str.get(), quoted_str.size());
577  return;
578  }
579 
580  skip_blanks(p, p_end-p);
581 
582  if (*p != ':')
583  throw yaml::parse_error(
584  "parse_map_key: ':' is expected after the quoted string key.",
585  offset() - std::ptrdiff_t(p_end-p+1));
586 
587  check_or_begin_map();
588  handler_begin_map_key();
589  handler_string(quoted_str.get(), quoted_str.size());
590  handler_end_map_key();
591 
592  ++p; // skip the ':'.
593  if (p == p_end)
594  return;
595 
596  // Skip all white spaces.
597  skip_blanks(p, p_end-p);
598  }
599  break;
600  case '\'':
601  {
602  pstring quoted_str = parse_single_quoted_string_value(p, len);
603 
604  if (p == p_end)
605  {
606  handler_string(quoted_str.get(), quoted_str.size());
607  return;
608  }
609 
610  skip_blanks(p, p_end-p);
611 
612  if (*p != ':')
613  throw yaml::parse_error(
614  "parse_map_key: ':' is expected after the quoted string key.",
615  offset() - std::ptrdiff_t(p_end-p+1));
616 
617  check_or_begin_map();
618  handler_begin_map_key();
619  handler_string(quoted_str.get(), quoted_str.size());
620  handler_end_map_key();
621 
622  ++p; // skip the ':'.
623  if (p == p_end)
624  return;
625 
626  skip_blanks(p, p_end-p);
627  }
628  break;
629  default:
630  {
631  key_value kv = parse_key_value(p, p_end-p);
632 
633  if (kv.key.empty())
634  {
635  // No map key found.
636  if (*p == '|')
637  {
638  start_literal_block();
639  return;
640  }
641 
642  push_value(p, len);
643  return;
644  }
645 
646  check_or_begin_map();
647  handler_begin_map_key();
648  parse_value(kv.key.get(), kv.key.size());
649  handler_end_map_key();
650 
651  if (kv.value.empty())
652  return;
653 
654  p = kv.value.get();
655  }
656  }
657 
658  if (*p == '|')
659  {
660  start_literal_block();
661  return;
662  }
663 
664  // inline map item.
665  if (*p == '-')
666  throw yaml::parse_error(
667  "parse_map_key: sequence entry is not allowed as an inline map item.",
668  offset() - std::ptrdiff_t(p_end-p+1));
669 
670  size_t scope_width = get_scope() + (p-p0);
671  push_scope(scope_width);
672  parse_line(p, p_end-p);
673 }
674 
675 }
676 
677 #endif
678 
679 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
Definition: yaml_parser_base.hpp:76
Definition: yaml_parser.hpp:17
void end_map()
Definition: yaml_parser.hpp:67
void begin_parse()
Definition: yaml_parser.hpp:22
void end_sequence()
Definition: yaml_parser.hpp:47
void end_parse()
Definition: yaml_parser.hpp:27
void string(const char *p, size_t n)
Definition: yaml_parser.hpp:75
void boolean_true()
Definition: yaml_parser.hpp:93
void begin_map_key()
Definition: yaml_parser.hpp:57
void boolean_false()
Definition: yaml_parser.hpp:98
void begin_map()
Definition: yaml_parser.hpp:52
void number(double val)
Definition: yaml_parser.hpp:85
void end_document()
Definition: yaml_parser.hpp:37
void begin_document()
Definition: yaml_parser.hpp:32
void begin_sequence()
Definition: yaml_parser.hpp:42
void end_map_key()
Definition: yaml_parser.hpp:62
Definition: yaml_parser.hpp:108