8 #ifndef INCLUDED_ORCUS_SAX_PARSER_HPP
9 #define INCLUDED_ORCUS_SAX_PARSER_HPP
11 #include "sax_parser_base.hpp"
13 #include <string_view>
98 (void)val; (void)
transient;
// sax_parser class template. HandlerT is the type that receives the parse
// callbacks (see m_handler below); ConfigT supplies compile-time settings
// and defaults to sax_parser_default_config (header() reads
// config_type::baseline_version).
130 template<
typename HandlerT,
typename ConfigT = sax_parser_default_config>
// Fixed-name aliases for the two template arguments.
134 typedef HandlerT handler_type;
135 typedef ConfigT config_type;
// Takes the content to parse as a string_view and the handler by reference.
// NOTE(review): the constructor body is not visible here; presumably it
// binds m_handler to `handler` - confirm.
137 sax_parser(std::string_view content, handler_type& handler);
// Opening / closing tag handlers. Both definitions forward begin_pos to
// element_name() together with the element record being filled in.
151 void element_open(std::ptrdiff_t begin_pos);
152 void element_close(std::ptrdiff_t begin_pos);
// name_check: when non-null, the parsed declaration name must compare equal
// to it; otherwise the definition throws malformed_xml_error.
154 void declaration(
const char* name_check);
// Receiver of the callbacks (start_element, end_element, characters,
// attribute, doctype, start/end_declaration). Stored as a reference.
161 handler_type& m_handler;
164 template<
typename HandlerT,
typename ConfigT>
// sax_parser::parse()
//
// Visible steps: calls skip_space_and_control() and asserts that
// m_buffer_pos is 0.
// NOTE(review): the embedded line numbers jump (172 -> 177 -> 180), so other
// statements of this body are not shown in this listing.
171 template<
typename HandlerT,
typename ConfigT>
172 void sax_parser<HandlerT,ConfigT>::parse()
177 skip_space_and_control();
// Debug-build check only (assert): m_buffer_pos is expected to be 0 here.
180 assert(m_buffer_pos == 0);
// sax_parser::header()
//
// Validates the very beginning of the input.
//
// Throws malformed_xml_error (with the current offset()) when:
//  - after skip_space_and_control() there is no character left, or the
//    current character is not '<';
//  - the character following it is not '?'.
// NOTE(review): the lines between the baseline_version test and the '?'
// test are not visible; presumably the '<?' requirement only applies when
// config_type::baseline_version >= 11 - confirm.
183 template<
typename HandlerT,
typename ConfigT>
184 void sax_parser<HandlerT,ConfigT>::header()
192 skip_space_and_control();
// has_char() is tested first so cur_char() is only evaluated when a
// character is available.
194 if (!has_char() || cur_char() !=
'<')
195 throw malformed_xml_error(
"xml file must begin with '<'.", offset());
// Compile-time constant taken from the ConfigT template argument.
197 if (config_type::baseline_version >= 11)
201 if (next_char_checked() !=
'?')
202 throw malformed_xml_error(
"xml file must begin with '<?'.", offset());
// sax_parser::body()
//
// Visible logic: tests whether the current character is '<', and consults
// m_root_elem_open and, in the else-branch, m_nest_level.
// NOTE(review): the statements executed by each branch (and any enclosing
// loop) are not shown in this listing, so their effect cannot be documented
// from here.
208 template<
typename HandlerT,
typename ConfigT>
209 void sax_parser<HandlerT,ConfigT>::body()
213 if (cur_char() ==
'<')
216 if (!m_root_elem_open)
220 else if (m_nest_level)
// sax_parser::element()
//
// Handles a construct that starts at '<'.
// Precondition (asserted): the current character is '<'.
//
// Records offset() of the '<' in `pos`, then reads the following character
// into `c` via next_char_checked().
// NOTE(review): the dispatch on `c` is not visible here. The one visible
// target is declaration(nullptr), i.e. a declaration parsed without a name
// check.
228 template<
typename HandlerT,
typename ConfigT>
229 void sax_parser<HandlerT,ConfigT>::element()
231 assert(cur_char() ==
'<');
// Captured before advancing, so it refers to the position of '<'.
232 std::ptrdiff_t pos = offset();
233 char c = next_char_checked();
// nullptr disables the name comparison inside declaration().
243 declaration(
nullptr);
// sax_parser::element_open(begin_pos)
//
// Parses an opening tag and reports it to the handler.
//
// begin_pos is forwarded to element_name() along with the element record.
//
// Two outcomes are visible:
//  - self-closing tag: '>' must follow (else malformed_xml_error,
//    "expected '/>' ..."); elem.end_pos is set, then start_element() is
//    immediately followed by end_element(), and m_root_elem_open is set to
//    false;
//  - regular opening tag: elem.end_pos is set and only start_element() is
//    called.
// NOTE(review): the conditions selecting between the two paths, and any
// guard around the m_root_elem_open assignment, are not visible here.
250 template<
typename HandlerT,
typename ConfigT>
251 void sax_parser<HandlerT,ConfigT>::element_open(std::ptrdiff_t begin_pos)
253 sax::parser_element elem;
254 element_name(elem, begin_pos);
258 skip_space_and_control();
259 char c = cur_char_checked();
// Self-closing path: the character after the current one must be '>'.
263 if (next_and_char() !=
'>')
264 throw malformed_xml_error(
"expected '/>' to self-close the element.", offset());
266 elem.end_pos = offset();
// A self-closing element is reported as a start/end pair using the same
// element record.
267 m_handler.start_element(elem);
269 m_handler.end_element(elem);
271 m_root_elem_open =
false;
// Debug trace, compiled only when ORCUS_DEBUG_SAX_PARSER is non-zero.
272 #if ORCUS_DEBUG_SAX_PARSER
273 cout <<
"element_open: ns='" << elem.ns <<
"', name='" << elem.name <<
"' (self-closing)" << endl;
// Regular opening-tag path: only start_element() is reported.
281 elem.end_pos = offset();
283 m_handler.start_element(elem);
// Debug trace, compiled only when ORCUS_DEBUG_SAX_PARSER is non-zero.
285 #if ORCUS_DEBUG_SAX_PARSER
286 cout <<
"element_open: ns='" << elem.ns <<
"', name='" << elem.name <<
"'" << endl;
// sax_parser::element_close(begin_pos)
//
// Parses a closing tag and reports it to the handler via end_element().
// Precondition (asserted): the current character is '/'.
//
// begin_pos is forwarded to element_name() along with the element record.
// Throws malformed_xml_error when the character after the name is not '>'.
// NOTE(review): some lines of this body are not shown; in particular any
// guard around the final m_root_elem_open assignment is not visible.
295 template<
typename HandlerT,
typename ConfigT>
296 void sax_parser<HandlerT,ConfigT>::element_close(std::ptrdiff_t begin_pos)
298 assert(cur_char() ==
'/');
301 sax::parser_element elem;
302 element_name(elem, begin_pos);
304 if (cur_char() !=
'>')
305 throw malformed_xml_error(
"expected '>' to close the element.", offset());
// end_pos is taken while positioned on the closing '>'.
307 elem.end_pos = offset();
309 m_handler.end_element(elem);
// Debug trace, compiled only when ORCUS_DEBUG_SAX_PARSER is non-zero.
310 #if ORCUS_DEBUG_SAX_PARSER
311 cout <<
"element_close: ns='" << elem.ns <<
"', name='" << elem.name <<
"'" << endl;
314 m_root_elem_open =
false;
// sax_parser::special_tag()
//
// Handles a tag whose second character is '!'.
// Precondition (asserted): the current character is '!'.
//
// Switches on the character that follows '!'. The visible branches cover:
//  - a comment: the next character must be '-', otherwise "comment
//    expected."; a further failure yields "malformed comment.";
//  - a CDATA section: expects_next("CDATA[", 6);
//  - a DOCTYPE section: expects_next("OCTYPE", 6), then
//    skip_space_and_control();
//  - otherwise: "failed to parse special tag.".
// All failures are malformed_xml_error carrying offset().
// NOTE(review): the case labels and the calls made after each match are not
// shown in this listing.
317 template<
typename HandlerT,
typename ConfigT>
318 void sax_parser<HandlerT,ConfigT>::special_tag()
320 assert(cur_char() ==
'!');
// NOTE(review): the length test guarding the throw below is not visible.
322 size_t len = available_size();
324 throw malformed_xml_error(
"special tag too short.", offset());
326 switch (next_and_char())
331 if (next_and_char() !=
'-')
332 throw malformed_xml_error(
"comment expected.", offset());
336 throw malformed_xml_error(
"malformed comment.", offset());
345 expects_next(
"CDATA[", 6);
// The literal lacks a leading 'D'; presumably the switch above has already
// consumed that character - confirm against the case label.
353 expects_next(
"OCTYPE", 6);
354 skip_space_and_control();
360 throw malformed_xml_error(
"failed to parse special tag.", offset());
// sax_parser::declaration(name_check)
//
// Parses a declaration introduced by '?' and brackets it with
// start_declaration() / end_declaration() on the handler.
// Precondition (asserted): the current character is '?'.
//
// name_check: optional expected name. When non-null and different from the
// parsed name, malformed_xml_error is thrown with a message naming both.
// Also throws malformed_xml_error when the '?' that ends the loop is not
// followed by '>'.
// NOTE(review): the statement that fills decl_name and the body of the
// while-loop are not fully shown in this listing.
364 template<
typename HandlerT,
typename ConfigT>
365 void sax_parser<HandlerT,ConfigT>::declaration(
const char* name_check)
367 assert(cur_char() ==
'?');
371 std::string_view decl_name;
// Debug trace, compiled only when ORCUS_DEBUG_SAX_PARSER is non-zero.
373 #if ORCUS_DEBUG_SAX_PARSER
374 cout <<
"sax_parser::declaration: start name='" << decl_name <<
"'" << endl;
// The comparison is skipped entirely when the caller passes nullptr.
377 if (name_check && decl_name != name_check)
379 std::ostringstream os;
380 os <<
"declaration name of '" << name_check <<
"' was expected, but '" << decl_name <<
"' was found instead.";
381 throw malformed_xml_error(os.str(), offset());
384 m_handler.start_declaration(decl_name);
385 skip_space_and_control();
// Loops until the current character is '?'.
388 while (cur_char_checked() !=
'?')
391 skip_space_and_control();
393 if (next_char_checked() !=
'>')
394 throw malformed_xml_error(
"declaration must end with '?>'.", offset());
396 m_handler.end_declaration(decl_name);
// Debug trace, compiled only when ORCUS_DEBUG_SAX_PARSER is non-zero.
399 #if ORCUS_DEBUG_SAX_PARSER
400 cout <<
"sax_parser::declaration: end name='" << decl_name <<
"'" << endl;
// sax_parser::cdata()
//
// Scans a CDATA section and passes its content to the handler.
//
// Walks at most available_size() characters from the current position. When
// a '>' is seen while match == 2, the content handed to
// m_handler.characters() is the first (i - 2) characters starting at p0,
// with the transient flag set to false.
// If the loop ends without that, malformed_xml_error
// ("malformed CDATA section.") is thrown.
// NOTE(review): how `match` is advanced and reset is not visible here.
// Since two characters before '>' are excluded from the content, it
// presumably counts the ']' characters of a "]]>" terminator - confirm.
404 template<
typename HandlerT,
typename ConfigT>
405 void sax_parser<HandlerT,ConfigT>::cdata()
407 size_t len = available_size();
// p0 marks where the scan begins; the content view starts here.
411 const char* p0 = mp_char;
412 size_t i = 0, match = 0;
// i counts characters examined; c is refreshed through next_and_char() on
// every iteration.
413 for (
char c = cur_char(); i < len; ++i, c = next_and_char())
427 else if (c ==
'>' && match == 2)
// Drops the two characters immediately preceding '>' from the content.
430 size_t cdata_len = i - 2;
431 m_handler.characters(std::string_view(p0, cdata_len),
false);
438 throw malformed_xml_error(
"malformed CDATA section.", offset());
// sax_parser::doctype()
//
// Parses a DOCTYPE section into a sax::doctype_declaration and passes it to
// m_handler.doctype().
//
// Visible sequence:
//  1. name(param.root_element) reads the root element name.
//  2. param.keyword starts as dtd_private and is changed to dtd_public after
//     the letters "UBLIC" have been matched. Matching "YSTEM" leaves the
//     keyword unchanged in the visible lines.
//  3. value(param.fpi, false) fills the fpi field. If the next
//     non-skipped character is '>', the handler is notified.
//  4. value(param.uri, false) fills the uri field; a closing '>' is then
//     required and the handler is notified.
// Throws malformed_xml_error on a short section, a keyword mismatch, or a
// missing closing '>'.
// NOTE(review): the test on the first keyword letter and any early return
// after step 3 are not shown in this listing; presumably step 3 returns
// before step 4 when '>' is found - confirm.
441 template<
typename HandlerT,
typename ConfigT>
442 void sax_parser<HandlerT,ConfigT>::doctype()
445 sax::doctype_declaration param;
446 name(param.root_element);
447 skip_space_and_control();
// NOTE(review): the length test guarding the throw below is not visible.
450 size_t len = available_size();
452 throw malformed_xml_error(
"DOCTYPE section too short.", offset());
// Default keyword; only overwritten after "UBLIC" matches below.
454 param.keyword = sax::doctype_declaration::keyword_type::dtd_private;
// Each next_and_char() advances by one, so the chain matches the letters in
// order and stops advancing at the first mismatch (short-circuit ||).
458 if (next_and_char() !=
'U' || next_and_char() !=
'B' || next_and_char() !=
'L' || next_and_char() !=
'I' || next_and_char() !=
'C')
459 throw malformed_xml_error(
"malformed DOCTYPE section.", offset());
461 param.keyword = sax::doctype_declaration::keyword_type::dtd_public;
465 if (next_and_char() !=
'Y' || next_and_char() !=
'S' || next_and_char() !=
'T' || next_and_char() !=
'E' || next_and_char() !=
'M')
466 throw malformed_xml_error(
"malformed DOCTYPE section.", offset());
470 skip_space_and_control();
473 value(param.fpi,
false);
// Checked both before and after skipping, so running out of input on either
// side is reported.
475 has_char_throw(
"DOCTYPE section too short.");
476 skip_space_and_control();
477 has_char_throw(
"DOCTYPE section too short.");
// '>' directly after the fpi: no uri is parsed on this path.
479 if (cur_char() ==
'>')
// Debug trace, compiled only when ORCUS_DEBUG_SAX_PARSER is non-zero.
482 #if ORCUS_DEBUG_SAX_PARSER
483 cout <<
"sax_parser::doctype: root='" << param.root_element <<
"', fpi='" << param.fpi <<
"'" << endl;
485 m_handler.doctype(param);
491 value(param.uri,
false);
493 has_char_throw(
"DOCTYPE section too short.");
494 skip_space_and_control();
495 has_char_throw(
"DOCTYPE section too short.");
497 if (cur_char() !=
'>')
498 throw malformed_xml_error(
"malformed DOCTYPE section - closing '>' expected but not found.", offset());
// Debug trace, compiled only when ORCUS_DEBUG_SAX_PARSER is non-zero.
500 #if ORCUS_DEBUG_SAX_PARSER
501 cout <<
"sax_parser::doctype: root='" << param.root_element <<
"', fpi='" << param.fpi <<
"' uri='" << param.uri <<
"'" << endl;
503 m_handler.doctype(param);
// sax_parser::characters()
//
// Collects character content starting at the current position and passes it
// to m_handler.characters().
//
// Visible paths:
//  - '&' encountered: the text scanned so far (p0 .. mp_char) is appended to
//    the cell buffer, characters_with_encoded_char() continues into the same
//    buffer, and the handler receives either an empty view with
//    transient=false or buf.str() with transient=true;
//  - otherwise: the handler receives a view over [p0, mp_char) with
//    transient=false.
// NOTE(review): the action taken on '<' and the condition choosing between
// the empty view and buf.str() are not shown in this listing.
507 template<
typename HandlerT,
typename ConfigT>
508 void sax_parser<HandlerT,ConfigT>::characters()
// p0 marks the start of the run of text being collected.
510 const char* p0 = mp_char;
511 for (; has_char(); next())
513 if (cur_char() ==
'<')
516 if (cur_char() ==
'&')
519 cell_buffer& buf = get_cell_buffer();
// Copy the text seen before '&' into the buffer first.
521 buf.append(p0, mp_char-p0);
522 characters_with_encoded_char(buf);
524 m_handler.characters(std::string_view{},
false);
// transient=true here: the view comes from the cell buffer rather than from
// the range starting at p0.
526 m_handler.characters(buf.str(),
true);
533 std::string_view val(p0, mp_char-p0);
534 m_handler.characters(val,
false);
// sax_parser::attribute()
//
// Parses one attribute into a sax::parser_attribute and passes it to
// m_handler.attribute().
//
// Visible steps: attribute_name() fills attr.ns and attr.name; after
// skipping, the current character is read into `c`; value() fills
// attr.value and its return value is stored in attr.transient.
// Throws malformed_xml_error with a message containing the ns and name
// ("Attribute must begin with 'name=..'.").
// NOTE(review): the test on `c` that triggers the throw is not visible
// here; from the message it presumably requires '=' - confirm.
538 template<
typename HandlerT,
typename ConfigT>
539 void sax_parser<HandlerT,ConfigT>::attribute()
541 sax::parser_attribute attr;
542 attribute_name(attr.ns, attr.name);
// Debug trace, compiled only when ORCUS_DEBUG_SAX_PARSER is non-zero.
544 #if ORCUS_DEBUG_SAX_PARSER
545 cout <<
"sax_parser::attribute: ns='" << attr.ns <<
"', name='" << attr.name <<
"'" << endl;
548 skip_space_and_control();
550 char c = cur_char_checked();
553 std::ostringstream os;
554 os <<
"Attribute must begin with 'name=..'. (ns='" << attr.ns <<
"', name='" << attr.name <<
"')";
555 throw malformed_xml_error(os.str(), offset());
559 skip_space_and_control();
// value() is called with `true` here, unlike the `false` used by doctype();
// its result is recorded as the attribute's transient flag.
561 attr.transient = value(attr.value,
true);
// Debug trace, compiled only when ORCUS_DEBUG_SAX_PARSER is non-zero.
566 #if ORCUS_DEBUG_SAX_PARSER
567 cout <<
"sax_parser::attribute: value='" << attr.value <<
"'" << endl;
570 m_handler.attribute(attr);
Definition: parser_base.hpp:23
Definition: sax_parser_base.hpp:108
Definition: sax_parser.hpp:28
void end_declaration(std::string_view decl)
Definition: sax_parser.hpp:57
void doctype(const orcus::sax::doctype_declaration &dtd)
Definition: sax_parser.hpp:35
void attribute(const orcus::sax::parser_attribute &attr)
Definition: sax_parser.hpp:109
void characters(std::string_view val, bool transient)
Definition: sax_parser.hpp:96
void start_declaration(std::string_view decl)
Definition: sax_parser.hpp:47
void end_element(const orcus::sax::parser_element &elem)
Definition: sax_parser.hpp:77
void start_element(const orcus::sax::parser_element &elem)
Definition: sax_parser.hpp:67
Definition: sax_parser.hpp:132
Definition: sax_parser_base.hpp:37
Definition: sax_parser_base.hpp:96
Definition: sax_parser_base.hpp:77
Definition: sax_parser.hpp:18
static constexpr uint8_t baseline_version
Definition: sax_parser.hpp:24