Orcus
sax_ns_parser.hpp
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 */
7 
8 #ifndef INCLUDED_ORCUS_SAX_NS_PARSER_HPP
9 #define INCLUDED_ORCUS_SAX_NS_PARSER_HPP
10 
11 #include "sax_parser.hpp"
12 #include "xml_namespace.hpp"
13 
14 #include <unordered_set>
15 #include <vector>
16 #include <algorithm>
17 
18 namespace orcus {
19 
21 {
23  xmlns_id_t ns;
25  std::string_view ns_alias;
27  std::string_view name;
29  std::ptrdiff_t begin_pos;
31  std::ptrdiff_t end_pos;
32 };
33 
35 {
37  xmlns_id_t ns;
39  std::string_view ns_alias;
41  std::string_view name;
43  std::string_view value;
45  bool transient;
46 };
47 
48 namespace sax { namespace detail {
49 
/**
 * Key made of a namespace alias and a local name.  Used to detect
 * attributes that are specified more than once on the same element.
 *
 * Both members are non-owning views; the referenced character data must
 * outlive the key.
 */
struct entity_name
{
    std::string_view ns;
    std::string_view name;

    entity_name(std::string_view _ns, std::string_view _name) :
        ns(_ns), name(_name) {}

    /** Two keys are equal when both the alias and the name match. */
    bool operator== (const entity_name& other) const
    {
        return other.ns == ns && other.name == name;
    }

    /** Hash functor for use with unordered containers. */
    struct hash
    {
        std::size_t operator() (const entity_name& v) const
        {
            std::hash<std::string_view> hasher;

            // Order-sensitive combine.  A plain sum would make (ns, name)
            // and (name, ns) collide unconditionally.
            const std::size_t seed = hasher(v.ns);
            return seed ^ (hasher(v.name) + 0x9e3779b9 + (seed << 6) + (seed >> 2));
        }
    };
};
72 
73 typedef std::unordered_set<std::string_view> ns_keys_type;
74 typedef std::unordered_set<entity_name, entity_name::hash> entity_names_type;
75 
76 struct elem_scope
77 {
78  xmlns_id_t ns;
79  std::string_view name;
80  ns_keys_type ns_keys;
81 
82  elem_scope() {}
83  elem_scope(const elem_scope&) = delete;
84  elem_scope(elem_scope&& other) = default;
85 };
86 
/// Stack of open element scopes; the innermost open element is at the back.
87 using elem_scopes_type = std::vector<elem_scope>;
88 
89 }} // namespace sax::detail
90 
92 {
93 public:
100  {
101  (void)dtd;
102  }
103 
111  void start_declaration(std::string_view decl)
112  {
113  (void)decl;
114  }
115 
121  void end_declaration(std::string_view decl)
122  {
123  (void)decl;
124  }
125 
132  {
133  (void)elem;
134  }
135 
142  {
143  (void)elem;
144  }
145 
160  void characters(std::string_view val, bool transient)
161  {
162  (void)val;
163  (void)transient;
164  }
165 
177  void attribute(std::string_view name, std::string_view val)
178  {
179  (void)name;
180  (void)val;
181  }
182 
192  {
193  (void)attr;
194  }
195 };
196 
211 template<typename HandlerT>
213 {
214 public:
215  typedef HandlerT handler_type;
216 
217  sax_ns_parser(std::string_view content, xmlns_context& ns_cxt, handler_type& handler);
218  ~sax_ns_parser() = default;
219 
226  void parse();
227 
228 private:
233  class handler_wrapper
234  {
235  sax::detail::elem_scopes_type m_scopes;
236  sax::detail::ns_keys_type m_ns_keys;
237  sax::detail::entity_names_type m_attrs;
238 
239  sax_ns_parser_element m_elem;
241 
242  xmlns_context& m_ns_cxt;
243  handler_type& m_handler;
244 
245  bool m_declaration;
246 
247  public:
248  handler_wrapper(xmlns_context& ns_cxt, handler_type& handler) : m_ns_cxt(ns_cxt), m_handler(handler), m_declaration(false) {}
249 
250  void doctype(const sax::doctype_declaration& dtd)
251  {
252  m_handler.doctype(dtd);
253  }
254 
255  void start_declaration(std::string_view name)
256  {
257  m_declaration = true;
258  m_handler.start_declaration(name);
259  }
260 
261  void end_declaration(std::string_view name)
262  {
263  m_declaration = false;
264  m_handler.end_declaration(name);
265  }
266 
267  void start_element(const sax::parser_element& elem)
268  {
269  m_scopes.emplace_back();
270  sax::detail::elem_scope& scope = m_scopes.back();
271  scope.ns = m_ns_cxt.get(elem.ns);
272  scope.name = elem.name;
273  scope.ns_keys.swap(m_ns_keys);
274 
275  m_elem.ns = scope.ns;
276  m_elem.ns_alias = elem.ns;
277  m_elem.name = scope.name;
278  m_elem.begin_pos = elem.begin_pos;
279  m_elem.end_pos = elem.end_pos;
280  m_handler.start_element(m_elem);
281 
282  m_attrs.clear();
283  }
284 
285  void end_element(const sax::parser_element& elem)
286  {
287  sax::detail::elem_scope& scope = m_scopes.back();
288  if (scope.ns != m_ns_cxt.get(elem.ns) || scope.name != elem.name)
289  throw malformed_xml_error("mis-matching closing element.", -1);
290 
291  m_elem.ns = scope.ns;
292  m_elem.ns_alias = elem.ns;
293  m_elem.name = scope.name;
294  m_elem.begin_pos = elem.begin_pos;
295  m_elem.end_pos = elem.end_pos;
296  m_handler.end_element(m_elem);
297 
298  // Pop all namespaces declared in this scope.
299  for (const std::string_view& key : scope.ns_keys)
300  m_ns_cxt.pop(key);
301 
302  m_scopes.pop_back();
303  }
304 
305  void characters(std::string_view val, bool transient)
306  {
307  m_handler.characters(val, transient);
308  }
309 
310  void attribute(const sax::parser_attribute& attr)
311  {
312  if (m_declaration)
313  {
314  // XML declaration attribute. Pass it through to the handler without namespace.
315  m_handler.attribute(attr.name, attr.value);
316  return;
317  }
318 
319  if (m_attrs.count(sax::detail::entity_name(attr.ns, attr.name)) > 0)
320  throw malformed_xml_error(
321  "You can't define two attributes of the same name in the same element.", -1);
322 
323  m_attrs.insert(sax::detail::entity_name(attr.ns, attr.name));
324 
325  if (attr.ns.empty() && attr.name == "xmlns")
326  {
327  // Default namespace
328  m_ns_cxt.push(std::string_view{}, attr.value);
329  m_ns_keys.insert(std::string_view{});
330  return;
331  }
332 
333  if (attr.ns == "xmlns")
334  {
335  // Namespace alias
336  if (!attr.name.empty())
337  {
338  m_ns_cxt.push(attr.name, attr.value);
339  m_ns_keys.insert(attr.name);
340  }
341  return;
342  }
343 
344  m_attr.ns = attr.ns.empty() ? XMLNS_UNKNOWN_ID : m_ns_cxt.get(attr.ns);
345  m_attr.ns_alias = attr.ns;
346  m_attr.name = attr.name;
347  m_attr.value = attr.value;
348  m_attr.transient = attr.transient;
349  m_handler.attribute(m_attr);
350  }
351  };
352 
353 private:
354  handler_wrapper m_wrapper;
355  sax_parser<handler_wrapper> m_parser;
356 };
357 
358 template<typename HandlerT>
359 sax_ns_parser<HandlerT>::sax_ns_parser(
360  std::string_view content, xmlns_context& ns_cxt, handler_type& handler) :
361  m_wrapper(ns_cxt, handler), m_parser(content, m_wrapper)
362 {
363 }
364 
365 template<typename HandlerT>
367 {
368  m_parser.parse();
369 }
370 
371 }
372 
373 #endif
374 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
Definition: sax_ns_parser.hpp:92
void attribute(std::string_view name, std::string_view val)
Definition: sax_ns_parser.hpp:177
void attribute(const orcus::sax_ns_parser_attribute &attr)
Definition: sax_ns_parser.hpp:191
void start_element(const orcus::sax_ns_parser_element &elem)
Definition: sax_ns_parser.hpp:131
void characters(std::string_view val, bool transient)
Definition: sax_ns_parser.hpp:160
void start_declaration(std::string_view decl)
Definition: sax_ns_parser.hpp:111
void doctype(const orcus::sax::doctype_declaration &dtd)
Definition: sax_ns_parser.hpp:99
void end_element(const orcus::sax_ns_parser_element &elem)
Definition: sax_ns_parser.hpp:141
void end_declaration(std::string_view decl)
Definition: sax_ns_parser.hpp:121
Definition: sax_ns_parser.hpp:213
void parse()
Definition: sax_ns_parser.hpp:366
Definition: xml_namespace.hpp:100
xmlns_id_t push(std::string_view alias, std::string_view uri)
xmlns_id_t get(std::string_view alias) const
void pop(std::string_view alias)
Definition: sax_ns_parser.hpp:77
Definition: sax_ns_parser.hpp:64
Definition: sax_ns_parser.hpp:51
Definition: sax_parser_base.hpp:37
Definition: sax_ns_parser.hpp:35
std::string_view value
Definition: sax_ns_parser.hpp:43
xmlns_id_t ns
Definition: sax_ns_parser.hpp:37
bool transient
Definition: sax_ns_parser.hpp:45
std::string_view name
Definition: sax_ns_parser.hpp:41
std::string_view ns_alias
Definition: sax_ns_parser.hpp:39
Definition: sax_ns_parser.hpp:21
std::ptrdiff_t end_pos
Definition: sax_ns_parser.hpp:31
xmlns_id_t ns
Definition: sax_ns_parser.hpp:23
std::string_view name
Definition: sax_ns_parser.hpp:27
std::string_view ns_alias
Definition: sax_ns_parser.hpp:25
std::ptrdiff_t begin_pos
Definition: sax_ns_parser.hpp:29