Orcus
css_parser.hpp
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
6  */
7 
8 #ifndef INCLUDED_ORCUS_CSS_PARSER_HPP
9 #define INCLUDED_ORCUS_CSS_PARSER_HPP
10 
11 #define ORCUS_DEBUG_CSS 0
12 
13 #include "parser_global.hpp"
14 #include "css_parser_base.hpp"
15 
16 #include <cassert>
17 #include <algorithm>
18 
19 #if ORCUS_DEBUG_CSS
20 #include <iostream>
21 using std::cout;
22 using std::endl;
23 #endif
24 
25 namespace orcus {
26 
32 {
33 public:
39  void at_rule_name(std::string_view name)
40  {
41  (void)name;
42  }
43 
57  void simple_selector_type(std::string_view type)
58  {
59  (void)type;
60  }
61 
75  void simple_selector_class(std::string_view cls)
76  {
77  (void)cls;
78  }
79 
95  void simple_selector_pseudo_element(orcus::css::pseudo_element_t pe)
96  {
97  (void)pe;
98  }
99 
114  void simple_selector_pseudo_class(orcus::css::pseudo_class_t pc)
115  {
116  (void)pc;
117  }
118 
132  void simple_selector_id(std::string_view id)
133  {
134  (void)id;
135  }
136 
144 
151  void end_selector() {}
152 
167  void combinator(orcus::css::combinator_t combinator)
168  {
169  (void)combinator;
170  }
171 
177  void property_name(std::string_view name)
178  {
179  (void)name;
180  }
181 
187  void value(std::string_view value)
188  {
189  (void)value;
190  }
191 
199  void rgb(uint8_t red, uint8_t green, uint8_t blue)
200  {
201  (void)red; (void)green; (void)blue;
202  }
203 
213  void rgba(uint8_t red, uint8_t green, uint8_t blue, double alpha)
214  {
215  (void)red; (void)green; (void)blue; (void)alpha;
216  }
217 
225  void hsl(uint8_t hue, uint8_t sat, uint8_t light)
226  {
227  (void)hue; (void)sat; (void)light;
228  }
229 
239  void hsla(uint8_t hue, uint8_t sat, uint8_t light, double alpha)
240  {
241  (void)hue; (void)sat; (void)light; (void)alpha;
242  }
243 
249  void url(std::string_view url)
250  {
251  (void)url;
252  }
253 
257  void begin_parse() {}
258 
262  void end_parse() {}
263 
268  void begin_block() {}
269 
274  void end_block() {}
275 
286  void begin_property() {}
287 
291  void end_property() {}
292 };
293 
300 template<typename HandlerT>
302 {
303 public:
304  typedef HandlerT handler_type;
305 
306  css_parser(std::string_view content, handler_type& hdl);
307  void parse();
308 
309 private:
310  // Handlers - at the time a handler is called the current position is
311  // expected to point to the first unprocessed non-blank character, and
312  // each handler must set the current position to the next unprocessed
313  // non-blank character when it finishes.
314  void rule();
315  void at_rule_name();
316  void simple_selector_name();
317  void property_name();
318  void property();
319  void quoted_value(char c);
320  void value();
321  void function_value(std::string_view v);
322  void function_rgb(bool alpha);
323  void function_hsl(bool alpha);
324  void function_url();
325  void name_sep();
326  void property_sep();
327  void block();
328 
329  handler_type& m_handler;
330 };
331 
332 template<typename _Handler>
333 css_parser<_Handler>::css_parser(std::string_view content, handler_type& hdl) :
334  css::parser_base(content), m_handler(hdl) {}
335 
336 template<typename _Handler>
337 void css_parser<_Handler>::parse()
338 {
339  shrink_stream();
340 
341 #if ORCUS_DEBUG_CSS
342  std::cout << "compressed: '";
343  const char* p = mp_char;
344  for (; p != mp_end; ++p)
345  std::cout << *p;
346  std::cout << "'" << std::endl;
347 #endif
348  m_handler.begin_parse();
349  while (has_char())
350  rule();
351  m_handler.end_parse();
352 }
353 
354 template<typename _Handler>
355 void css_parser<_Handler>::rule()
356 {
357  // <selector name> , ... , <selector name> <block>
358  while (has_char())
359  {
360  if (skip_comment())
361  continue;
362 
363  char c = cur_char();
364  if (is_alpha(c))
365  {
366  simple_selector_name();
367  continue;
368  }
369 
370  switch (c)
371  {
372  case '>':
373  set_combinator(c, css::combinator_t::direct_child);
374  break;
375  case '+':
376  set_combinator(c, css::combinator_t::next_sibling);
377  break;
378  case '.':
379  case '#':
380  case '@':
381  simple_selector_name();
382  break;
383  case ',':
384  name_sep();
385  break;
386  case '{':
387  reset_before_block();
388  block();
389  break;
390  default:
391  parse_error::throw_with("rule: failed to parse '", c, "'", offset());
392  }
393  }
394 }
395 
396 template<typename _Handler>
397 void css_parser<_Handler>::at_rule_name()
398 {
399  assert(has_char());
400  assert(cur_char() == '@');
401  next();
402  char c = cur_char();
403  if (!is_alpha(c))
404  throw parse_error("at_rule_name: first character of an at-rule name must be an alphabet.", offset());
405 
406  const char* p;
407  size_t len;
408  identifier(p, len);
409  skip_blanks();
410 
411  m_handler.at_rule_name({p, len});
412 #if ORCUS_DEBUG_CSS
413  std::string foo(p, len);
414  std::cout << "at-rule name: " << foo.c_str() << std::endl;
415 #endif
416 }
417 
418 template<typename _Handler>
419 void css_parser<_Handler>::simple_selector_name()
420 {
421  assert(has_char());
422  char c = cur_char();
423  if (c == '@')
424  {
425  // This is the name of an at-rule.
426  at_rule_name();
427  return;
428  }
429 
430  if (m_simple_selector_count)
431  {
432 #if ORCUS_DEBUG_CSS
433  cout << "combinator: " << m_combinator << endl;
434 #endif
435  m_handler.combinator(m_combinator);
436  m_combinator = css::combinator_t::descendant;
437  }
438  assert(is_alpha(c) || c == '.' || c == '#');
439 
440  const char* p = nullptr;
441  size_t n = 0;
442 
443 #if ORCUS_DEBUG_CSS
444  cout << "simple_selector_name: (" << m_simple_selector_count << ")";
445 #endif
446 
447  if (c != '.' && c != '#')
448  {
449  identifier(p, n);
450 #if ORCUS_DEBUG_CSS
451  std::string s(p, n);
452  cout << " type=" << s;
453 #endif
454  m_handler.simple_selector_type({p, n});
455  }
456 
457  bool in_loop = true;
458  while (in_loop && has_char())
459  {
460  switch (cur_char())
461  {
462  case '.':
463  {
464  next();
465  identifier(p, n);
466  m_handler.simple_selector_class({p, n});
467 #if ORCUS_DEBUG_CSS
468  std::string s(p, n);
469  std::cout << " class=" << s;
470 #endif
471  }
472  break;
473  case '#':
474  {
475  next();
476  identifier(p, n);
477  m_handler.simple_selector_id({p, n});
478 #if ORCUS_DEBUG_CSS
479  std::string s(p, n);
480  std::cout << " id=" << s;
481 #endif
482  }
483  break;
484  case ':':
485  {
486  // This could be either a pseudo element or pseudo class.
487  next();
488  if (cur_char() == ':')
489  {
490  // pseudo element.
491  next();
492  identifier(p, n);
493  css::pseudo_element_t elem = css::to_pseudo_element({p, n});
494  if (!elem)
495  parse_error::throw_with(
496  "selector_name: unknown pseudo element '", {p, n}, "'", offset());
497 
498  m_handler.simple_selector_pseudo_element(elem);
499  }
500  else
501  {
502  // pseudo class (or pseudo element in the older version of CSS).
503  identifier(p, n);
504  css::pseudo_class_t pc = css::to_pseudo_class({p, n});
505  if (!pc)
506  parse_error::throw_with(
507  "selector_name: unknown pseudo class '", {p, n}, "'", offset());
508 
509  m_handler.simple_selector_pseudo_class(pc);
510  }
511  }
512  break;
513  default:
514  in_loop = false;
515  }
516  }
517 
518  m_handler.end_simple_selector();
519  skip_comments_and_blanks();
520 
521  ++m_simple_selector_count;
522 
523 #if ORCUS_DEBUG_CSS
524  std::cout << std::endl;
525 #endif
526 }
527 
528 template<typename _Handler>
529 void css_parser<_Handler>::property_name()
530 {
531  // <identifier>
532 
533  assert(has_char());
534  char c = cur_char();
535  if (!is_alpha(c) && c != '.')
536  parse_error::throw_with(
537  "property_name: first character of a name must be an alphabet or a dot, but found '", c, "'", offset());
538 
539  const char* p;
540  size_t len;
541  identifier(p, len);
542  skip_comments_and_blanks();
543 
544  m_handler.property_name({p, len});
545 #if ORCUS_DEBUG_CSS
546  std::string foo(p, len);
547  std::cout << "property name: " << foo.c_str() << std::endl;
548 #endif
549 }
550 
551 template<typename _Handler>
552 void css_parser<_Handler>::property()
553 {
554  // <property name> : <value> , ... , <value>
555 
556  m_handler.begin_property();
557  property_name();
558  if (cur_char() != ':')
559  throw parse_error("property: ':' expected.", offset());
560  next();
561  skip_comments_and_blanks();
562 
563  bool in_loop = true;
564  while (in_loop && has_char())
565  {
566  value();
567  char c = cur_char();
568  switch (c)
569  {
570  case ',':
571  {
572  // separated by commas.
573  next();
574  skip_comments_and_blanks();
575  }
576  break;
577  case ';':
578  case '}':
579  in_loop = false;
580  break;
581  default:
582  ;
583  }
584  }
585 
586  skip_comments_and_blanks();
587  m_handler.end_property();
588 }
589 
590 template<typename _Handler>
591 void css_parser<_Handler>::quoted_value(char c)
592 {
593  // Parse until the the end quote is reached.
594  const char* p = nullptr;
595  size_t len = 0;
596  literal(p, len, c);
597  next();
598  skip_blanks();
599 
600  m_handler.value({p, len});
601 #if ORCUS_DEBUG_CSS
602  std::string foo(p, len);
603  std::cout << "quoted value: " << foo.c_str() << std::endl;
604 #endif
605 }
606 
607 template<typename _Handler>
608 void css_parser<_Handler>::value()
609 {
610  assert(has_char());
611  char c = cur_char();
612  if (c == '"' || c == '\'')
613  {
614  quoted_value(c);
615  return;
616  }
617 
618  std::string_view v = parse_value();
619  if (v.empty())
620  return;
621 
622  if (cur_char() == '(')
623  {
624  function_value(v);
625  return;
626  }
627 
628  m_handler.value(v);
629 
630  skip_comments_and_blanks();
631 
632 #if ORCUS_DEBUG_CSS
633  std::cout << "value: " << v << std::endl;
634 #endif
635 }
636 
637 template<typename _Handler>
638 void css_parser<_Handler>::function_value(std::string_view v)
639 {
640  assert(cur_char() == '(');
641  css::property_function_t func = css::to_property_function(v);
642  if (func == css::property_function_t::unknown)
643  parse_error::throw_with("function_value: unknown function '", v, "'", offset());
644 
645  // Move to the first character of the first argument.
646  next();
647  skip_comments_and_blanks();
648 
649  switch (func)
650  {
651  case css::property_function_t::rgb:
652  function_rgb(false);
653  break;
654  case css::property_function_t::rgba:
655  function_rgb(true);
656  break;
657  case css::property_function_t::hsl:
658  function_hsl(false);
659  break;
660  case css::property_function_t::hsla:
661  function_hsl(true);
662  break;
663  case css::property_function_t::url:
664  function_url();
665  break;
666  default:
667  parse_error::throw_with("function_value: unhandled function '", v, "'", offset());
668  }
669 
670  char c = cur_char();
671  if (c != ')')
672  parse_error::throw_with("function_value: ')' expected but '", c, "' found.", offset());
673 
674  next();
675  skip_comments_and_blanks();
676 }
677 
678 template<typename _Handler>
679 void css_parser<_Handler>::function_rgb(bool alpha)
680 {
681  // rgb(num, num, num) rgba(num, num, num, float)
682 
683  uint8_t vals[3];
684  uint8_t* p = vals;
685  const uint8_t* plast = p + 2;
686  char c = 0;
687 
688  for (; ; ++p)
689  {
690  *p = parse_uint8();
691 
692  skip_comments_and_blanks();
693 
694  if (p == plast)
695  break;
696 
697  c = cur_char();
698 
699  if (c != ',')
700  parse_error::throw_with("function_rgb: ',' expected but '", c, "' found.", offset());
701 
702  next();
703  skip_comments_and_blanks();
704  }
705 
706  if (alpha)
707  {
708  c = cur_char();
709  if (c != ',')
710  parse_error::throw_with("function_rgb: ',' expected but '", c, "' found.", offset());
711 
712  next();
713  skip_comments_and_blanks();
714 
715  double alpha_val = parse_double_or_throw();
716 
717  alpha_val = std::clamp(alpha_val, 0.0, 1.0);
718  m_handler.rgba(vals[0], vals[1], vals[2], alpha_val);
719  }
720  else
721  m_handler.rgb(vals[0], vals[1], vals[2]);
722 
723 #if ORCUS_DEBUG_CSS
724  std::cout << "rgb";
725  if (alpha)
726  std::cout << 'a';
727  std::cout << '(';
728  p = vals;
729  const uint8_t* pend = plast + 1;
730  for (; p != pend; ++p)
731  std::cout << ' ' << (int)*p;
732  std::cout << " )" << std::endl;
733 #endif
734 }
735 
736 template<typename _Handler>
737 void css_parser<_Handler>::function_hsl(bool alpha)
738 {
739  // hsl(num, percent, percent) hsla(num, percent, percent, float)
740 
741  double hue = parse_double_or_throw(); // casted to uint8_t eventually.
742  hue = std::clamp(hue, 0.0, 360.0);
743  skip_comments_and_blanks();
744 
745  char c = cur_char();
746  if (c != ',')
747  parse_error::throw_with("function_hsl: ',' expected but '", c, "' found.", offset());
748 
749  next();
750  skip_comments_and_blanks();
751 
752  double sat = parse_percent();
753  sat = std::clamp(sat, 0.0, 100.0);
754  skip_comments_and_blanks();
755 
756  c = cur_char();
757  if (c != ',')
758  parse_error::throw_with("function_hsl: ',' expected but '", c, "' found.", offset());
759 
760  next();
761  skip_comments_and_blanks();
762 
763  double light = parse_percent();
764  light = std::clamp(light, 0.0, 100.0);
765  skip_comments_and_blanks();
766 
767  if (!alpha)
768  {
769  m_handler.hsl(hue, sat, light);
770  return;
771  }
772 
773  c = cur_char();
774  if (c != ',')
775  parse_error::throw_with("function_hsl: ',' expected but '", c, "' found.", offset());
776 
777  next();
778  skip_comments_and_blanks();
779 
780  double alpha_val = parse_double_or_throw();
781  alpha_val = std::clamp(alpha_val, 0.0, 1.0);
782  skip_comments_and_blanks();
783  m_handler.hsla(hue, sat, light, alpha_val);
784 }
785 
786 template<typename _Handler>
787 void css_parser<_Handler>::function_url()
788 {
789  char c = cur_char();
790 
791  if (c == '"' || c == '\'')
792  {
793  // Quoted URL value.
794  const char* p;
795  size_t len;
796  literal(p, len, c);
797  next();
798  skip_comments_and_blanks();
799  m_handler.url({p, len});
800 #if ORCUS_DEBUG_CSS
801  std::cout << "url(" << std::string(p, len) << ")" << std::endl;
802 #endif
803  return;
804  }
805 
806  // Unquoted URL value.
807  const char* p;
808  size_t len;
809  skip_to_or_blank(p, len, ")");
810  skip_comments_and_blanks();
811  m_handler.url({p, len});
812 #if ORCUS_DEBUG_CSS
813  std::cout << "url(" << std::string(p, len) << ")" << std::endl;
814 #endif
815 }
816 
817 template<typename _Handler>
818 void css_parser<_Handler>::name_sep()
819 {
820  assert(cur_char() == ',');
821 #if ORCUS_DEBUG_CSS
822  std::cout << "," << std::endl;
823 #endif
824  next();
825  skip_blanks();
826  m_handler.end_selector();
827 }
828 
829 template<typename _Handler>
830 void css_parser<_Handler>::property_sep()
831 {
832 #if ORCUS_DEBUG_CSS
833  std::cout << ";" << std::endl;
834 #endif
835  next();
836  skip_comments_and_blanks();
837 }
838 
839 template<typename _Handler>
840 void css_parser<_Handler>::block()
841 {
842  // '{' <property> ';' ... ';' <property> ';'(optional) '}'
843 
844  assert(cur_char() == '{');
845 #if ORCUS_DEBUG_CSS
846  std::cout << "{" << std::endl;
847 #endif
848  m_handler.end_selector();
849  m_handler.begin_block();
850 
851  next();
852  skip_comments_and_blanks();
853 
854  // parse properties.
855  while (has_char())
856  {
857  property();
858  if (cur_char() != ';')
859  break;
860  property_sep();
861  if (cur_char() == '}')
862  // ';' after the last property. This is optional but allowed.
863  break;
864  }
865 
866  if (cur_char() != '}')
867  throw parse_error("block: '}' expected.", offset());
868 
869  m_handler.end_block();
870 
871  next();
872  skip_comments_and_blanks();
873 
874 #if ORCUS_DEBUG_CSS
875  std::cout << "}" << std::endl;
876 #endif
877 }
878 
879 }
880 
881 #endif
882 
883 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
Definition: css_parser_base.hpp:22
Definition: css_parser.hpp:32
void url(std::string_view url)
Definition: css_parser.hpp:249
void end_parse()
Definition: css_parser.hpp:262
void end_block()
Definition: css_parser.hpp:274
void property_name(std::string_view name)
Definition: css_parser.hpp:177
void at_rule_name(std::string_view name)
Definition: css_parser.hpp:39
void hsl(uint8_t hue, uint8_t sat, uint8_t light)
Definition: css_parser.hpp:225
void end_property()
Definition: css_parser.hpp:291
void begin_parse()
Definition: css_parser.hpp:257
void end_selector()
Definition: css_parser.hpp:151
void begin_block()
Definition: css_parser.hpp:268
void simple_selector_pseudo_element(orcus::css::pseudo_element_t pe)
Definition: css_parser.hpp:95
void simple_selector_class(std::string_view cls)
Definition: css_parser.hpp:75
void rgba(uint8_t red, uint8_t green, uint8_t blue, double alpha)
Definition: css_parser.hpp:213
void rgb(uint8_t red, uint8_t green, uint8_t blue)
Definition: css_parser.hpp:199
void begin_property()
Definition: css_parser.hpp:286
void simple_selector_pseudo_class(orcus::css::pseudo_class_t pc)
Definition: css_parser.hpp:114
void hsla(uint8_t hue, uint8_t sat, uint8_t light, double alpha)
Definition: css_parser.hpp:239
void simple_selector_id(std::string_view id)
Definition: css_parser.hpp:132
void combinator(orcus::css::combinator_t combinator)
Definition: css_parser.hpp:167
void simple_selector_type(std::string_view type)
Definition: css_parser.hpp:57
void value(std::string_view value)
Definition: css_parser.hpp:187
void end_simple_selector()
Definition: css_parser.hpp:143
Definition: css_parser.hpp:302
Definition: parser_base.hpp:23