class Ronn::RoffFilter
Filter for converting HTML to ROFF
Constants
- HTML_ROFF_ENTITIES
Public Class Methods
new(html_fragment, name, section, tagline, manual = nil, version = nil, date = nil)
click to toggle source
Convert Ronn HTML to roff. The HTML input is an HTML fragment, not a complete document.
# File lib/ronn/roff.rb
#
# Build the roff output for an HTML fragment.
#
# Starts with an empty output buffer, emits the title heading, parses
# +html_fragment+ with Nokogiri, strips comments, normalizes whitespace,
# renders the tree through block_filter and finishes with a newline.
#
# html_fragment - HTML fragment (not a complete document) to convert.
# name, section, tagline, manual, version, date - passed through to
#   title_heading unchanged.
def initialize(html_fragment, name, section, tagline, manual = nil, version = nil, date = nil)
  @buf = []
  title_heading(name, section, tagline, manual, version, date)

  fragment = Nokogiri::HTML.fragment(html_fragment)
  remove_extraneous_elements!(fragment)
  normalize_whitespace!(fragment)
  block_filter(fragment)

  write("\n")
end
Public Instance Methods
to_s()
click to toggle source
# File lib/ronn/roff.rb
#
# Return the accumulated roff output as one String, with trailing spaces
# and tabs removed from the end of every line.
def to_s
  joined = @buf.join
  joined.gsub(/[ \t]+$/, '')
end
Protected Instance Methods
block_filter(node)
click to toggle source
# File lib/ronn/roff.rb
#
# Render +node+ as block-level roff.
#
# Accepts nil (ignored), an Array / NodeSet (each member is rendered in
# order), a document or fragment (its children are rendered), a text node
# (delegated to inline_filter) or an element, which is dispatched on its
# tag name. DTD and comment nodes are ignored.
#
# Raises RuntimeError for an <li> whose parent is not <ol>/<ul>, for a
# <th> met outside <thead> handling, and for any other kind of node.
def block_filter(node)
  return if node.nil?

  if node.is_a?(Array) || node.is_a?(Nokogiri::XML::NodeSet)
    node.each { |ch| block_filter(ch) }

  elsif node.document? || node.fragment?
    block_filter(node.children)

  elsif node.text?
    # This hack is necessary to support mixed-child-type dd's
    inline_filter(node)

  elsif node.elem?
    case node.name
    when 'html', 'body', 'div'
      block_filter(node.children)
    when 'h1'
      # discard
      nop
    when 'h2'
      macro 'SH', quote(escape(node.inner_html))
    when 'h3'
      macro 'SS', quote(escape(node.inner_html))

    when 'p'
      prev = previous(node)
      if prev && %w[dd li blockquote].include?(node.parent.name)
        macro 'IP'
      elsif prev && !%w[h1 h2 h3].include?(prev.name)
        macro 'P'
      elsif node.previous&.text?
        macro 'IP'
      end
      inline_filter(node.children)

    when 'blockquote'
      prev = previous(node)
      indent = prev.nil? || !%w[h1 h2 h3].include?(prev.name)
      macro 'IP', %w["" 4] if indent
      block_filter(node.children)
      macro 'IP', %w["" 0] if indent

    when 'pre'
      prev = previous(node)
      indent = prev.nil? || !%w[h1 h2 h3].include?(prev.name)
      macro 'IP', %w["" 4] if indent
      macro 'nf'
      # HACK: strip an initial \n to avoid extra spacing.
      # An empty <pre> has no first child, so use safe navigation instead
      # of calling text? on nil.
      first_child = node.children.first
      if first_child&.text?
        text = first_child.to_s
        first_child.replace(text[1..-1]) if text.start_with?("\n")
      end
      inline_filter(node.children)
      macro 'fi'
      macro 'IP', %w["" 0] if indent

    when 'dl'
      macro 'TP'
      block_filter(node.children)
    when 'dt'
      prev = previous(node)
      macro 'TP' unless prev.nil?
      inline_filter(node.children)
      write "\n"
    when 'dd'
      if node.at('p')
        block_filter(node.children)
      else
        inline_filter(node.children)
      end
      write "\n"

    when 'ol', 'ul'
      block_filter(node.children)
      macro 'IP', %w["" 0]
    when 'li'
      case node.parent.name
      when 'ol'
        macro 'IP', %W["#{node.parent.children.index(node) + 1}." 4]
      when 'ul'
        macro 'IP', ['"\\[ci]"', '4']
      else
        raise "List element found as a child of non-list parent element: #{node.inspect}"
      end
      if node.at('p,ol,ul,dl,div')
        block_filter(node.children)
      else
        inline_filter(node.children)
      end
      write "\n"

    # Inline tags that turn up at block level are handed to inline_filter.
    # 'sup' is listed because inline_filter knows how to render it.
    when 'span', 'code', 'b', 'strong', 'kbd', 'samp', 'var', 'em', 'i',
         'u', 'br', 'a', 'sup'
      inline_filter(node)

    when 'table'
      macro 'TS'
      write "allbox;\n"
      block_filter(node.children)
      macro 'TE'
    when 'thead'
      # Convert to format section and first row
      tr = node.children[0]
      header_contents = []
      cell_formats = []
      tr.children.each do |th|
        style = th['style']
        cell_format = case style
                      when 'text-align:left;'
                        'l'
                      when 'text-align:right;'
                        'r'
                      when 'text-align:center;'
                        'c'
                      else
                        'l'
                      end
        header_contents << th.inner_html
        cell_formats << cell_format
      end
      write cell_formats.join(' ') + ".\n"
      write header_contents.join("\t") + "\n"
    when 'th'
      raise 'internal error: unexpected <th> element'
    when 'tbody'
      # Let the 'tr' handle it
      block_filter(node.children)
    when 'tr'
      # Convert to a table data row
      node.children.each do |child|
        block_filter(child)
        write "\t"
      end
      write "\n"
    when 'td'
      inline_filter(node.children)

    else
      warn 'unrecognized block tag: %p', node.name
    end

  elsif node.is_a?(Nokogiri::XML::DTD)
    # Ignore
    nop
  elsif node.is_a?(Nokogiri::XML::Comment)
    # Ignore
    nop
  else
    raise "unexpected node: #{node.inspect}"
  end
end
comment(text)
click to toggle source
# File lib/ronn/roff.rb
#
# Emit +text+ as a roff comment line (prefixed with `.\" `) via writeln.
def comment(text)
  line = ".\\\" #{text}"
  writeln(line)
end
escape(text)
click to toggle source
# File lib/ronn/roff.rb
#
# Escape +text+ (HTML-serialized text) for use in roff output.
#
# Steps, in order: decode numeric character references, escape
# backslashes, break up ellipses, backslash-escape the characters
# ' . -, substitute every HTML_ROFF_ENTITIES pair, and finally decode
# "&amp;" to "&".
#
# text - String or nil.
#
# Returns a new String; nil and empty input yield text.to_s.
# Raises RangeError if a numeric reference is not a valid code point.
def escape(text)
  return text.to_s if text.nil? || text.empty?

  text = text.dup
  # Integer#chr without an encoding fails above 255, so request UTF-8
  # explicitly (assumes +text+ itself is UTF-8 compatible).
  text.gsub!(/&#x([0-9A-Fa-f]+);/) { Regexp.last_match(1).to_i(16).chr(Encoding::UTF_8) } # hex entities
  text.gsub!(/&#(\d+);/) { Regexp.last_match(1).to_i.chr(Encoding::UTF_8) } # dec entities
  text.gsub!('\\', '\e')                   # backslash
  text.gsub!('...', '\|.\|.\|.')           # ellipses
  text.gsub!(/['.-]/) { |m| "\\#{m}" }     # control chars
  HTML_ROFF_ENTITIES.each do |key, val|
    text.gsub!(key, val)
  end
  # Decoded last so that a doubly-escaped "&amp;lt;" stays a literal "&lt;".
  text.gsub!('&amp;', '&')                 # amps
  text
end
inline_filter(node)
click to toggle source
# File lib/ronn/roff.rb
#
# Render +node+ as inline roff.
#
# Accepts nil (ignored), an Array / NodeSet (each member rendered in
# order), a text node (escaped and written) or an inline element, which
# is dispatched on its tag name. Unrecognized tags produce a warning and
# are not rendered.
#
# Raises RuntimeError for any other kind of node.
def inline_filter(node)
  return unless node # is an empty node

  if node.is_a?(Array) || node.is_a?(Nokogiri::XML::NodeSet)
    node.each { |ch| inline_filter(ch) }

  elsif node.text?
    text = node.to_html.dup
    write escape(text)

  elsif node.elem?
    case node.name
    when 'span'
      inline_filter(node.children)
    when 'code'
      if child_of?(node, 'pre')
        inline_filter(node.children)
      else
        write '\fB'
        inline_filter(node.children)
        write '\fR'
      end

    when 'b', 'strong', 'kbd', 'samp'
      write '\fB'
      inline_filter(node.children)
      write '\fR'

    when 'var', 'em', 'i', 'u'
      write '\fI'
      inline_filter(node.children)
      write '\fR'

    when 'br'
      macro 'br'

    when 'a'
      href = node.attributes['href']
      if node.classes.include?('man-ref')
        inline_filter(node.children)
      elsif node.has_attribute?('data-bare-link')
        write '\fI'
        inline_filter(node.children)
        write '\fR'
      elsif href.nil?
        # An anchor without href (e.g. a named anchor) has no URL to
        # print; render only its contents instead of failing on nil.
        inline_filter(node.children)
      else
        inline_filter(node.children)
        write ' '
        write '\fI'
        write escape(href.content)
        write '\fR'
      end

    when 'sup'
      # This superscript equivalent is a big ugly hack.
      write '^('
      inline_filter(node.children)
      write ')'

    else
      warn 'unrecognized inline tag: %p', node.name
    end

  else
    raise "unexpected node: #{node.inspect}"
  end
end
macro(name, value = nil)
click to toggle source
# File lib/ronn/roff.rb
#
# Write a roff request line: a dot, then +name+ and +value+ separated by
# single spaces. A nil +value+ is left out; an Array +value+ is flattened
# into the line by Array#join.
def macro(name, value = nil)
  maybe_new_line
  request = [name, value].compact.join(' ')
  writeln(".#{request}")
end
maybe_new_line()
click to toggle source
# File lib/ronn/roff.rb
#
# Write a newline unless the buffer is empty or its last chunk already
# ends with one.
def maybe_new_line
  tail = @buf.last
  return unless tail

  write "\n" unless tail[-1] == "\n"
end
nop()
click to toggle source
# File lib/ronn/roff.rb
#
# Explicit no-op, used to mark branches that intentionally do nothing.
def nop
  nil
end
normalize_whitespace!(node)
click to toggle source
# File lib/ronn/roff.rb
#
# Collapse whitespace in the text nodes below +node+, in place.
#
# Runs of newlines and spaces in a text node become a single space.
# Leading whitespace is dropped when the text node has no previous
# sibling or follows a block element or <br>; trailing whitespace is
# dropped under the mirrored condition for the next sibling. Text nodes
# left empty are removed. The contents of <pre> are not visited.
def normalize_whitespace!(node)
  if node.is_a?(Array) || node.is_a?(Nokogiri::XML::NodeSet)
    # Iterate over a copy: text nodes may be removed along the way.
    node.to_a.dup.each { |ch| normalize_whitespace!(ch) }
  elsif node.text?
    before = node.previous
    after = node.next
    squeezed = node.content.gsub(/[\n ]+/m, ' ')
    squeezed.lstrip! if before.nil? || block_element?(before.name) || before.name == 'br'
    squeezed.rstrip! if after.nil? || block_element?(after.name) || after.name == 'br'
    if squeezed.empty?
      node.remove
    else
      node.content = squeezed
    end
  elsif node.elem?
    # <pre> keeps its whitespace verbatim, so traversal stops there.
    normalize_whitespace!(node.children) if node.name != 'pre' && node.children
  elsif node.document? || node.fragment?
    normalize_whitespace!(node.children)
  elsif node.is_a?(Nokogiri::XML::DTD) || node.is_a?(Nokogiri::XML::Comment)
    # ignore
    nop
  else
    warn 'unexpected node during whitespace normalization: %p', node
  end
end
previous(node)
click to toggle source
# File lib/ronn/roff.rb
#
# Return the nearest preceding sibling of +node+ that is an element,
# skipping non-element siblings. Returns nil when there is none or when
# +node+ does not respond to #previous.
def previous(node)
  return unless node.respond_to?(:previous)

  candidate = node.previous
  while !candidate.nil? && !candidate.elem?
    candidate = candidate.previous
  end
  candidate
end
quote(text)
click to toggle source
# File lib/ronn/roff.rb
#
# Wrap +text+ in double quotes so it can be passed as one roff macro
# argument.
#
# Embedded double quotes are written as the \(dq special character. A
# backslash-escaped quote (\") would be read by roff as the start of a
# comment and cut off the rest of the argument.
def quote(text)
  # Block form: the replacement is inserted literally, with no backslash
  # interpretation by gsub.
  %("#{text.gsub('"') { '\(dq' }}")
end
remove_extraneous_elements!(doc)
click to toggle source
# File lib/ronn/roff.rb
#
# Remove every comment node from +doc+, in place.
#
# Deleting a node from the NodeSet returned by `parent.children` only
# changes that temporary set and leaves the document untouched, so the
# comment nodes are unlinked with #remove instead. They are collected
# first so the tree is not modified while it is being traversed.
def remove_extraneous_elements!(doc)
  comments = []
  doc.traverse { |node| comments << node if node.comment? }
  comments.each(&:remove)
end
title_heading(name, section, _tagline, manual, version, date)
click to toggle source
# File lib/ronn/roff.rb
#
# Write the generator comments and, when +name+ is given, the .TH title
# line.
#
# name     - page name; upcased and escaped. nil skips the .TH line.
# section  - manual section.
# _tagline - unused.
# manual   - manual name; appended as a fifth .TH field when truthy.
# version  - version string (nil renders as an empty field).
# date     - object responding to #strftime, or nil. nil renders as an
#            empty field instead of raising, since callers may omit it.
def title_heading(name, section, _tagline, manual, version, date)
  comment "generated with Ronn-NG/v#{Ronn.version}"
  comment "http://github.com/apjanke/ronn-ng/tree/#{Ronn.revision}"
  return if name.nil?

  fields = [escape(name.upcase), section, date&.strftime('%B %Y'), version]
  fields << manual if manual
  macro 'TH', fields.map { |field| %("#{field}") }.join(' ')
end
warn(text, *args)
click to toggle source
# File lib/ronn/roff.rb
#
# Emit a warning through Kernel.warn.
#
# text - format string; it is prefixed with "warn: ".
# args - values substituted into +text+ by Kernel#format. They are
#        splatted so each placeholder receives its own argument rather
#        than the whole Array.
def warn(text, *args)
  Kernel.warn format("warn: #{text}", *args)
end
write(text)
click to toggle source
write text to output buffer
# File lib/ronn/roff.rb
#
# Append +text+ to the output buffer; nil and empty text are ignored.
#
# An escaped dot (\.) at the start of a line gets a \& placed in front
# of it, both inside +text+ and when +text+ itself begins a new line
# after a buffer that ends in a newline.
def write(text)
  return if text.nil? || text.empty?

  # lines cannot start with a '.'. insert zero-width character before.
  guarded = text.gsub(/\n\\\./) { "\n\\&\\." }
  tail = @buf.last
  at_line_start = tail && tail[-1] == "\n"
  @buf << '\&' if at_line_start && guarded[0, 2] == '\.'
  @buf << guarded
end
writeln(text)
click to toggle source
write text to output buffer on a new line.
# File lib/ronn/roff.rb
#
# Write +text+ on a line of its own: make sure the buffer is at the
# start of a line, write +text+, then terminate it with a newline.
def writeln(text)
  maybe_new_line
  write(text)
  write("\n")
end