Class: Oga::XML::Parser
- Inherits:
-
LL::Driver
- Object
- LL::Driver
- Oga::XML::Parser
show all
- Defined in:
- lib/oga/xml/parser.rb
Overview
DOM parser for both XML and HTML.
This parser does not produce a dedicated AST, instead it emits XML nodes
directly. Basic usage of this parser is as follows:
parser = Oga::XML::Parser.new('<foo></foo>')
document = parser.parse
To enable HTML parsing you’d use the following instead:
parser = Oga::XML::Parser.new('<foo></foo>', :html => true)
document = parser.parse
In both cases you can use either a String or an IO as the parser input. IO
instances will result in lower memory overhead, especially when parsing large
files.
Constant Summary
collapse
- CONFIG =
LL::DriverConfig.new
- TOKEN_ERROR_MAPPING =
Hash mapping token types to their dedicated error labels.
{
:T_STRING => 'string',
:T_TEXT => 'text',
:T_DOCTYPE_START => 'doctype start',
:T_DOCTYPE_END => 'doctype closing tag',
:T_DOCTYPE_TYPE => 'doctype type',
:T_DOCTYPE_NAME => 'doctype name',
:T_DOCTYPE_INLINE => 'inline doctype rules',
:T_CDATA => 'CDATA',
:T_COMMENT => 'comment',
:T_ELEM_START => 'element start',
:T_ELEM_NAME => 'element name',
:T_ELEM_NS => 'element namespace',
:T_ELEM_END => 'element closing tag',
:T_ATTR => 'attribute',
:T_ATTR_NS => 'attribute namespace',
:T_XML_DECL_START => 'XML declaration start',
:T_XML_DECL_END => 'XML declaration end',
:T_PROC_INS_START => 'processing-instruction start',
:T_PROC_INS_NAME => 'processing-instruction name',
:T_PROC_INS_END => 'processing-instruction closing tag',
-1 => 'end of input'
}
Instance Method Summary
collapse
-
#_rule_0(val) ⇒ Object
-
#_rule_1(val) ⇒ Object
-
#_rule_10(val) ⇒ Object
-
#_rule_11(val) ⇒ Object
-
#_rule_12(val) ⇒ Object
-
#_rule_13(val) ⇒ Object
-
#_rule_14(val) ⇒ Object
-
#_rule_15(val) ⇒ Object
-
#_rule_16(val) ⇒ Object
-
#_rule_17(val) ⇒ Object
-
#_rule_18(val) ⇒ Object
-
#_rule_19(val) ⇒ Object
-
#_rule_2(val) ⇒ Object
-
#_rule_20(val) ⇒ Object
-
#_rule_21(val) ⇒ Object
-
#_rule_22(val) ⇒ Object
-
#_rule_23(val) ⇒ Object
-
#_rule_24(val) ⇒ Object
-
#_rule_25(val) ⇒ Object
-
#_rule_26(val) ⇒ Object
-
#_rule_27(val) ⇒ Object
-
#_rule_28(val) ⇒ Object
-
#_rule_29(val) ⇒ Object
-
#_rule_3(val) ⇒ Object
-
#_rule_30(val) ⇒ Object
-
#_rule_31(val) ⇒ Object
-
#_rule_32(val) ⇒ Object
-
#_rule_33(val) ⇒ Object
-
#_rule_34(val) ⇒ Object
-
#_rule_35(val) ⇒ Object
-
#_rule_36(val) ⇒ Object
-
#_rule_37(val) ⇒ Object
-
#_rule_38(val) ⇒ Object
-
#_rule_39(val) ⇒ Object
-
#_rule_4(val) ⇒ Object
-
#_rule_40(val) ⇒ Object
-
#_rule_41(val) ⇒ Object
-
#_rule_42(val) ⇒ Object
-
#_rule_5(val) ⇒ Object
-
#_rule_6(val) ⇒ Object
-
#_rule_7(val) ⇒ Object
-
#_rule_8(val) ⇒ Object
-
#_rule_9(val) ⇒ Object
-
#after_element(element) ⇒ Oga::XML::Element
-
#each_token {|| ... } ⇒ Object
Yields the next token from the lexer.
-
#initialize(data, options = {}) ⇒ Parser
constructor
A new instance of Parser.
-
#on_attribute(name, ns_name = nil, value = nil) ⇒ Oga::XML::Attribute
-
#on_attributes(attrs) ⇒ Object
-
#on_cdata(text = nil) ⇒ Oga::XML::Cdata
-
#on_comment(text = nil) ⇒ Oga::XML::Comment
-
#on_doctype(options = {}) ⇒ Object
-
#on_document(children = []) ⇒ Oga::XML::Document
-
#on_element(namespace, name, attributes = {}) ⇒ Oga::XML::Element
-
#on_element_children(element, children = []) ⇒ Oga::XML::Element
-
#on_proc_ins(name, text = nil) ⇒ Oga::XML::ProcessingInstruction
-
#on_text(text) ⇒ Oga::XML::Text
-
#on_xml_decl(attributes = []) ⇒ Oga::XML::XmlDeclaration
-
#parser_error(stack_type, stack_value, token_type, token_value) ⇒ Object
Constructor Details
#initialize(data, options = {}) ⇒ Parser
Returns a new instance of Parser
212
213
214
215
216
217
|
# File 'lib/oga/xml/parser.rb', line 212
# Creates a parser for the given input.
#
# @param data the input to parse; it is stored as-is and handed to the lexer
#   (the class overview states a String or an IO may be used)
# @param options [Hash] forwarded unchanged to Lexer.new
def initialize(data, options = {})
  @line  = 1
  @data  = data
  @lexer = Lexer.new(data, options)

  # The freshly built lexer is reset before any token is requested.
  @lexer.reset_native
end
|
Instance Method Details
#_rule_0(val) ⇒ Object
362
363
364
|
# File 'lib/oga/xml/parser.rb', line 362
# Action for rule 0: builds the document from the first matched value.
#
# @param val [Array] values matched by the rule
# @return the result of #on_document
def _rule_0(val)
  children, = val
  on_document(children)
end
|
#_rule_1(val) ⇒ Object
366
367
368
|
# File 'lib/oga/xml/parser.rb', line 366
# Action for rule 1: passes the first matched value through unchanged.
#
# @param val [Array] values matched by the rule
# @return the first element of +val+
def _rule_1(val)
  val.first
end
|
#_rule_10(val) ⇒ Object
410
411
412
|
# File 'lib/oga/xml/parser.rb', line 410
# Action for rule 10: folds the elements of the first matched value together
# with +.
#
# @param val [Array] values matched by the rule; val[0] is the collection
#   being folded
# @return the combined value, or nil when val[0] is empty
def _rule_10(val)
  val.first.reduce(:+)
end
|
#_rule_11(val) ⇒ Object
414
415
416
|
# File 'lib/oga/xml/parser.rb', line 414
# Action for rule 11: builds a CDATA node from the second matched value.
#
# @param val [Array] values matched by the rule
# @return the result of #on_cdata
def _rule_11(val)
  _start, text = val
  on_cdata(text)
end
|
#_rule_12(val) ⇒ Object
418
419
420
|
# File 'lib/oga/xml/parser.rb', line 418
# Action for rule 12: concatenates the first two matched values with +.
#
# @param val [Array] values matched by the rule
# @return the combined value
def _rule_12(val)
  head, tail = val
  head + tail
end
|
#_rule_13(val) ⇒ Object
422
423
424
|
# File 'lib/oga/xml/parser.rb', line 422
# Action for rule 13: ignores the matched values and returns an empty String.
#
# @param val [Array] values matched by the rule (unused)
# @return [String] an empty string
def _rule_13(val)
''
end
|
#_rule_14(val) ⇒ Object
426
427
428
|
# File 'lib/oga/xml/parser.rb', line 426
# Action for rule 14: builds a comment node from the second matched value.
#
# The handler name had been lost from this listing, leaving a bare
# parenthesised expression that returned the raw text instead of a node.
#
# @param val [Array] values matched by the rule
# @return the result of #on_comment
def _rule_14(val)
  on_comment(val[1])
end
|
#_rule_15(val) ⇒ Object
430
431
432
|
# File 'lib/oga/xml/parser.rb', line 430
# Action for rule 15: concatenates the first two matched values with +.
#
# @param val [Array] values matched by the rule
# @return the combined value
def _rule_15(val)
  head, tail = val
  head + tail
end
|
#_rule_16(val) ⇒ Object
434
435
436
|
# File 'lib/oga/xml/parser.rb', line 434
# Action for rule 16: ignores the matched values and returns an empty String.
#
# @param val [Array] values matched by the rule (unused)
# @return [String] an empty string
def _rule_16(val)
''
end
|
#_rule_17(val) ⇒ Object
438
439
440
441
442
|
# File 'lib/oga/xml/parser.rb', line 438
# Action for rule 17: builds a processing instruction from the second (name)
# and third (text) matched values.
#
# @param val [Array] values matched by the rule
# @return the result of #on_proc_ins
def _rule_17(val)
  _start, name, text = val
  on_proc_ins(name, text)
end
|
#_rule_18(val) ⇒ Object
444
445
446
|
# File 'lib/oga/xml/parser.rb', line 444
# Action for rule 18: concatenates the first two matched values with +.
#
# @param val [Array] values matched by the rule
# @return the combined value
def _rule_18(val)
  head, tail = val
  head + tail
end
|
#_rule_19(val) ⇒ Object
448
449
450
|
# File 'lib/oga/xml/parser.rb', line 448
# Action for rule 19: ignores the matched values and returns an empty String.
#
# @param val [Array] values matched by the rule (unused)
# @return [String] an empty string
def _rule_19(val)
''
end
|
#_rule_2(val) ⇒ Object
370
371
372
|
# File 'lib/oga/xml/parser.rb', line 370
# Action for rule 2: passes the first matched value through unchanged.
#
# @param val [Array] values matched by the rule
# @return the first element of +val+
def _rule_2(val)
  val.first
end
|
#_rule_20(val) ⇒ Object
452
453
454
|
# File 'lib/oga/xml/parser.rb', line 452
# Action for rule 20: pairs the first matched value with a nil leading slot.
#
# @param val [Array] values matched by the rule
# @return [Array] a two-element array: nil, then the first matched value
def _rule_20(val)
  [nil, val.first]
end
|
#_rule_21(val) ⇒ Object
456
457
458
|
# File 'lib/oga/xml/parser.rb', line 456
# Action for rule 21: returns the matched values array itself, unmodified.
#
# @param val [Array] values matched by the rule
# @return [Array] the same +val+ object
def _rule_21(val)
val
end
|
#_rule_22(val) ⇒ Object
460
461
462
463
464
|
# File 'lib/oga/xml/parser.rb', line 460
# Action for rule 22: builds an element from a namespace/name pair and its
# attributes.
#
# @param val [Array] values matched by the rule; val[0] is indexed as
#   [namespace, name] and val[1] is passed on as the attributes
# @return the result of #on_element
def _rule_22(val)
  name_pair, attributes = val
  on_element(name_pair[0], name_pair[1], attributes)
end
|
#_rule_23(val) ⇒ Object
466
467
468
469
470
471
472
473
474
|
# File 'lib/oga/xml/parser.rb', line 466
# Action for rule 23: attaches children to an element (when one was produced)
# and then runs the after-element hook.
#
# @param val [Array] values matched by the rule; val[0] is the element and
#   val[1] its children
# @return the result of #after_element
def _rule_23(val)
  element, children = val

  # Children are only attached when there is an element to attach them to;
  # the hook is always invoked, even with a nil element.
  on_element_children(element, children) if element

  after_element(element)
end
|
#_rule_24(val) ⇒ Object
476
477
478
|
# File 'lib/oga/xml/parser.rb', line 476
# Action for rule 24: hands the first matched value to the attributes hook.
#
# @param val [Array] values matched by the rule
# @return the result of #on_attributes
def _rule_24(val)
  on_attributes(val.first)
end
|
#_rule_25(val) ⇒ Object
480
481
482
|
# File 'lib/oga/xml/parser.rb', line 480
# Action for rule 25: builds a namespaced attribute. The matched values arrive
# as namespace, name, value, while #on_attribute takes the name first.
#
# @param val [Array] values matched by the rule
# @return the result of #on_attribute
def _rule_25(val)
  ns_name, name, value = val
  on_attribute(name, ns_name, value)
end
|
#_rule_26(val) ⇒ Object
484
485
486
|
# File 'lib/oga/xml/parser.rb', line 484
# Action for rule 26: builds an attribute without a namespace from the first
# (name) and second (value) matched values.
#
# @param val [Array] values matched by the rule
# @return the result of #on_attribute
def _rule_26(val)
  name, value = val
  on_attribute(name, nil, value)
end
|
#_rule_27(val) ⇒ Object
488
489
490
|
# File 'lib/oga/xml/parser.rb', line 488
# Action for rule 27: builds an XML declaration from the second matched value.
#
# @param val [Array] values matched by the rule
# @return the result of #on_xml_decl
def _rule_27(val)
  _start, attributes = val
  on_xml_decl(attributes)
end
|
#_rule_28(val) ⇒ Object
492
493
494
495
496
497
498
|
# File 'lib/oga/xml/parser.rb', line 492
# Action for rule 28: builds a text node from the first matched value, with
# the second value appended when it is present.
#
# @param val [Array] values matched by the rule
# @return the result of #on_text
def _rule_28(val)
  head, tail = val
  return on_text(head) unless tail

  on_text(head + tail)
end
|
#_rule_29(val) ⇒ Object
500
501
502
|
# File 'lib/oga/xml/parser.rb', line 500
# Action for rule 29: returns the first matched value, with the second value
# appended when it is present.
#
# @param val [Array] values matched by the rule
# @return the first value alone, or the first and second joined with +
def _rule_29(val)
  head, tail = val
  return head unless tail

  head + tail
end
|
#_rule_3(val) ⇒ Object
374
375
376
|
# File 'lib/oga/xml/parser.rb', line 374
# Action for rule 3: passes the first matched value through unchanged.
#
# @param val [Array] values matched by the rule
# @return the first element of +val+
def _rule_3(val)
  val.first
end
|
#_rule_30(val) ⇒ Object
504
505
506
|
# File 'lib/oga/xml/parser.rb', line 504
# Action for rule 30: ignores the matched values and returns nil.
#
# @param val [Array] values matched by the rule (unused)
# @return [nil]
def _rule_30(val)
nil
end
|
#_rule_31(val) ⇒ Object
508
509
510
|
# File 'lib/oga/xml/parser.rb', line 508
# Action for rule 31: returns the second matched value.
#
# @param val [Array] values matched by the rule
# @return the second element of +val+
def _rule_31(val)
  _first, second = val
  second
end
|
#_rule_32(val) ⇒ Object
512
513
514
|
# File 'lib/oga/xml/parser.rb', line 512
# Action for rule 32: returns the second matched value.
#
# @param val [Array] values matched by the rule
# @return the second element of +val+
def _rule_32(val)
  _first, second = val
  second
end
|
#_rule_33(val) ⇒ Object
516
517
518
|
# File 'lib/oga/xml/parser.rb', line 516
# Action for rule 33: concatenates the first two matched values with +.
#
# @param val [Array] values matched by the rule
# @return the combined value
def _rule_33(val)
  head, tail = val
  head + tail
end
|
#_rule_34(val) ⇒ Object
520
521
522
|
# File 'lib/oga/xml/parser.rb', line 520
# Action for rule 34: ignores the matched values and returns an empty String.
#
# @param val [Array] values matched by the rule (unused)
# @return [String] an empty string
def _rule_34(val)
''
end
|
#_rule_35(val) ⇒ Object
524
525
526
|
# File 'lib/oga/xml/parser.rb', line 524
# Action for rule 35: passes the first matched value through unchanged.
#
# @param val [Array] values matched by the rule
# @return the first element of +val+
def _rule_35(val)
  val.first
end
|
#_rule_36(val) ⇒ Object
528
529
530
|
# File 'lib/oga/xml/parser.rb', line 528
# Action for rule 36: passes the first matched value through unchanged.
#
# @param val [Array] values matched by the rule
# @return the first element of +val+
def _rule_36(val)
  val.first
end
|
#_rule_37(val) ⇒ Object
532
533
534
|
# File 'lib/oga/xml/parser.rb', line 532
# Action for rule 37: passes the first matched value through unchanged.
#
# @param val [Array] values matched by the rule
# @return the first element of +val+
def _rule_37(val)
  val.first
end
|
#_rule_38(val) ⇒ Object
536
537
538
|
# File 'lib/oga/xml/parser.rb', line 536
# Action for rule 38: passes the first matched value through unchanged.
#
# @param val [Array] values matched by the rule
# @return the first element of +val+
def _rule_38(val)
  val.first
end
|
#_rule_39(val) ⇒ Object
540
541
542
|
# File 'lib/oga/xml/parser.rb', line 540
# Action for rule 39: passes the first matched value through unchanged.
#
# @param val [Array] values matched by the rule
# @return the first element of +val+
def _rule_39(val)
  val.first
end
|
#_rule_4(val) ⇒ Object
378
379
380
|
# File 'lib/oga/xml/parser.rb', line 378
# Action for rule 4: passes the first matched value through unchanged.
#
# @param val [Array] values matched by the rule
# @return the first element of +val+
def _rule_4(val)
  val.first
end
|
#_rule_40(val) ⇒ Object
544
545
546
|
# File 'lib/oga/xml/parser.rb', line 544
# Action for rule 40: passes the first matched value through unchanged.
#
# @param val [Array] values matched by the rule
# @return the first element of +val+
def _rule_40(val)
  val.first
end
|
#_rule_41(val) ⇒ Object
548
549
550
|
# File 'lib/oga/xml/parser.rb', line 548
# Action for rule 41: passes the first matched value through unchanged.
#
# @param val [Array] values matched by the rule
# @return the first element of +val+
def _rule_41(val)
  val.first
end
|
#_rule_42(val) ⇒ Object
552
553
554
|
# File 'lib/oga/xml/parser.rb', line 552
# Action for rule 42: passes the first matched value through unchanged.
#
# @param val [Array] values matched by the rule
# @return the first element of +val+
def _rule_42(val)
  val.first
end
|
#_rule_5(val) ⇒ Object
382
383
384
|
# File 'lib/oga/xml/parser.rb', line 382
# Action for rule 5: passes the first matched value through unchanged.
#
# @param val [Array] values matched by the rule
# @return the first element of +val+
def _rule_5(val)
  val.first
end
|
#_rule_6(val) ⇒ Object
386
387
388
|
# File 'lib/oga/xml/parser.rb', line 386
# Action for rule 6: passes the first matched value through unchanged.
#
# @param val [Array] values matched by the rule
# @return the first element of +val+
def _rule_6(val)
  val.first
end
|
#_rule_7(val) ⇒ Object
390
391
392
|
# File 'lib/oga/xml/parser.rb', line 390
# Action for rule 7: passes the first matched value through unchanged.
#
# @param val [Array] values matched by the rule
# @return the first element of +val+
def _rule_7(val)
  val.first
end
|
#_rule_8(val) ⇒ Object
394
395
396
|
# File 'lib/oga/xml/parser.rb', line 394
# Action for rule 8: passes the first matched value through unchanged.
#
# @param val [Array] values matched by the rule
# @return the first element of +val+
def _rule_8(val)
  val.first
end
|
#_rule_9(val) ⇒ Object
398
399
400
401
402
403
404
405
406
407
408
|
# File 'lib/oga/xml/parser.rb', line 398
# Action for rule 9: builds a doctype from matched values 1 through 5 (name,
# type, public ID, system ID and inline rules, in that order).
#
# @param val [Array] values matched by the rule; val[0] is not used
# @return the result of #on_doctype
def _rule_9(val)
  _start, name, type, public_id, system_id, inline_rules = val

  on_doctype(
    :name         => name,
    :type         => type,
    :public_id    => public_id,
    :system_id    => system_id,
    :inline_rules => inline_rules
  )
end
|
341
342
343
|
# File 'lib/oga/xml/parser.rb', line 341
# Hook invoked after an element has been handled; this implementation returns
# the element unchanged.
#
# @param element the element that was just processed
# @return [Oga::XML::Element] the same +element+
def after_element(element)
element
end
|
#each_token {|| ... } ⇒ Object
Yields the next token from the lexer.
222
223
224
225
226
227
228
229
230
|
# File 'lib/oga/xml/parser.rb', line 222
# Yields the next token from the lexer.
#
# Each lexer token is yielded as a [type, value] pair. Whenever the lexer
# supplies a line number it is remembered in @line. After the lexer is done a
# final [-1, -1] pair is yielded.
#
# @yieldparam token [Array] a [type, value] pair
def each_token
  @lexer.advance do |type, value, line|
    @line = line if line

    token = [type, value]
    yield token
  end

  end_of_input = [-1, -1]
  yield end_of_input
end
|
#on_attribute(name, ns_name = nil, value = nil) ⇒ Oga::XML::Attribute
349
350
351
352
353
354
355
|
# File 'lib/oga/xml/parser.rb', line 349
# Builds an attribute node from its name, optional namespace name and
# optional value.
#
# @param name the attribute name, passed as :name
# @param ns_name the namespace name, passed as :namespace_name (nil when the
#   attribute has none)
# @param value the attribute value, passed as :value
# @return [Oga::XML::Attribute]
def on_attribute(name, ns_name = nil, value = nil)
Attribute.new(
:namespace_name => ns_name,
:name => name,
:value => value
)
end
|
#on_attributes(attrs) ⇒ Object
358
359
360
|
# File 'lib/oga/xml/parser.rb', line 358
# Hook for a parsed set of attributes; this implementation returns the
# attributes unchanged.
#
# @param attrs the parsed attributes
# @return the same +attrs+ object
def on_attributes(attrs)
attrs
end
|
281
282
283
|
# File 'lib/oga/xml/parser.rb', line 281
# Builds a CDATA node holding the given text.
#
# @param text the CDATA contents, passed as :text (nil by default)
# @return [Oga::XML::Cdata]
def on_cdata(text = nil)
Cdata.new(:text => text)
end
|
287
288
289
|
# File 'lib/oga/xml/parser.rb', line 287
# Builds a comment node holding the given text.
#
# The method name had been dropped from this listing, which left a signature
# that does not parse; it is restored to match the method summary entry.
#
# @param text the comment contents, passed as :text (nil by default)
# @return [Oga::XML::Comment]
def on_comment(text = nil)
  Comment.new(:text => text)
end
|
#on_doctype(options = {}) ⇒ Object
275
276
277
|
# File 'lib/oga/xml/parser.rb', line 275
# Builds a doctype node from the given options.
#
# @param options [Hash] passed unchanged to Doctype.new (rule 9 supplies
#   :name, :type, :public_id, :system_id and :inline_rules)
# @return the new Doctype instance
def on_doctype(options = {})
Doctype.new(options)
end
|
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
|
# File 'lib/oga/xml/parser.rb', line 256
# Builds the document and distributes the parsed top-level nodes over it.
#
# The document type is :html when the lexer reports HTML mode, :xml
# otherwise. A Doctype child becomes the document's doctype, an
# XmlDeclaration child becomes its XML declaration, and every other child is
# appended to the document's children.
#
# @param children [Array] the parsed top-level nodes
# @return [Oga::XML::Document]
def on_document(children = [])
  type     = @lexer.html? ? :html : :xml
  document = Document.new(:type => type)

  children.each do |child|
    case child
    when Doctype
      document.doctype = child
    when XmlDeclaration
      document.xml_declaration = child
    else
      document.children << child
    end
  end

  document
end
|
#on_element(namespace, name, attributes = {}) ⇒ Oga::XML::Element
320
321
322
323
324
325
326
327
328
|
# File 'lib/oga/xml/parser.rb', line 320
# Builds an element node from its namespace name, name and attributes.
#
# @param namespace the namespace name, passed as :namespace_name
# @param name the element name, passed as :name
# @param attributes the element's attributes, passed as :attributes
# @return [Oga::XML::Element]
def on_element(namespace, name, attributes = {})
  Element.new(
    :namespace_name => namespace,
    :name           => name,
    :attributes     => attributes
  )
end
|
#on_element_children(element, children = []) ⇒ Oga::XML::Element
333
334
335
336
337
|
# File 'lib/oga/xml/parser.rb', line 333
# Assigns the given children to the element.
#
# @param element the element receiving the children
# @param children [Array] the child nodes to assign
# @return [Oga::XML::Element] the same +element+
def on_element_children(element, children = [])
  element.tap { |node| node.children = children }
end
|
294
295
296
|
# File 'lib/oga/xml/parser.rb', line 294
# Builds a processing-instruction node.
#
# @param name the instruction name, passed as :name
# @param text the instruction text, passed as :text (nil by default)
# @return [Oga::XML::ProcessingInstruction]
def on_proc_ins(name, text = nil)
ProcessingInstruction.new(:name => name, :text => text)
end
|
312
313
314
|
# File 'lib/oga/xml/parser.rb', line 312
# Builds a text node holding the given text.
#
# @param text the text contents, passed as :text
# @return [Oga::XML::Text]
def on_text(text)
Text.new(:text => text)
end
|
300
301
302
303
304
305
306
307
308
|
# File 'lib/oga/xml/parser.rb', line 300
# Builds an XML declaration from a list of attributes.
#
# Each attribute becomes one option, keyed by the attribute's name converted
# to a Symbol and holding the attribute's value.
#
# @param attributes [Array] objects responding to #name and #value
# @return [Oga::XML::XmlDeclaration]
def on_xml_decl(attributes = [])
  options = attributes.each_with_object({}) do |attr, collected|
    collected[attr.name.to_sym] = attr.value
  end

  XmlDeclaration.new(options)
end
|
#parser_error(stack_type, stack_value, token_type, token_value) ⇒ Object
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
|
# File 'lib/oga/xml/parser.rb', line 236
# Raises a parser error describing what was found and, where known, what was
# expected, followed by the current line number.
#
# Token types are translated to readable labels through TOKEN_ERROR_MAPPING
# when an entry exists. A stack type other than :rule, :terminal or :eof
# previously left the message unset, so building the message failed with a
# NoMethodError instead of reporting the parse error; such cases now produce a
# generic "Unexpected ..." message.
#
# @param stack_type identifier resolved through #id_to_type
# @param stack_value the rule number, or the terminal identifier resolved
#   through #id_to_terminal
# @param token_type the type of the token that was encountered
# @param token_value the value of that token (not used in the message)
# @raise [LL::ParserError] always
def parser_error(stack_type, stack_value, token_type, token_value)
  message =
    case id_to_type(stack_type)
    when :rule
      "Unexpected #{token_type} for rule #{stack_value}"
    when :terminal
      expected = id_to_terminal(stack_value)
      expected = TOKEN_ERROR_MAPPING[expected] || expected
      got      = TOKEN_ERROR_MAPPING[token_type] || token_type

      "Unexpected #{got}, expected #{expected} instead"
    when :eof
      'Unexpected end of input'
    else
      # Fallback so an unrecognised stack type still yields a ParserError.
      "Unexpected #{TOKEN_ERROR_MAPPING[token_type] || token_type}"
    end

  raise LL::ParserError, "#{message} on line #{@line}"
end
|