SIGN IN SIGN UP
2013-10-24 20:25:52 +02:00
module Docs
  # Parses a string of HTML with Nokogiri, either as a complete document or
  # as a fragment, depending on how the content begins.
  #
  # After construction, {#html} holds the parsed Nokogiri object and {#title}
  # holds the text of the <title> element when the content was parsed as a
  # full document (it stays nil for fragments and for documents without a
  # <title>).
  class Parser
    # Matches content that starts like a full HTML document: optional leading
    # whitespace and HTML comments, followed by "<!doctype" or "<html"
    # (case-insensitive).
    #
    # The `m` flag lets a leading comment span several lines, and the
    # negative lookahead makes every comment end at its *first* "-->", so the
    # pattern can neither swallow markup that sits between two comments nor
    # backtrack across many comment terminators.
    DOCUMENT_RGX = /\A(?:\s|<!--(?:(?!-->).)*-->)*<(?:!doctype|html)/im

    # @return [String, nil] content of the <title> element, set only when the
    #   input was parsed as a document and a <title> was found
    # @return [Object] the value returned by Nokogiri::HTML.parse (document)
    #   or Nokogiri::HTML.fragment (fragment)
    attr_reader :title, :html

    # @param content [String] raw HTML, either a whole document or a fragment
    def initialize(content)
      @content = content
      @html = document? ? parse_as_document : parse_as_fragment
    end

    private

    # Whether the content begins like a complete HTML document.
    #
    # Regexp#match? is called on the constant (not on @content) so that nil
    # content yields false instead of raising.
    #
    # @return [Boolean]
    def document?
      DOCUMENT_RGX.match?(@content)
    end

    # Parses the content as a full document (UTF-8) and records its title.
    #
    # @return [Object] the parsed Nokogiri document
    def parse_as_document
      document = Nokogiri::HTML.parse(@content, nil, 'UTF-8')
      # at_css returns nil when there is no <title>; keep @title nil then.
      @title = document.at_css('title')&.content
      document
    end

    # Parses the content as an HTML fragment (UTF-8).
    #
    # @return [Object] the parsed Nokogiri fragment
    def parse_as_fragment
      Nokogiri::HTML.fragment(@content, 'UTF-8')
    end
  end
end