Blame: vlib/encoding/xml/parser.v - vlang/v

vlang / v UNCLAIMED

Simple, fast, safe, compiled language for developing maintainable software. Compiles itself in <1s with zero library dependencies. Supports automatic C => V translation. https://vlang.io

0 0 0 V

Normal View History Raw

vlib: add an `encoding.xml` module with parser, validation, entity encoding, unit tests (#19708) 2023-11-06 13:14:30 +00:00			`module xml`

			`import io`
			`import os`
			`import strings`

// Attributes used by parse_prolog when the XML declaration carries no attributes of its own.
const default_prolog_attributes = {
	'version':  '1.0'
	'encoding': 'UTF-8'
}
// Initial capacity handed to strings.new_builder by the parsers in this file.
const default_string_builder_cap = 32

// Lengths of the DTD declaration keywords; parse_element and parse_entity slice this many
// bytes off the front of a declaration before looking at its contents.
const element_len = '<!ELEMENT'.len
const entity_len = '<!ENTITY'.len

// Byte sequences compared against bytes read after a partial control sequence:
// doctype_chars after "<!D", double_dash and c_tag after "<!", data_chars after "<![C".
const doctype_chars = 'OCTYPE'.bytes()
const double_dash = '--'.bytes()
const c_tag = '[C'.bytes()
const data_chars = 'DATA'.bytes()

// UTF-8 byte order mark, split into its first byte and the two bytes that must follow it.
const byte_order_marking_first = u8(0xEF)
const byte_order_marking_bytes = [u8(0xBB), 0xBF]
vlib: add an `encoding.xml` module with parser, validation, entity encoding, unit tests (#19708) 2023-11-06 13:14:30 +00:00
			`// Helper types to assist in parsing`

			// TextSpan holds two byte offsets into a string that is being parsed. The parsers in
			// this file use it as slice bounds (`s[span.start..span.end]`), so `end` is exclusive.
			struct TextSpan {
			mut:
				start int
				end   int
			}

			// AttributeParserState is the state of the scanner in parse_attributes.
			enum AttributeParserState {
				key // reading an attribute name, up to the "="
				eq // an "=" was read; the opening quote of the value is expected
				value // inside a quoted attribute value
			}

			// parse_attributes parses the attribute part of a tag (the text that follows the tag
			// name) into a map from attribute name to attribute value. Names are stored with
			// surrounding whitespace removed; values are stored exactly as written between the
			// quotes.
			//
			// A value is terminated only by the same quote character that opened it, so
			// `a="it's"` yields `it's`. Whitespace between "=" and the opening quote is accepted.
			//
			// Errors are returned when the text contains "<", when two "=" follow each other, or
			// when anything other than whitespace or a quote follows "=". A trailing name without
			// a value, or a value whose closing quote is missing, is ignored.
			fn parse_attributes(attribute_contents string) !map[string]string {
				if attribute_contents.contains_u8(`<`) {
					return error('Malformed XML. Found "<" in attribute string: "${attribute_contents}"')
				}
				mut attributes := map[string]string{}

				mut state := AttributeParserState.key
				mut key_span, mut value_span := TextSpan{}, TextSpan{}
				// The quote character that opened the current value.
				mut opening_quote := u8(0)

				for index, ch in attribute_contents {
					match state {
						.key {
							if ch == `=` {
								state = AttributeParserState.eq
							} else {
								key_span.end = index + 1
							}
						}
						.eq {
							match ch {
								`=` {
									return error('Duplicate "=" in attribute string: "${attribute_contents}"')
								}
								`'`, `"` {
									opening_quote = ch
									state = AttributeParserState.value
									value_span.start = index + 1
								}
								` `, `\t`, `\r`, `\n` {
									// XML allows whitespace between "=" and the quoted value.
								}
								else {
									return error('Invalid character in attribute string: "${attribute_contents}"')
								}
							}
						}
						.value {
							// Only the quote that opened the value closes it; the other quote
							// character is ordinary content.
							if ch == opening_quote {
								state = AttributeParserState.key
								value_span.end = index
								attributes[attribute_contents[key_span.start..key_span.end].trim_space()] = attribute_contents[value_span.start..value_span.end]

								// The next attribute name starts right after the closing quote.
								key_span.start = index + 1
								key_span.end = index + 1
							}
						}
					}
				}

				return attributes
			}

			// parse_comment reads the body of a comment from the reader, starting right after the
			// opening "<!--", and returns it as an XMLComment. Reading stops at "-->". A "--" that
			// is not immediately followed by ">" is an error. A single "-" and the byte after it
			// are kept as comment text.
			fn parse_comment(mut reader io.Reader) !XMLComment {
				mut text := strings.new_builder(default_string_builder_cap)
				mut scratch := [u8(0)]
				for {
					current := next_char(mut reader, mut scratch)!
					if current != `-` {
						text.write_u8(current)
						continue
					}

					// A dash: whether it starts the terminator depends on the next byte.
					following := next_char(mut reader, mut scratch)!
					if following != `-` {
						text.write_u8(current)
						text.write_u8(following)
						continue
					}

					// "--" was read, so the comment has to end here.
					if next_char(mut reader, mut scratch)! != `>` {
						return error('XML Comment not closed. Expected ">".')
					}
					break
				}

				return XMLComment{text.str()}
			}

			// CDATAParserState tracks how many consecutive "]" characters parse_cdata has just read
			// while it looks for the closing "]]>".
			enum CDATAParserState {
				normal // the previous character was not "]"
				single // exactly one "]" was just read
				double // two or more "]" in a row were just read
			}

			// parse_cdata reads a CDATA section from the reader, starting right after "<![CDATA",
			// and returns the text between the opening "[" and the closing "]]>".
			//
			// An error is returned when the section does not start with "[". Previously the first
			// byte was dropped without being checked, and an input such as "<![CDATA]]>" produced
			// an invalid slice range.
			fn parse_cdata(mut reader io.Reader) !XMLCData {
				mut contents_buf := strings.new_builder(default_string_builder_cap)

				mut state := CDATAParserState.normal
				mut local_buf := [u8(0)]

				for {
					ch := next_char(mut reader, mut local_buf)!
					contents_buf.write_u8(ch)
					match ch {
						`]` {
							match state {
								.double {
									// Another ] after the ]] for some reason. Keep the state
								}
								.single {
									state = .double
								}
								.normal {
									state = .single
								}
							}
						}
						`>` {
							match state {
								.double {
									// "]]>" was read: the section is complete.
									break
								}
								else {
									state = .normal
								}
							}
						}
						else {
							state = .normal
						}
					}
				}

				contents := contents_buf.str().trim_space()
				if !contents.ends_with(']]>') {
					return error('CDATA section not closed.')
				}
				// The buffer must be "[" + text + "]]>"; without the "[" the slice below would
				// silently drop a content byte or use an invalid range.
				if !contents.starts_with('[') {
					return error('Invalid XML. Expected "[" after "<![CDATA".')
				}
				return XMLCData{contents[1..contents.len - 3]}
			}

			// parse_entity parses one "<!ENTITY name value>" declaration found at the start of
			// `contents`. It returns the entity together with the text that follows the
			// declaration's closing ">".
			//
			// The name ends at the first space, tab, carriage return or newline, so declarations
			// that use CRLF line endings or tabs as separators are handled as well. The value has
			// surrounding whitespace and quote characters removed.
			//
			// Errors are returned when the declaration is not closed, or when the name or the
			// value is empty.
			fn parse_entity(contents string) !(DTDEntity, string) {
				// We find the nearest '>' to the start of the ENTITY
				entity_end := contents.index('>') or { return error('Entity declaration not closed.') }
				entity_contents := contents[entity_len..entity_end]

				// The name runs up to the first whitespace character of any kind.
				after_keyword := entity_contents.trim_left(' \t\r\n')
				name_end := after_keyword.index_any(' \t\r\n')
				name := if name_end == -1 { after_keyword } else { after_keyword[..name_end] }
				if name == '' {
					return error('Entity is missing name.')
				}
				value := entity_contents.all_after_first(name).trim_space().trim('"\'')
				if value.len == 0 {
					return error('Entity is missing value.')
				}

				// TODO: Add support for SYSTEM and PUBLIC entities

				return DTDEntity{name, value}, contents[entity_end + 1..]
			}

			// parse_element parses one "<!ELEMENT name (child,child,...)>" declaration found at the
			// start of `contents`. It returns the element together with the text that follows the
			// declaration's closing ">".
			//
			// The name ends at the first space, tab, carriage return or newline. The definition
			// must be a parenthesised, comma separated list; its items are returned as written.
			//
			// Errors are returned when the declaration is not closed, when the name is empty or
			// contains a character that is not accepted below, or when the definition is not a
			// parenthesised list.
			fn parse_element(contents string) !(DTDElement, string) {
				// We find the nearest '>' to the start of the ELEMENT
				element_end := contents.index('>') or { return error('Element declaration not closed.') }
				element_contents := contents[element_len..element_end].trim_left(' \t\r\n')

				mut name_span := TextSpan{}

				for ch in element_contents {
					match ch {
						// Any whitespace ends the name; `\r` is included so that CRLF line
						// endings are not reported as an invalid name character.
						` `, `\t`, `\r`, `\n` {
							break
						}
						// Characters accepted in an element name are:
						// 1. Lowercase alphabet - a-z
						// 2. Uppercase alphabet - A-Z
						// 3. Numbers - 0-9
						// 4. Underscore - _
						// 5. Colon - :
						// 6. Period - .
						// 7. Hyphen - -
						`a`...`z`, `A`...`Z`, `0`...`9`, `_`, `:`, `.`, `-` {
							name_span.end++
						}
						else {
							return error('Invalid character in element name: "${ch}"')
						}
					}
				}

				name := element_contents[name_span.start..name_span.end]
				if name == '' {
					return error('Element is missing name.')
				}
				definition_string := element_contents.all_after_first(name).trim_space().trim('"\'')

				definition := if definition_string.starts_with('(') {
					// We have a list of possible children

					// Ensure that both ( and ) are present
					if !definition_string.ends_with(')') {
						return error('Element declaration not closed.')
					}

					definition_string.trim('()').split(',')
				} else {
					// Invalid definition
					return error('Invalid element definition: ${definition_string}')
				}

				// TODO: Add support for SYSTEM and PUBLIC entities

				return DTDElement{name, definition}, contents[element_end + 1..]
			}

			// parse_doctype reads a DOCTYPE declaration from the reader, starting right after
			// "<!DOCTYPE", up to the ">" that closes it. Nested "<" ... ">" pairs inside the
			// declaration are counted so that the ">" of an inner declaration does not end it.
			//
			// The text before the first "[" becomes the document type name. The text between "["
			// and "]" is parsed as a sequence of "<!ENTITY" and "<!ELEMENT" declarations; anything
			// else in that list is an error.
			//
			// NOTE(review): for a DOCTYPE without a "[", the name and the list are both derived
			// from the whole declaration if all_before/all_after return their input when the
			// separator is missing, which would end in the "Unknown DOCTYPE list item" error -
			// confirm against the string API and decide whether that is intended.
			fn parse_doctype(mut reader io.Reader) !DocumentType {
				// The opening "<" of "<!DOCTYPE" was consumed by the caller, hence 1.
				mut nesting := 1
				mut raw := strings.new_builder(default_string_builder_cap)
				mut scratch := [u8(0)]
				for nesting > 0 {
					current := next_char(mut reader, mut scratch)!
					raw.write_u8(current)
					if current == `<` {
						nesting++
					} else if current == `>` {
						nesting--
					}
				}

				declaration := raw.str().trim_space()
				doctype_name := declaration.all_before('[').trim_space()

				mut pending := declaration.all_after('[').all_before(']').trim_space()
				mut entries := []DTDListItem{}

				// Consume one declaration per iteration until the list text is used up.
				for pending.len > 0 {
					if pending.starts_with('<!ENTITY') {
						entity, rest := parse_entity(pending)!
						entries << entity
						pending = rest.trim_space()
						continue
					}
					if pending.starts_with('<!ELEMENT') {
						element, rest := parse_element(pending)!
						entries << element
						pending = rest.trim_space()
						continue
					}
					return error('Unknown DOCTYPE list item: ${pending}')
				}

				return DocumentType{
					name: doctype_name
					dtd:  DocumentTypeDefinition{
						list: entries
					}
				}
			}

			// parse_prolog reads everything that precedes the root element: optional leading
			// whitespace and UTF-8 byte order mark, an optional "<?xml ... ?>" declaration, and any
			// comments and at most one DOCTYPE that follow the declaration.
			//
			// It returns the collected Prolog together with the byte that follows the "<" of the
			// root element, which the caller passes on to parse_single_node. When the document has
			// no "<?" declaration, an empty Prolog is returned right after the first "<".
			//
			// Errors are returned for a malformed byte order mark, for input that does not start
			// with "<", for a malformed declaration, for a declaration without a version, for a
			// second DOCTYPE, and for an unknown "<!" sequence.
			fn parse_prolog(mut reader io.Reader) !(Prolog, u8) {
				// Skip leading whitespace and an optional UTF-8 byte order mark
				mut local_buf := [u8(0)]
				mut ch := next_char(mut reader, mut local_buf)!
				for {
					match ch {
						` `, `\t`, `\r`, `\n` {
							ch = next_char(mut reader, mut local_buf)!
							continue
						}
						`<` {
							break
						}
						byte_order_marking_first {
							// UTF-8 BOM
							// assumes reader.read delivers both remaining bytes in one call - TODO confirm
							mut bom_buf := [u8(0), 0]
							if reader.read(mut bom_buf)! != 2 {
								return error('Invalid UTF-8 BOM.')
							}
							if bom_buf != byte_order_marking_bytes {
								return error('Invalid UTF-8 BOM.')
							}
							ch = next_char(mut reader, mut local_buf)!
							continue
						}
						else {
							return error('Expecting a prolog or root node starting with "<".')
						}
					}
				}

				// Anything other than "?" after the first "<" means there is no declaration and
				// the root element has already started.
				ch = next_char(mut reader, mut local_buf)!
				if ch != `?` {
					return Prolog{}, ch
				}

				// The declaration has to spell out "<?xml".
				ch = next_char(mut reader, mut local_buf)!
				if ch != `x` {
					return error('Expecting a prolog starting with "<?x".')
				}

				ch = next_char(mut reader, mut local_buf)!
				if ch != `m` {
					return error('Expecting a prolog starting with "<?xm".')
				}

				ch = next_char(mut reader, mut local_buf)!
				if ch != `l` {
					return error('Expecting a prolog starting with "<?xml".')
				}

				mut prolog_buffer := strings.new_builder(default_string_builder_cap)

				// Keep reading character by character until we find the end of the prolog
				mut found_question_mark := false

				for {
					ch = next_char(mut reader, mut local_buf)!
					match ch {
						`?` {
							if found_question_mark {
								return error('Invalid prolog: Two question marks found in a row.')
							}
							found_question_mark = true
						}
						`>` {
							if found_question_mark {
								break
							}
							return error('Invalid prolog: Found ">" before "?".')
						}
						else {
							// A "?" that was not followed by ">" belongs to the attribute text.
							if found_question_mark {
								found_question_mark = false
								prolog_buffer.write_u8(`?`)
							}
							prolog_buffer.write_u8(ch)
						}
					}
				}

				prolog_attributes := prolog_buffer.str().trim_space()

				// A declaration without attributes gets the default version and encoding.
				attributes := if prolog_attributes.len == 0 {
					default_prolog_attributes
				} else {
					parse_attributes(prolog_attributes)!
				}

				version := attributes['version'] or { return error('XML declaration missing version.') }
				encoding := attributes['encoding'] or { 'UTF-8' }

				mut comments := []XMLComment{}
				mut doctype := DocumentType{
					name: ''
					dtd:  ''
				}
				mut found_doctype := false
				// Collect comments and the DOCTYPE until the root element starts.
				for {
					ch = next_char(mut reader, mut local_buf)!
					match ch {
						` `, `\t`, `\n` {
							continue
						}
						`<` {
							// We have a comment, DOCTYPE, or root node
							ch = next_char(mut reader, mut local_buf)!
							match ch {
								`!` {
									// A comment or DOCTYPE
									match next_char(mut reader, mut local_buf)! {
										`-` {
											// A comment
											if next_char(mut reader, mut local_buf)! != `-` {
												return error('Invalid comment.')
											}
											comments << parse_comment(mut reader)!
										}
										`D` {
											if found_doctype {
												return error('Duplicate DOCTYPE declaration.')
											}
											// <!D -> OCTYPE
											mut doc_buf := []u8{len: 6}
											if reader.read(mut doc_buf)! != 6 {
												return error('Invalid DOCTYPE.')
											}
											if doc_buf != doctype_chars {
												return error('Invalid DOCTYPE.')
											}
											found_doctype = true
											doctype = parse_doctype(mut reader)!
										}
										else {
											return error('Unsupported control sequence found in prolog.')
										}
									}
								}
								else {
									// We have found the start of the root node
									break
								}
							}
						}
						// Every other byte between the declaration and the root node is skipped.
						else {}
					}
				}

				return Prolog{
					version:  version
					encoding: encoding
					doctype:  doctype
					comments: comments
				}, ch
			}

			// parse_children reads the contents of the element `name`, whose start tag has already
			// been consumed, up to and including its end tag "</name>". It returns the finished
			// XMLNode built from `name`, `attributes` and the collected children.
			//
			// Children are comments, CDATA sections, nested elements (parsed through
			// parse_single_node) and text. Text is trimmed, has "\r\n" replaced by "\n", and is
			// added only when it is not empty.
			//
			// Errors are returned for an incomplete or unknown "<!" sequence, for an end tag that
			// is incomplete or does not match `name`, and for any error raised while parsing a
			// child.
			fn parse_children(name string, attributes map[string]string, mut reader io.Reader) !XMLNode {
				mut inner_contents := strings.new_builder(default_string_builder_cap)

				mut children := []XMLNodeContents{}
				mut local_buf := [u8(0)]

				for {
					ch := next_char(mut reader, mut local_buf)!
					match ch {
						`<` {
							second_char := next_char(mut reader, mut local_buf)!
							match second_char {
								`!` {
									// Comment, CDATA
									// NOTE(review): text read before this point is not added to
									// `children` here; it stays in inner_contents until the next
									// child element or the end tag - confirm the ordering is intended.
									mut next_two := [u8(0), 0]
									if reader.read(mut next_two)! != 2 {
										return error('Invalid XML. Incomplete comment or CDATA declaration.')
									}
									if next_two == double_dash {
										// Comment
										comment := parse_comment(mut reader)!
										children << comment
									} else if next_two == c_tag {
										// <![CDATA -> DATA
										mut cdata_buf := []u8{len: 4}
										if reader.read(mut cdata_buf)! != 4 {
											return error('Invalid XML. Incomplete CDATA declaration.')
										}
										if cdata_buf != data_chars {
											return error('Invalid XML. Expected "CDATA" after "<![C".')
										}
										cdata := parse_cdata(mut reader)!
										children << cdata
									} else {
										return error('Invalid XML. Unknown control sequence: ${next_two.bytestr()}')
									}
								}
								`/` {
									// End of node
									// Read exactly as many bytes as "name>" has and compare them.
									mut node_end_buffer := []u8{len: name.len + 1}
									if reader.read(mut node_end_buffer)! != name.len + 1 {
										return error('Invalid XML. Incomplete node end.')
									}

									mut ending_chars := name.bytes()
									ending_chars << `>`

									if node_end_buffer != ending_chars {
										return error('XML node <${name}> not closed.')
									}

									collected_contents := inner_contents.str().trim_space()
									if collected_contents.len > 0 {
										// We have some inner text
										children << collected_contents.replace('\r\n', '\n')
									}
									return XMLNode{
										name:       name
										attributes: attributes
										children:   children
									}
								}
								else {
									// Start of child node
									// NOTE(review): parse_single_node as it appears in this file does
									// not produce the message matched below - verify whether this
									// special case is still reachable.
									child := parse_single_node(second_char, mut reader) or {
										if err.msg() == 'XML node cannot start with "</".' {
											return error('XML node <${name}> not closed.')
										} else {
											return err
										}
									}
									// Text collected before the child is added ahead of it.
									text := inner_contents.str().trim_space()
									if text.len > 0 {
										children << text.replace('\r\n', '\n')
									}
									children << child
								}
							}
						}
						else {
							// Plain text: collect it until the next tag.
							inner_contents.write_u8(ch)
						}
					}
				}
				return error('XML node <${name}> not closed.')
			}

// parse_single_node parses a single XML node from the reader. The first character of the tag is passed
// in as the first_char parameter.
// This function is meant to assist in parsing nested nodes one at a time. Using this function as
// opposed to the recommended static functions makes it easier to parse smaller nodes in extremely large
// XML documents without running out of memory.
//
// The tag is read up to its first ">". A tag that ends with "/" is self-closing and is returned
// with its attributes and no children; otherwise the children are read through parse_children.
// An error is returned when the tag has no name (for example "< >"), when its attributes are
// malformed, or when reading the children fails.
pub fn parse_single_node(first_char u8, mut reader io.Reader) !XMLNode {
	mut contents := strings.new_builder(default_string_builder_cap)
	contents.write_u8(first_char)

	mut local_buf := [u8(0)]
	for {
		ch := next_char(mut reader, mut local_buf)!
		if ch == `>` {
			break
		}
		contents.write_u8(ch)
	}

	tag_contents := contents.str().trim_space()
	// A tag made only of whitespace has no first part to take the name from.
	if tag_contents == '' {
		return error('Invalid XML. Found a tag without a name.')
	}

	parts := tag_contents.split_any(' \t\r\n')
	name := parts[0].trim_right('/')
	if name == '' {
		return error('Invalid XML. Found a tag without a name.')
	}

	// Check if it is a self-closing tag
	if tag_contents.ends_with('/') {
		// We're not looking for children and inner text
		return XMLNode{
			name:       name
			attributes: parse_attributes(tag_contents[name.len..tag_contents.len - 1].trim_space())!
		}
	}

	attribute_string := tag_contents[name.len..].trim_space()
	attributes := parse_attributes(attribute_string)!

	return parse_children(name, attributes, mut reader)
}

			// XMLDocument.from_string parses an XML document that is held in memory as a string.
			// The bytes of the string are wrapped in a FullBufferReader and handed to
			// XMLDocument.from_reader; its errors are passed on unchanged.
			pub fn XMLDocument.from_string(raw_contents string) !XMLDocument {
				source_bytes := raw_contents.bytes()
				mut buffered := FullBufferReader{
					contents: source_bytes
				}
				document := XMLDocument.from_reader(mut buffered)!
				return document
			}

			// XMLDocument.from_file parses the XML document stored at `path`. The whole file is
			// read into memory before parsing starts; for files that are too large for that, use
			// XMLDocument.from_reader instead. Errors from reading the file and from parsing are
			// passed on unchanged.
			pub fn XMLDocument.from_file(path string) !XMLDocument {
				file_bytes := os.read_bytes(path)!
				mut buffered := FullBufferReader{
					contents: file_bytes
				}
				document := XMLDocument.from_reader(mut buffered)!
				return document
			}

			// XMLDocument.from_reader parses an XML document from any source that implements the
			// io.Reader interface, which makes it the most generic of the parsing entry points.
			// The prolog is parsed first, then the root node. Running out of input while reading
			// the prolog is reported as 'XML document is empty.'; every other error is passed on
			// unchanged.
			pub fn XMLDocument.from_reader(mut reader io.Reader) !XMLDocument {
				prolog, first_char := parse_prolog(mut reader) or {
					reached_end := err is os.Eof || err is io.Eof
						|| err.msg() == 'Unexpected End Of File.'
					if !reached_end {
						return err
					}
					return error('XML document is empty.')
				}

				root_node := parse_single_node(first_char, mut reader)!

				return XMLDocument{
					version:  prolog.version
					encoding: prolog.encoding
					comments: prolog.comments
					doctype:  prolog.doctype
					root:     root_node
				}
			}