diff --git a/Nasal/xml.nas b/Nasal/xml.nas
new file mode 100644
index 000000000..3d7eb5605
--- /dev/null
+++ b/Nasal/xml.nas
@@ -0,0 +1,516 @@
+# XML parser for XML files that don't follow the FlightGear standard because they
+# store information in attributes, like the crappy Traffic Manager and AI definition
+# files. Currently only reading from a string is supported, and the XML 1.0 standard
+# isn't fully implemented.
+#
+# Synopsis: xml.process_string(<string>, <interface>);
+#
+# Example:
+#     var string = io.readfile("foo.xml");
+#     var node = xml.process_string(string, xml.tree, "__");
+#     if (node != nil)
+#         props.dump(node);
+#
+# The "interface" (xml.tree) is a hash with function members begin(), end(), open(),
+# close(), and data(). Its methods are called by the parser:
+#
+#     begin(...)
+#         called once at the beginning; the method gets all arguments but the
+#         first two from the xml.process_string() call. In the above example it
+#         gets the "__" as arg[0].
+#
+#     end()
+#         called once at the end; its return value is used as return value for
+#         xml.process_string()
+#
+#     open(<name>, <attr>, <selfclosing>)
+#         called for every opening tag (selfclosing=0) or self-closing empty
+#         tag (selfclosing=1). <name> is the tag name, and <attr> is a hash
+#         with name/value string pairs.
+#
+#     close(<name>, <children>)
+#         called for every closing tag, with tag name and number of child
+#         elements. From the latter it can be determined if the closed tag
+#         is a branch or a leaf node. The close() method is also called for
+#         selfclosing tags, in which case <children> is always 0.
+#
+#     data(<data>)
+#         called for each data segment; <data> is a string
+#
+# Example:
+#
+#     <foo>123<bar this="is" a="test"/>456</foo>
+#
+# would cause these interface calls:
+#
+#     open("foo", {}, 0);
+#     data("123");
+#     open("bar", { this: "is", a: "test" }, 1);
+#     close("bar", 0);
+#     data("456");
+#     close("foo", 1);
+#
+#
+# Predefined are two interfaces:
+#
+# xml.tree
+#
+#     Synopsis: <node> = xml.process_string(<string>, xml.tree, <attr-prefix>);
+#
+#     Example:  var node = xml.process_string("<foo>bar</foo>", xml.tree, "attr__");
+#
+#     This parses the <string> and returns it as props.Node property tree,
+#     which can then be processed with the known property methods, or copied
+#     to the main property tree: props.copy(node, props.globals.getNode("whatever", 1));
+#     Attributes are added as regular nodes, whereby the <attr-prefix> string is
+#     prepended to the attribute names. If collisions can be ruled out, then this
+#     prefix can be an empty string. If it's nil, then attributes are dropped
+#     altogether. FlightGear's standard attributes are *not* considered, as this
+#     parser is explicitly for non-standard XML sources. Standard files can
+#     more easily be loaded with fgfs means.
+#
+# xml.dump
+#
+#     Example:  xml.process_string("<foo>bar</foo>", xml.dump);
+#
+#     This dumps the input xml data to the terminal while parsing. It's meant for
+#     debugging purposes.
+#
+#
+#
+# A minimal interface hash can look like this:
+#
+#     var empty = {
+#         begin : func {},
+#         end   : func {},
+#         open  : func {},
+#         close : func {},
+#         data  : func {},
+#     };
+#
+# and would be used as:  xml.process_string("<foo>bar</foo>", empty);
+
+# printf(<format>, ...): prints one sprintf()-formatted line.
+var printf = func { print(call(sprintf, arg)) }
+
+# Character classes. Each helper takes a character code as returned by
+# Scanner.get(). nil (end of input) belongs to no class, so callers don't
+# have to test for end of input before classifying a character.
+var isspace = func(c) { c == ` ` or c == `\t` or c == `\n` or c == `\r` }
+var isletter = func(c) { c != nil and ((c >= `a` and c <= `z`) or (c >= `A` and c <= `Z`)) }
+var isdigit = func(c) { c != nil and c >= `0` and c <= `9` }
+var isalnum = func(c) { isdigit(c) or isletter(c) }
+
+# Characters that may start / continue a tag or attribute name.
+var istagfirst = func(c) { isletter(c) or c == `_` or c == `:` }
+var istagother = func(c) { isalnum(c) or c == `_` or c == `:` or c == `-` or c == `.` }
+
+# Named entity references understood by Scanner.special().
+var ctab = { "lt" : `<`, "gt" : `>`, "amp" : `&`, "quot" : `"`, "apos" : `'` };
+
+# Parser errors are raised with die() and marked with this prefix, so that
+# process_string() can tell them apart from Nasal runtime errors.
+var xml_error = "__xml__";
+var error = func(msg) die(xml_error ~ msg ~ scan.location());
+
+
+# SCANNER =========================================================================================
+
+
+##
+# virtual base class: must be derived, adding get() and put()
+#
+#   get()       returns the next character code, or nil at end of input
+#   put(...)    pushes characters back, so that get() returns them again
+#
+var Scanner = {
+    new : func {
+        var m = { parents : [Scanner] };
+        m.line = 1;
+        m.column = 0;
+        return m;
+    },
+    get : func die("get() method not implemented"),
+    put : func die("put() method not implemented"),
+
+    # Tries to consume the literal string <w>. Returns 1 on success. On a
+    # mismatch all characters read for the comparison are pushed back and 0
+    # is returned. If <spc> is set, leading whitespace is skipped first; that
+    # whitespace stays consumed even if the match fails.
+    skip : func(w, spc = 1) {
+        if (spc)
+            me.skip_spaces();
+        var revert = [];
+        for (var i = 0; i < size(w); i += 1) {
+            var c = me.get();
+            # newest character first: put() is a stack, so the oldest
+            # character has to be pushed last
+            revert = [c] ~ revert;
+            if (c != w[i]) {
+                foreach (var r; revert)
+                    me.put(r);
+                return 0;
+            }
+        }
+        return 1;
+    },
+
+    # Reads a tag or attribute name. Returns the name, or nil (with nothing
+    # consumed) if the next character can't start a name.
+    getname : func {
+        var c = me.get();
+        if (!istagfirst(c)) {
+            me.put(c);
+            return nil;
+        }
+        var s = chr(c);
+        while (istagother(c = me.get()))
+            s ~= chr(c);
+        me.put(c);
+        return s;
+    },
+
+    # Reads the '= "value"' part of an attribute and returns the value.
+    getassign : func {
+        me.skip_spaces();
+        if (me.get() != `=`)
+            error("equal sign expected in assignment");
+        me.skip_spaces();
+        var s = me.getstring();
+        if (s == nil)
+            error("quoted string expected in assignment");
+        return s;
+    },
+
+    # Reads a string quoted with " or ' and returns it with entity references
+    # decoded. Returns nil (with the character put back) if the next
+    # character isn't a quote.
+    getstring : func(spc = 1) {
+        if (spc)
+            me.skip_spaces();
+        var delim = me.get();
+        if (delim != `"` and delim != `'`) {
+            me.put(delim);
+            return nil;
+        }
+        var s = "";
+        while ((var c = me.get()) != nil and c != delim)
+            s ~= chr(c == `&` ? me.special() : c);
+        if (c != delim)
+            error("string not closed with " ~ chr(delim));
+        return s;
+    },
+
+    # Decodes an entity reference; to be called right after the '&' was
+    # read. Handles decimal character references (&#65;) and the names
+    # listed in ctab. Returns the character code.
+    special : func {
+        var s = "";
+        var c = me.get();
+        var n = nil;
+        if (c == `#`) {
+            while (isdigit(c = me.get()))
+                s ~= chr(c);
+            n = num(s);
+        } else {
+            me.put(c);
+            while ((c = me.get()) != nil and c != `;`)
+                s ~= chr(c);
+        }
+        if (c != `;`)
+            error("entity reference not closed with ;");
+        if (n != nil)
+            return n;
+        if (!contains(ctab, s))
+            error("unknown entity reference");
+        return ctab[s];
+    },
+
+    # Skips whitespace and returns the number of skipped characters.
+    skip_spaces : func {
+        var n = 0;
+        while (isspace(var c = me.get()))
+            n += 1;
+        me.put(c);
+        return n;
+    },
+
+    # Updates line/column bookkeeping; to be called by get() implementations
+    # for every character read from the source.
+    setmark : func(c) {
+        if (c == `\n`) {
+            me.line += 1;
+            me.column = 0;
+        } else {
+            me.column += 1;
+        }
+    },
+    location : func {
+        return " in line " ~ me.line ~ " at position " ~ me.column;
+    },
+
+    # Debugging aid: aborts with the unread rest of the input as message.
+    dump : func {
+        var s = "REST=(";
+        while ((var c = me.get()) != nil)
+            s ~= chr(c);
+        error(s ~ ")");
+    },
+};
+
+
+##
+# Scanner that reads from a string. Characters given to put() are kept on a
+# stack and handed out again by get() before the source string is read.
+#
+var StringScanner = {
+    new : func(s) {
+        var m = Scanner.new();
+        m.parents = [StringScanner] ~ m.parents;
+        m.source = s;
+        m.pos = 0;
+        m.buf = [];
+        return m;
+    },
+    get : func {
+        if (size(me.buf))
+            return pop(me.buf);
+        if (me.pos >= size(me.source))
+            return nil;
+        var c = me.source[me.pos];
+        me.pos += 1;
+        me.setmark(c);
+        return c;
+    },
+    put : func {
+        foreach (var c; arg)
+            append(me.buf, c);
+        return nil;
+    },
+};
+
+
+# PARSER ==========================================================================================
+
+##
+# Parses the whole document from the global scanner <scan> and reports it to
+# the global interface <action>. All arguments are forwarded to
+# action.begin(). Returns the return value of action.end().
+#
+var parse_xml = func {
+    var args = caller(0)[0]["arg"] == nil ? [] : arg;         # FIXME nasal bug
+    call(action.begin, args, action);
+    parse_prolog();
+    if (!parse_element()) {
+        var c = scan.get();
+        if (c == nil)
+            error("document doesn't contain any data");
+        scan.put(c);
+        error("garbage");
+    }
+
+    parse_misc();
+    if (scan.get() != nil)
+        error("trailing garbage");
+
+    return action.end();
+}
+
+
+var parse_prolog = func {
+    parse_xmldecl();
+    parse_misc();
+    #parse_dtd();
+    #parse_misc();
+}
+
+
+##
+# Skips an optional XML declaration (<?xml version="1.0" ...?>). The
+# pseudo-attributes are read, but not evaluated.
+# NOTE(review): everything between the "<?xml" and "?>" literals was lost
+# from this patch and has been reconstructed -- confirm against upstream.
+#
+var parse_xmldecl = func {
+    if (!scan.skip("<?xml"))
+        return;
+    while (1) {
+        scan.skip_spaces();
+        if (scan.getname() == nil)
+            break;
+        scan.getassign();
+    }
+    if (!scan.skip("?>"))
+        error("prolog not closed with ?>");
+}
+
+
+# Skips any number of comments (and processing instructions, once supported).
+var parse_misc = func {
+    while (parse_comment() or parse_pi()) {
+    }
+}
+
+
+##
+# Skips one comment. Returns 1 if there was one, and 0 otherwise.
+# NOTE(review): the opening of this function, up to and including the "-->"
+# literal, was lost from this patch and has been reconstructed.
+#
+var parse_comment = func {
+    if (!scan.skip("<!--"))
+        return 0;
+    while (1) {
+        if (scan.skip("-->", 0))
+            return 1;
+        if (scan.skip("--", 0))
+            error("illegal use of -- in comment");
+        if (scan.get() == nil)
+            break;
+    }
+    error("unfinished comment");
+}
+
+
+var parse_pi = func {
+    # TODO
+    return 0;
+}
+
+
+##
+# Reads character data up to the next '<' or the end of input and returns it
+# with entity references decoded. Returns nil if there is no data.
+#
+var parse_rawdata = func {
+    var s = "";
+    # every character goes through the same loop, so that an entity
+    # reference is decoded at the very start of the data, too
+    while ((var c = scan.get()) != nil and c != `<`)
+        s ~= chr(c == `&` ? scan.special() : c);
+    scan.put(c);
+    return size(s) ? s : nil;
+}
+
+
+##
+# Reads one CDATA section and returns its contents, or nil if there is none.
+# The contents are taken literally: in a CDATA section only "]]>" has a
+# special meaning, so '&' does not start an entity reference here.
+# NOTE(review): the "<![CDATA[" / "]]>" literals and the lines between them
+# were lost from this patch and have been reconstructed.
+#
+var parse_cdsect = func {
+    if (!scan.skip("<![CDATA["))
+        return nil;
+    var s = "";
+    while (1) {
+        if (scan.skip("]]>", 0))
+            return s;
+        var c = scan.get();
+        if (c == nil)
+            break;
+        s ~= chr(c);
+    }
+    error("unfinished CDATA section");
+}
+
+
+##
+# Parses one element including its contents and reports it to <action>.
+# Returns 1 if there was an element, and 0 otherwise.
+#
+var parse_element = func {
+    var open = parse_opening_tag();
+    if (open == nil)
+        return 0;
+    if (open[2]) {
+        action.close(open[0], 0);
+        return 1;                 # tag was self-closing
+    }
+
+    var children = 0;
+    var close = nil;
+    while ((close = parse_closing_tag()) == nil) {
+        var progress = parse_comment();
+        var d = parse_cdsect();
+        if (d != nil) {
+            action.data(d);
+            progress = 1;
+        }
+        d = parse_rawdata();
+        if (d != nil) {
+            action.data(d);
+            progress = 1;
+        }
+        if (parse_element()) {
+            children += 1;
+            progress = 1;
+        }
+        if (!progress) {
+            # nothing was consumed in this round, so the next round wouldn't
+            # consume anything either: bail out rather than loop forever
+            var c = scan.get();
+            if (c == nil)
+                error("<" ~ open[0] ~ "> not closed");
+            scan.put(c);
+            error("unexpected markup in <" ~ open[0] ~ ">");
+        }
+    }
+    if (open[0] != close)
+        error("<" ~ open[0] ~ "> closed with <" ~ close ~ ">");
+    action.close(close, children);
+    return 1;
+}
+
+
+##
+# Parses an opening or self-closing tag with its attributes and calls
+# action.open(). Returns [<name>, <attr>, <selfclosing>], or nil (with
+# the input put back) if there is no such tag.
+#
+var parse_opening_tag = func {
+    if (!scan.skip("<"))
+        return nil;
+    var c = scan.get();
+    if (!istagfirst(c)) {
+        scan.put(c, `<`);
+        return nil;
+    }
+    scan.put(c);
+    var name = scan.getname();    # can't be nil
+    var attr = {};
+    while (1) {
+        scan.skip_spaces();
+        var n = scan.getname();
+        if (n == nil)
+            break;
+        var v = scan.getassign();
+        attr[n] = v;
+    }
+    scan.skip_spaces();
+    var selfclosing = 0;
+    if (scan.skip("/>"))
+        selfclosing = 1;
+    elsif (scan.skip(">"))
+        selfclosing = 0;
+    else
+        error("trailing garbage in opening tag");
+    action.open(name, attr, selfclosing);
+    return [name, attr, selfclosing];
+}
+
+
+##
+# Parses a closing tag and returns its name, or nil if there is none.
+# NOTE(review): the part between the "</" and ">" literals was lost from this
+# patch and has been reconstructed.
+#
+var parse_closing_tag = func {
+    if (!scan.skip("</"))
+        return nil;
+    var name = scan.getname();
+    if (name == nil)
+        error("closing tag without name");
+    if (!scan.skip(">"))
+        error("closing tag not ended with >");
+    return name;
+}
+
+
+# Scanner and interface of the document that is currently being parsed;
+# both are set by process_string().
+var scan = nil;
+var action = nil;
+
+##
+# Parses the XML string <s> and reports its structure to the interface hash
+# <a>. Further arguments are forwarded to a.begin(). Returns the return
+# value of a.end(), or nil if an error occurred; errors are printed to the
+# terminal.
+#
+var process_string = func(s, a) {
+    scan = StringScanner.new(s);
+    action = a;
+    var err = [];
+    var args = caller(0)[0]["arg"] == nil ? [] : arg;         # FIXME nasal bug
+    var ret = call(parse_xml, args, nil, nil, err);
+    if (!size(err))
+        return ret;
+    if (substr(err[0], 0, size(xml_error)) == xml_error) {
+        # error raised by the parser itself: print the message only
+        print("XML: ", substr(err[0], size(xml_error)));
+    } else {
+        # Nasal runtime error: print the message with the call stack
+        printf("%s at %s line %d", err[0], err[1], err[2]);
+        for (var i = 3; i < size(err); i += 2)
+            printf("  called from %s line %d", err[i], err[i + 1]);
+    }
+    return nil;
+}
+
+
+
+##
+# Interface that turns the document into a props.Node tree. begin() takes
+# the prefix for attribute nodes; with nil (the default) attributes are
+# dropped.
+#
+var tree = {
+    begin : func(prefix = nil) {
+        me.attr_prefix = prefix;
+        me.stack = [];            # one data buffer per currently open element
+        me.root = me.node = props.Node.new();
+    },
+    end : func {
+        return me.node;
+    },
+    open : func(name, attr, sc) {
+        append(me.stack, "");
+        # always create a new child with the next free index
+        var index = size(me.node.getChildren(name));
+        me.node = me.node.getChild(name, index, 1);
+        if (me.attr_prefix != nil)
+            foreach (var a; keys(attr))
+                me.node.getNode(me.attr_prefix ~ a, 1).setValue(attr[a]);
+    },
+    close : func(name, children) {
+        var buf = pop(me.stack);
+        # only leaf nodes get a value
+        if (!children and size(buf))
+            me.node.setValue(buf);
+        me.node = me.node.getParent();
+    },
+    data : func(d) {
+        me.stack[-1] ~= d;
+    },
+};
+
+
+##
+# Interface that prints the document structure to the terminal, indented by
+# nesting level. Data segments that consist of whitespace only are ignored.
+# NOTE(review): the markup printed around attribute values was lost from this
+# patch; the "attr:" form used in open() is a stand-in.
+#
+var dump = {
+    begin : func {
+        me.level = 0;
+    },
+    end : func {
+    },
+    open : func(name, attr, selfclosed) {
+        me.print("<", name, ">");
+        me.level += 1;
+        foreach (var a; keys(attr)) {
+            me.print("<attr:" ~ a ~ ">" ~ attr[a] ~ "</attr:" ~ a ~ ">");
+        }
+    },
+    close : func(name, chld) {
+        me.level -= 1;
+        me.print("</", name, ">");
+    },
+    data : func(data) {
+        for (var i = 0; i < size(data); i += 1)
+            if (!isspace(data[i]))
+                return me.print("'", data, "'");
+    },
+    print : func {
+        var s = "";
+        for (var i = 0; i < me.level; i += 1)
+            s ~= "\t";
+        arg = [s] ~ arg;
+        call(print, arg);
+    },
+};
+
+