1
0
Fork 0

simple xml parser that allows to read non-fgfs-standard xml files such as

those from traffic manager and AI, so that it will later be possible to
load the data into the UFO Scenery Object Editor, to visualize and edit
parking positions, taxiing routes etc. The parser also allows to suck
arbitrary (simple) XML data files into fgfs from Nasal context.

In the long run the non-standard XML files in fgfs shall, of course, be
converted to standard format, in which no data may be stored in attributes.
This commit is contained in:
mfranz 2007-05-02 20:55:54 +00:00
parent 24b8156e9f
commit af56aed55e

516
Nasal/xml.nas Normal file
View file

@ -0,0 +1,516 @@
# XML parser that allows to parse XML files that don't follow the FlightGear standard by
# storing information in attributes, like the crappy Traffic Manager and AI definition
# files. Currently only reading from a string is supported, and the XML 1.0 standard
# isn't fully implemented.
#
# Synopsis: xml.process_string(<xml-data:string>, <interface:hash>);
#
# Example:
# var string = io.readfile("foo.xml");
# var node = xml.process_string(string, xml.tree, "__");
# if (node != nil)
# props.dump(node);
#
# The "interface" (xml.tree) is a hash with function members begin(), end(), open(),
# close(), and data(). Its methods are called by the parser:
#
# begin(...)
# called once at the beginning; the method gets all arguments but the
# first two from the xml.process_string() call. In the above example it
# gets the "__" as arg[0].
#
# end()
# called once at the end; its return value is used as return value for
# xml.process_string()
#
# open(<tag:string>, <attr:hash>, <selfclosing:bool>)
# called for every opening tag (selfclosing=0) or self-closing empty
# tag (selfclosing=1). <tag> is the tag name, and <attr> is a hash
# with name/value string pairs.
#
# close(<tag:string>, <numchildren:int>)
# called for every closing tag, with tag name and number of child
# elements. From the latter it can be determined if the closed tag
# is a branch or a leaf node. The close() method is also called for
# selfclosing tags, in which case <numchildren> is always 0.
#
# data(<string>)
# called for each data segment
#
# Example:
#
# <foo>123<bar this='is' a="test"/>456</foo>
#
# would cause these interface calls:
#
# open("foo", {}, 0);
# data("123");
# open("bar", { this: "is", a: "test" }, 1);
# close("bar", 0);
# data("456");
# close("foo", 1);
#
#
# Predefined are two interfaces:
#
# xml.tree
#
# Synopsis: <node> = xml.process_string(<xml-string>, xml.tree, <attr-prefix:string>);
#
# Example: var node = xml.process_string("<foo>bar</foo>", xml.tree, "attr__");
#
# This parses the <xml-string> and returns it as props.Node property tree,
# which can then be processed with the known property methods, or copied
# to the main property tree: props.copy(node, props.globals.getNode("whatever", 1));
# Attributes are added as regular nodes, whereby the <attr-prefix> string is
# prepended to the attribute names. If collisions can be ruled out, then this
# prefix can be an empty string. If it's nil, then attributes are dropped
# altogether. FlightGear's standard attributes are *not* considered, as this
# parser is explicitly for non-standard XML sources. Standard files can be
# loaded more easily with fgfs' own means.
#
# xml.dump
#
# Example: xml.process_string("<foo>bar</foo>", xml.dump);
#
# This dumps the input xml data to the terminal while parsing. It's meant for
# debugging purposes.
#
#
#
# A minimal interface hash can look like this:
#
# var empty = {
# begin : func {},
# end : func {},
# open : func {},
# close : func {},
# data : func {},
# };
#
# and would be used as: xml.process_string("<foo>bar</foo>", empty);
# Formatted print: forwards all arguments to sprintf() and prints the result.
var printf = func { print(call(sprintf, arg)) }

# Character class predicates. They take a character code as returned by the
# scanner's get() method. The scanner returns nil at the end of the input, so
# the range-based predicates check for nil first rather than feeding it to
# the numeric comparison operators.
var isspace = func(c) { c == ` ` or c == `\t` or c == `\n` or c == `\r` }
var isletter = func(c) { c != nil and (c >= `a` and c <= `z` or c >= `A` and c <= `Z`) }
var isdigit = func(c) { c != nil and c >= `0` and c <= `9` }
var isalnum = func(c) { isdigit(c) or isletter(c) }

# Characters that may start a tag/attribute name, and those that may follow.
var istagfirst = func(c) { isletter(c) or c == `_` or c == `:` }
var istagother = func(c) { isalnum(c) or c == `_` or c == `:` or c == `-` or c == `.` }

# Named entity references understood by Scanner.special(), mapped to their
# character codes.
var ctab = { "lt" : `<`, "gt" : `>`, "amp" : `&`, "quot" : `"`, "apos" : `'` };

# Prefix that marks a die() message as a parser error; process_string() uses
# it to tell parser errors from other runtime errors.
var xml_error = "__xml__";

# Aborts parsing with <msg>, followed by the current scanner location.
# Relies on the module-level "scan" that process_string() sets up.
var error = func(msg) die(xml_error ~ msg ~ scan.location());
# SCANNER =========================================================================================
##
# Virtual base class for the character scanners used by the parser. It must be
# derived, adding get() and put(): the methods below fetch character codes
# with me.get(), treat a nil result as "no more input", and hand look-ahead
# characters back with me.put().
#
var Scanner = {
    # Creates the shared scanner state: the line/column counters that
    # setmark() maintains and location() reports.
    new : func {
        var m = { parents : [Scanner] };
        m.line = 1;
        m.column = 0;
        return m;
    },

    # Placeholders that die until a derived class supplies real versions.
    get : func die("get() method not implemented"),
    put : func die("put() method not implemented"),

    # Tries to consume the literal string <w>, after skipping whitespace
    # unless <spc> is 0. Returns 1 on success. On a mismatch all characters
    # read for <w> are handed back and 0 is returned; whitespace that was
    # skipped before stays consumed.
    skip : func(w, spc = 1) {
        spc and me.skip_spaces();
        var revert = [];
        for (var i = 0; i < size(w); i += 1) {
            var c = me.get();
            # newest character first, so that the oldest is put back last
            revert = [c] ~ revert;
            if (c != w[i]) {
                foreach (var r; revert)
                    me.put(r);
                return 0;
            }
        }
        return 1;
    },

    # Reads a tag or attribute name. Returns the name, or nil (with nothing
    # consumed) if the next character can't start a name. The end of the input
    # (nil) is checked explicitly, so that it never reaches the character
    # class predicates.
    getname : func {
        var s = "";
        var c = me.get();
        if (c == nil or !istagfirst(c)) {
            me.put(c);
            return nil;
        }
        s ~= chr(c);
        while (1) {
            c = me.get();
            if (c == nil or !istagother(c))
                break;
            s ~= chr(c);
        }
        me.put(c);
        return s;
    },

    # Reads the '= "value"' part of an attribute assignment and returns the
    # value string. Dies if the equal sign or the quoted string is missing.
    getassign : func {
        me.skip_spaces();
        if (me.get() != `=`)
            error("equal sign expected in assignment");
        me.skip_spaces();
        var s = me.getstring();
        if (s == nil)
            error("quoted string expected in assignment");
        return s;
    },

    # Reads a string quoted with " or ', expanding entity references on the
    # way. Returns nil (delimiter candidate put back) if the next character is
    # no quote; dies if the closing quote is missing.
    getstring : func(spc = 1) {
        spc and me.skip_spaces();
        var delim = me.get();
        if (delim != `"` and delim != `'`) {
            me.put(delim);
            return nil;
        }
        var s = "";
        while ((var c = me.get()) != nil and c != delim)
            s ~= chr(c == `&` ? me.special() : c);
        if (c != delim)
            error("string not closed with " ~ chr(delim));
        return s;
    },

    # Decodes an entity or character reference whose leading '&' has already
    # been consumed, and returns the resulting character code. Decimal
    # character references (&#NNN;) and the names listed in ctab are handled.
    # Dies if the reference isn't terminated with ';' or the name is unknown.
    special : func {
        var s = "";
        var c = me.get();
        var n = nil;
        if (c == `#`) {
            while ((c = me.get()) != nil and isdigit(c) and c != `;`)
                s ~= chr(c);
            n = num(s);
        } else {
            me.put(c);
            while ((c = me.get()) != nil and c != `;`)
                s ~= chr(c);
        }
        if (c != `;`)
            error("entity reference not closed with ;");
        if (n != nil)
            return n;
        if (!contains(ctab, s))
            error("unknown entity reference");
        return ctab[s];
    },

    # Consumes whitespace and returns the number of characters skipped. The
    # first non-space result (nil at the end of the input) is put back.
    skip_spaces : func {
        var n = 0;
        while (isspace(var c = me.get()))
            n += 1;
        me.put(c);
        return n;
    },

    # Updates the line/column counters for character <c>; a newline starts a
    # new line at column 0.
    setmark : func(c) {
        if (c == `\n`) {
            me.line += 1;
            me.column = 0;
        } else {
            me.column += 1;
        }
    },

    # Returns the current position as text for error messages.
    location : func {
        return " in line " ~ me.line ~ " at position " ~ me.column;
    },

    # Debugging aid: consumes the rest of the input and reports it through
    # error().
    dump : func {
        var s = "REST=(";
        while ((var c = me.get()) != nil)
            s ~= chr(c);
        error(s ~ ")");
    },
};
# Scanner that reads its characters from a string held in memory. Characters
# handed back with put() are kept in a stack and are returned by get() before
# any further character is taken from the string.
var StringScanner = {
    # Creates a scanner for the string <s>.
    new : func(s) {
        var obj = Scanner.new();
        obj.parents = [StringScanner] ~ obj.parents;
        obj.source = s;
        obj.pos = 0;
        obj.buf = [];
        return obj;
    },

    # Returns the next character code: the most recently put-back one if there
    # is any, otherwise the next one from the string, or nil when the string
    # is used up. Only characters taken from the string advance the
    # line/column counters.
    get : func {
        if (size(me.buf) > 0)
            return pop(me.buf);
        if (me.pos < size(me.source)) {
            var ch = me.source[me.pos];
            me.pos += 1;
            me.setmark(ch);
            return ch;
        }
        return nil;
    },

    # Hands back all given characters in argument order; the last one given
    # is the first that get() returns again.
    put : func {
        for (var i = 0; i < size(arg); i += 1)
            append(me.buf, arg[i]);
        return nil;
    },
};
# PARSER ==========================================================================================
# Parses a whole document from the module-level scanner "scan" and drives the
# module-level interface "action". All arguments are forwarded to
# action.begin(). Returns the result of action.end(); dies via error() if the
# document has no root element or if anything but comments follows it.
var parse_xml = func {
    var args = caller(0)[0]["arg"] == nil ? [] : arg;  # FIXME nasal bug
    call(action.begin, args, action);
    parse_prolog();
    if (!parse_element()) {
        var c = scan.get();
        if (c == nil)
            error("document doesn't contain any data");
        scan.put(c);   # hand the peeked character back before reporting
        error("garbage");
    }
    parse_misc();
    if (scan.get() != nil)
        error("trailing garbage");
    return action.end();
}
# Parses everything that may precede the root element: the optional XML
# declaration and any comments that follow it.
var parse_prolog = func {
    parse_xmldecl();
    parse_misc();
    # document type declarations aren't handled:
    #parse_dtd();
    #parse_misc();
}
# Parses the optional XML declaration, e.g. <?xml version="1.0" encoding="x"?>.
# Returns silently if the input doesn't start with "<?". The version and
# encoding values are read but not evaluated; a standalone value must be
# "yes" or "no". Dies via error() on a malformed declaration.
var parse_xmldecl = func {
    if (!scan.skip("<?"))
        return;
    if (!scan.skip("xml") or !scan.skip_spaces())
        error("prolog with unexpected identifier. xml: expected");
    if (!scan.skip("version"))
        error("prolog without version statement");
    scan.getassign();       # returns the version string, which isn't used
    if (scan.skip("encoding"))
        scan.getassign();   # returns the encoding name, which isn't used
    if (scan.skip("standalone")) {
        var s = scan.getassign();
        if (s != "yes" and s != "no")
            error("standalone value must be 'yes' or 'no'");
    }
    if (!scan.skip("?>"))
        error("prolog not closed with ?>");
}
# Consumes any run of comments and processing instructions; stops as soon as
# neither of the two parsers finds anything.
var parse_misc = func {
    while (1) {
        if (!parse_comment() and !parse_pi())
            return;
    }
}
# Parses one comment <!-- ... -->. Returns 1 if a comment was consumed and 0
# if the input doesn't continue with "<!--". Dies via error() if "--" is used
# inside the comment or if the input ends before the closing "-->".
var parse_comment = func {
    if (!scan.skip("<!--"))
        return 0;
    while (1) {
        if (scan.skip("-->"))
            return 1;
        if (scan.skip("--"))
            error("illegal use of -- in comment");
        # drop one comment character; nil means that the input is used up,
        # and without this check the loop would never end
        if (scan.get() == nil)
            break;
    }
    error("unfinished comment");
}
# Placeholder for a processing instruction (<?...?>) parser. Always returns 0,
# i.e. "nothing consumed".
var parse_pi = func {
    # TODO
    return 0;
}
# Reads character data up to (not including) the next '<' or the end of the
# input, expanding entity references. Returns the data string, or nil (with
# nothing consumed) if the next character is '<'. Dies via error() if the
# input is already used up.
var parse_rawdata = func {
    var c = scan.get();
    if (c == nil)
        error("unexpected end of data");
    # Only peeked: the character goes through the same loop as all others,
    # so that an entity reference at the very beginning gets expanded, too.
    scan.put(c);
    if (c == `<`)
        return nil;
    var s = "";
    while ((c = scan.get()) != nil and c != `<`)
        s ~= chr(c == `&` ? scan.special() : c);
    scan.put(c);
    return s;
}
# Parses one CDATA section <![CDATA[ ... ]]>. Returns its contents as a string
# (possibly empty), or nil if the input doesn't continue with "<![CDATA[".
# The contents are taken literally: '&' doesn't start an entity reference
# inside CDATA, and whitespace is kept. Dies via error() if the input ends
# before the closing "]]>".
var parse_cdsect = func {
    if (!scan.skip("<![CDATA["))
        return nil;
    var s = "";
    while (1) {
        # spc=0: whitespace in front of the terminator is part of the data
        if (scan.skip("]]>", 0))
            return s;
        var c = scan.get();
        if (c == nil)
            break;
        s ~= chr(c);
    }
    error("unfinished CDATA section");
}
# Parses one element including its contents (comments, CDATA sections,
# character data and child elements), calling action.data() for each data
# segment and action.close() for the closing tag. action.open() is called by
# parse_opening_tag(). Returns 1 if an element was parsed and 0 if the input
# doesn't continue with an opening tag. Dies via error() if the element isn't
# closed, is closed with a different tag name, or contains markup that none
# of the sub-parsers understands.
var parse_element = func {
    var open = parse_opening_tag();
    if (open == nil)
        return 0;
    if (open[2]) {
        action.close(open[0], 0);
        return 1;                   # tag was self-closing
    }
    var children = 0;
    var close = nil;
    while (1) {
        if ((close = parse_closing_tag()) != nil)
            break;

        var progress = parse_comment();
        var d = parse_cdsect();
        if (d != nil) {
            action.data(d);
            progress = 1;
        }

        # Peek at the next character: if the input is used up, then the
        # element can't be closed anymore.
        var c = scan.get();
        scan.put(c);
        if (c == nil)
            error("element <" ~ open[0] ~ "> not closed");

        if ((d = parse_rawdata()) != nil) {
            action.data(d);
            progress = 1;
        }
        var child = parse_element();
        children += child;

        # If nothing at all was consumed in this round, then the next round
        # would see exactly the same input and the loop would never end.
        if (!progress and !child)
            error("syntax error in element <" ~ open[0] ~ ">");
    }
    if (open[0] != close)
        error("<" ~ open[0] ~ "> closed with <" ~ close ~ ">");
    action.close(close, children);
    return 1;
}
# Parses an opening tag (<name attr="value" ...>) or a self-closing tag
# (<name ... />) and reports it with action.open(). Returns the vector
# [<name>, <attr-hash>, <selfclosing>], or nil if the input doesn't continue
# with '<' followed by a name character; in that case the '<' and the
# character after it are handed back. Dies via error() if the tag isn't
# properly ended.
var parse_opening_tag = func {
    if (!scan.skip("<"))
        return nil;
    var c = scan.get();
    # nil means that the input ended right after the '<'
    if (c == nil or !istagfirst(c)) {
        scan.put(c, `<`);
        return nil;
    }
    scan.put(c);
    var name = scan.getname();      # can't be nil
    var attr = {};
    while (1) {
        scan.skip_spaces();
        var n = scan.getname();
        if (n == nil)
            break;
        attr[n] = scan.getassign();
    }
    scan.skip_spaces();
    # declared with "var", so that no variable of the same name in an outer
    # scope can be overwritten
    var selfclosing = 0;
    if (scan.skip("/>"))
        selfclosing = 1;
    elsif (!scan.skip(">"))
        error("trailing garbage in opening tag");
    action.open(name, attr, selfclosing);
    return [name, attr, selfclosing];
}
# Parses a closing tag (</name>). Returns the tag name, or nil if the input
# doesn't continue with "</". Dies via error() if the name or the final '>'
# is missing.
var parse_closing_tag = func {
    if (scan.skip("</")) {
        var tag = scan.getname();
        if (tag == nil)
            error("closing tag without name");
        scan.skip_spaces();
        if (scan.skip(">"))
            return tag;
        error("closing tag not ended with >");
    }
    return nil;
}
# Scanner and interface hash of the current parser run; both are set by
# process_string() and used by all parse_*() functions and by error().
var scan = nil;
var action = nil;

# Parses the XML string <s>, reporting its contents to the interface hash <a>.
# All further arguments are handed on to a.begin(). Returns the return value
# of a.end(), or nil if parsing failed. Parser errors are printed as
# "XML: <message>"; any other runtime error is printed with its call stack.
var process_string = func(s, a) {
    scan = StringScanner.new(s);
    action = a;
    var err = [];
    var args = caller(0)[0]["arg"] == nil ? [] : arg;  # FIXME nasal bug
    var ret = call(parse_xml, args, nil, nil, err);
    if (size(err) == 0)
        return ret;

    var msg = err[0];
    var n = size(xml_error);
    if (substr(msg, 0, n) != xml_error) {
        # not raised by error(): print message and stack trace
        printf("%s at %s line %d", msg, err[1], err[2]);
        var i = 3;
        while (i < size(err)) {
            printf(" called from %s line %d", err[i], err[i + 1]);
            i += 2;
        }
    } else {
        print("XML: ", substr(msg, n));
    }
    return nil;
}
# Interface that builds a props.Node tree from the parsed data.
var tree = {
    # <prefix> is prepended to attribute names; with nil (the default)
    # attributes are dropped.
    begin : func(prefix = nil) {
        me.attr_prefix = prefix;
        me.stack = [];
        me.node = props.Node.new();
        me.root = me.node;
    },

    # Returns the current node.
    end : func {
        return me.node;
    },

    # Descends into a new child <name> of the current node, using the next
    # free index for that name, and stores the attributes as prefixed child
    # nodes. Also starts an empty data buffer for the element.
    open : func(name, attr, sc) {
        append(me.stack, "");
        var index = size(me.node.getChildren(name));
        me.node = me.node.getChild(name, index, 1);
        if (me.attr_prefix == nil)
            return;
        foreach (var key; keys(attr)) {
            var target = me.node.getNode(me.attr_prefix ~ key, 1);
            target.setValue(attr[key]);
        }
    },

    # Leaves the current node. The collected data becomes the node's value
    # only if the element had no child elements and the data isn't empty.
    close : func(name, children) {
        var text = pop(me.stack);
        var is_leaf = !children;
        if (is_leaf and size(text))
            me.node.setValue(text);
        me.node = me.node.getParent();
    },

    # Appends a data segment to the buffer of the innermost open element.
    data : func(d) {
        var top = size(me.stack) - 1;
        me.stack[top] = me.stack[top] ~ d;
    },
};
# Interface that prints the parsed data to the terminal, indented by one tab
# per nesting level.
var dump = {
    begin : func {
        me.level = 0;
    },

    end : func {
    },

    # Prints the tag, then its attributes as <attr:NAME>VALUE</attr:NAME>
    # lines one level deeper.
    open : func(name, attr, selfclosed) {
        me.print("<", name, ">");
        me.level += 1;
        foreach (var key; keys(attr)) {
            var val = attr[key];
            me.print("<attr:" ~ key ~ ">" ~ val ~ "</attr:" ~ key ~ ">");
        }
    },

    close : func(name, chld) {
        me.level -= 1;
        me.print("</", name, ">");
    },

    # Prints the data segment in single quotes, unless it consists of
    # whitespace only.
    data : func(data) {
        var i = 0;
        while (i < size(data)) {
            if (!isspace(data[i]))
                return me.print("'", data, "'");
            i += 1;
        }
    },

    # Prints all arguments after the indentation for the current level.
    print : func {
        var indent = "";
        var n = me.level;
        while (n > 0) {
            indent ~= "\t";
            n -= 1;
        }
        call(print, [indent] ~ arg);
    },
};