bye ... we have a Nasal interface to the built-in EasyXML parser now
This commit is contained in:
parent
23bb26651d
commit
968c517a71
1 changed files with 0 additions and 553 deletions
553
Nasal/xml.nas
553
Nasal/xml.nas
|
@ -1,553 +0,0 @@
|
|||
# XML parser that allows to parse XML files that don't follow the FlightGear standard by
|
||||
# storing information in attributes, like the crappy Traffic Manager and AI definition
|
||||
# files. The XML 1.0 standard isn't fully implemented.
|
||||
#
|
||||
# Synopsis: xml.process_string(<xml-data:string>, <action:hash>);
|
||||
# xml.process_file(<filepath>, <action:hash>);
|
||||
#
|
||||
# Examples:
|
||||
# var n = xml.process_string("<foo>123</foo>", xml.tree, "__");
|
||||
# var node = xml.process_file("foo/bar.xml", xml.tree, "__");
|
||||
# if (node != nil)
|
||||
# props.dump(node);
|
||||
#
|
||||
# The <action> interface (xml.tree) is a hash with function members begin(),
|
||||
# end(), open(), close(), and data(). Its methods are called by the parser:
|
||||
#
|
||||
# begin(...)
|
||||
# called once at the beginning; the method gets all arguments but the
|
||||
# first two from the xml.process_string() call. In the above example it
|
||||
# gets the "__" as arg[0].
|
||||
#
|
||||
# end()
|
||||
# called once at the end; its return value is used as return value for
|
||||
# xml.process_string()
|
||||
#
|
||||
# open(<tag:string>, <attr:hash>, <empty:bool>)
|
||||
# called for every opening tag (empty=0) or self-closing, empty tag
|
||||
# (empty=1). <tag> is the tag name, and <attr> is a hash with one
|
||||
# name/value string pair per attribute.
|
||||
#
|
||||
# close(<tag:string>, <numchildren:int>)
|
||||
# called for every closing tag, with tag name and number of child
|
||||
# elements. From the latter it can be determined if the closed tag
|
||||
# is a branch or a leaf node. The close() method is also called for
|
||||
# self-closing tags, in which case <numchildren> is always 0.
|
||||
#
|
||||
# data(<string>)
|
||||
# called for each data segment
|
||||
#
|
||||
# Example:
|
||||
#
|
||||
# <foo>123<bar this='is' a="test"/>456</foo>
|
||||
#
|
||||
# would cause these action interface calls:
|
||||
#
|
||||
# open("foo", {}, 0);
|
||||
# data("123");
|
||||
# open("bar", { this: "is", a: "test" }, 1);
|
||||
# close("bar", 0);
|
||||
# data("456");
|
||||
# close("foo", 1);
|
||||
#
|
||||
#
|
||||
# Predefined are two action hashes:
|
||||
#
|
||||
# xml.tree
|
||||
#
|
||||
# Synopsis: <node> = xml.process_string(<xml-string>, xml.tree, <attr-prefix:string>);
|
||||
#
|
||||
# Example: var node = xml.process_string("<foo>bar</foo>", xml.tree, "attr__");
|
||||
#
|
||||
# This parses the <xml-string> and returns it as props.Node property tree,
|
||||
# which can then be processed with the known property methods, or copied
|
||||
# to the main property tree: props.copy(node, props.globals.getNode("whatever", 1));
|
||||
# Attributes are added as regular nodes, whereby the <attr-prefix> string is
|
||||
# prepended to the attribute names. If collisions can be ruled out, then this
|
||||
# prefix can be an empty string. If it's nil, then attributes are dropped
|
||||
# altogether. FlightGear's standard attributes are *not* considered, as this
|
||||
# parser is explicitly for non-standard XML sources. Standard files can
|
||||
# be loaded more easily and quickly with fgfs means.
|
||||
#
|
||||
# xml.dump
|
||||
#
|
||||
# Example: xml.process_string("<foo>bar</foo>", xml.dump);
|
||||
#
|
||||
# This dumps the input xml data to the terminal while parsing. It's meant for
|
||||
# debugging purposes.
|
||||
#
|
||||
#
|
||||
#
|
||||
# A minimal interface hash can look like this:
|
||||
#
|
||||
# var do_nothing = {
|
||||
# begin : func {},
|
||||
# end : func {},
|
||||
# open : func {},
|
||||
# close : func {},
|
||||
# data : func {},
|
||||
# };
|
||||
#
|
||||
# and would be used as: xml.process_string("<foo>bar</foo>", do_nothing);
|
||||
|
||||
|
||||
# True if character code c is a blank, tab, line feed or carriage return.
var isspace = func(c) {
    return c == ` ` or c == `\n` or c == `\t` or c == `\r`;
}
|
||||
# True if character code c is an ASCII letter (a-z or A-Z).
# nil (which the scanner returns at end of input) is rejected explicitly,
# because ordering comparisons on nil raise a runtime error.
var isletter = func(c) {
    return c != nil and (c >= `a` and c <= `z` or c >= `A` and c <= `Z`);
}
|
||||
# True if character code c is an ASCII decimal digit (0-9).
# nil (which the scanner returns at end of input) is rejected explicitly,
# because ordering comparisons on nil raise a runtime error.
var isdigit = func(c) {
    return c != nil and c >= `0` and c <= `9`;
}
|
||||
# True if character code c is a digit or a letter.
var isalnum = func(c) {
    if (isdigit(c))
        return 1;
    return isletter(c);
}
|
||||
|
||||
# True if character code c may start a tag or attribute name:
# a letter, an underscore or a colon.
var istagfirst = func(c) {
    return isletter(c) or c == `_` or c == `:`;
}
|
||||
# True if character code c may appear after the first character of a tag or
# attribute name: a letter, a digit, or one of _ : - .
var istagother = func(c) {
    return isalnum(c) or c == `_` or c == `:` or c == `-` or c == `.`;
}
|
||||
|
||||
# Maps the names of the predefined entity references to their character codes.
var ctab = {
    "lt"   : `<`,
    "gt"   : `>`,
    "amp"  : `&`,
    "quot" : `"`,
    "apos" : `'`,
};
|
||||
|
||||
# Prefix of every message raised through error(); process() compares the start
# of a caught error message against it to tell parser errors from other errors.
var error_label = "xml.nas: ";
|
||||
# Aborts parsing: dies with error_label, the message and the current scanner
# location appended.
var error = func(msg) {
    die(error_label ~ msg ~ scan.location());
}
|
||||
|
||||
|
||||
# SCANNER =========================================================================================
|
||||
|
||||
|
||||
##
|
||||
# virtual base class: must be derived, adding get() and put()
|
||||
#
|
||||
var Scanner = {
    # Creates the shared scanner state: line/column counters and the source
    # label that location() puts into error messages.
    new : func {
        var m = { parents : [Scanner] };
        m.line = 1;
        m.column = 0;
        m.source = " in";
        return m;
    },
    # To be provided by the derived class.
    get : func die("get() method not implemented"),
    # To be provided by the derived class.
    put : func die("put() method not implemented"),
    # Tries to consume the literal string w, optionally after skipping leading
    # whitespace. Returns 1 on success. On a mismatch everything read here
    # (including skipped whitespace) is pushed back and 0 is returned.
    skip : func(w, skipspaces = 1) {
        # Most recently read character first, which is the order in which
        # the characters have to be pushed back.
        var revert = [];
        if (skipspaces) {
            # Use me rather than the global scan, so that the method works
            # on the instance it was called on.
            while (isspace(var c = me.get()))
                revert = [c] ~ revert;
            me.put(c);
        }
        for (var i = 0; i < size(w); i += 1) {
            var c = me.get();
            revert = [c] ~ revert;
            if (c != w[i]) {
                foreach (var r; revert)
                    me.put(r);
                return 0;
            }
        }
        return 1;
    },
    # Reads a tag/attribute name. Returns the name, or nil (with nothing
    # consumed) if the next character can't start a name.
    getname : func {
        var s = "";
        var c = me.get();
        # get() may return nil; keep that away from the character
        # predicates, which compare their argument numerically.
        if (c == nil or !istagfirst(c)) {
            me.put(c);
            return nil;
        }
        s ~= chr(c);
        while (1) {
            c = me.get();
            if (c == nil or !istagother(c))
                break;
            s ~= chr(c);
        }
        me.put(c);
        return s;
    },
    # Reads '= "value"' (whitespace around '=' allowed) and returns the value.
    # Raises an error if the equal sign or the quoted string is missing.
    getassign : func {
        me.skip_spaces();
        if (me.get() != `=`)
            error("equal sign expected in assignment");
        me.skip_spaces();
        var s = me.getstring();
        if (s == nil)
            error("quoted string expected in assignment");
        return s;
    },
    # Reads a string delimited by single or double quotes, expanding entity
    # references. Returns nil (with the character pushed back) if the next
    # character is not a quote; raises an error if the string isn't closed.
    getstring : func(spc = 1) {
        spc and me.skip_spaces();
        var delim = me.get();
        if (delim != `"` and delim != `'`) {
            me.put(delim);
            return nil;
        }
        var s = "";
        while ((var c = me.get()) != nil and c != delim)
            s ~= chr(c == `&` ? me.special() : c);
        if (c != delim)
            error("string not closed with " ~ chr(delim));
        return s;
    },
    # Reads the rest of an entity reference (the '&' has already been
    # consumed) and returns the character code it stands for. Handles decimal
    # character references (&#NNN;) and the names listed in ctab.
    special : func {
        var s = "";
        var c = me.get();
        var n = nil;
        if (c == `#`) {
            while ((c = me.get()) != nil and isdigit(c) and c != `;`)
                s ~= chr(c);
            n = num(s);
        } else {
            me.put(c);
            while ((c = me.get()) != nil and c != `;`)
                s ~= chr(c);
        }
        if (c != `;`)
            error("entity reference not closed with ;");
        if (n != nil)
            return n;
        if (!contains(ctab, s))
            error("unknown entity reference");
        return ctab[s];
    },
    # Consumes whitespace and returns the number of characters skipped.
    skip_spaces : func {
        var n = 0;
        while (isspace(var c = me.get()))
            n += 1;
        me.put(c);
        return n;
    },
    # Updates the line/column counters for character c.
    setmark : func(c) {
        if (c == `\n`) {
            me.line += 1;
            me.column = 0;
        } else {
            me.column += 1;
        }
    },
    # Returns the current position as text for use in error messages.
    location : func {
        return me.source ~ " line " ~ me.line ~ ", column " ~ me.column;
    },
    # Debugging aid: reads all remaining input and raises it as an error.
    dump : func {
        var s = "";
        while ((var c = me.get()) != nil)
            s ~= chr(c);
        error("REST={" ~ s ~ "}");
    },
};
|
||||
|
||||
|
||||
##
|
||||
# child class of Scanner class; knows how to read characters from a string,
|
||||
# and how to push them back for later use
|
||||
#
|
||||
var StringScanner = {
    # Creates a scanner that reads from the string s.
    new : func(s) {
        var m = Scanner.new();
        m.parents = [StringScanner] ~ m.parents;
        m.stack = [];       # pushed-back characters; the last one is read first
        m.pos = 0;          # index of the next unread character in m.string
        m.string = s;
        return m;
    },
    # Returns the next character code: a pushed-back one if there is any,
    # otherwise the next one from the string, or nil at the end of the string.
    get : func {
        if (size(me.stack) > 0)
            return pop(me.stack);
        if (me.pos < size(me.string)) {
            var ch = me.string[me.pos];
            me.pos += 1;
            me.setmark(ch);
            return ch;
        }
        return nil;
    },
    # Pushes back all given characters; the last argument is returned first
    # by the next get().
    put : func(chars...) {
        foreach (var ch; chars)
            append(me.stack, ch);
    },
};
|
||||
|
||||
|
||||
# PARSER ==========================================================================================
|
||||
|
||||
# Parses a whole document from the global scanner, reporting to the global
# action hash. All arguments are handed to action.begin(); the return value of
# action.end() is returned. Raises an error (via error()) on an empty document,
# on a missing root element, or on data following the root element.
var parse_document = func(arg...) {
    call(action.begin, arg, action);
    parse_prolog();
    if (!parse_element()) {
        var c = scan.get();
        if (c == nil)
            error("document doesn't contain any data");
        # Push the offending character back (the original call passed no
        # argument and therefore pushed back nothing).
        scan.put(c);
        error("garbage");
    }

    parse_misc();
    scan.skip_spaces();
    if (scan.get() != nil)
        error("trailing garbage");

    return action.end();
}
|
||||
|
||||
|
||||
# Consumes the document prolog: optional XML declaration and optional doctype,
# each followed by any number of comments/processing instructions.
var parse_prolog = func {
    foreach (var step; [parse_xmldecl, parse_misc, parse_doctype, parse_misc])
        step();
}
|
||||
|
||||
|
||||
# Consumes an XML declaration (<?xml version=... ?>) if the input starts with
# "<?". The version statement is mandatory, encoding and standalone are
# optional. The values are validated where needed, but otherwise discarded.
var parse_xmldecl = func {
    if (scan.skip("<?")) {
        if (!(scan.skip("xml") and scan.skip_spaces()))
            error("prolog with invalid identifier. xml: expected");
        if (!scan.skip("version"))
            error("prolog without version statement");
        scan.getassign();       # version value is read, but not used

        if (scan.skip("encoding"))
            scan.getassign();   # encoding value is read, but not used

        if (scan.skip("standalone")) {
            var value = scan.getassign();
            if (!(value == "yes" or value == "no"))
                error("standalone value must be 'yes' or 'no'");
        }
        if (!scan.skip("?>"))
            error("prolog not closed with ?>");
    }
}
|
||||
|
||||
|
||||
# Consumes any sequence of comments and processing instructions.
var parse_misc = func {
    while (1) {
        if (parse_comment())
            continue;
        if (parse_pi())
            continue;
        break;
    }
}
|
||||
|
||||
|
||||
# Consumes a comment (<!-- ... -->). Returns 1 if a comment was consumed, 0 if
# the input doesn't start with one. Raises an error on "--" inside the comment
# and on a comment that isn't closed before the end of input.
var parse_comment = func {
    if (!scan.skip("<!--"))
        return 0;
    while (1) {
        if (scan.skip("-->"))
            return 1;
        if (scan.skip("--"))
            error("illegal use of -- in comment");
        # get() keeps returning nil at the end of input; without this check
        # an unterminated comment would loop forever.
        if (scan.get() == nil)
            error("unfinished comment");
    }
}
|
||||
|
||||
|
||||
# Consumes a processing instruction (<? ... ?>) and discards its contents.
# Returns 1 if one was consumed, 0 if the input doesn't start with one. Raises
# an error if it isn't closed before the end of input.
var parse_pi = func {
    if (!scan.skip("<?"))
        return 0;
    while (1) {
        if (scan.skip("?>"))
            return 1;
        # get() keeps returning nil at the end of input; without this check
        # an unterminated instruction would loop forever.
        if (scan.get() == nil)
            error("unfinished 'processing instruction'");
    }
}
|
||||
|
||||
|
||||
# Consumes a "<! ... >" declaration and discards its contents; nested "<! ... >"
# declarations are consumed recursively. Returns 1 if a declaration was
# consumed, 0 if the input doesn't start with "<!". Raises an error if the
# declaration isn't closed before the end of input.
var parse_doctype = func {
    if (!scan.skip("<!"))
        return 0;
    while (1) {
        parse_doctype();

        if (scan.skip(">"))
            return 1;
        # get() keeps returning nil at the end of input; without this check
        # an unterminated declaration would loop forever.
        if (scan.get() == nil)
            error("unfinished doctype");
    }
}
|
||||
|
||||
|
||||
# Reads character data up to (not including) the next '<' or the end of input,
# expanding entity references. Returns the data string, or nil if the input
# starts with '<'. Raises an error if the input is already exhausted.
var parse_rawdata = func {
    var c = scan.get();
    # End of input inside an element: report it as a parser error instead of
    # handing nil to chr().
    if (c == nil)
        error("unexpected end of document");
    if (c == `<`) {
        scan.put(c);
        return nil;
    }

    var s = "";
    # The first character goes through the same entity expansion as all others,
    # so that data starting with e.g. "&lt;" is decoded as well.
    while (c != nil and c != `<`) {
        s ~= chr(c == `&` ? scan.special() : c);
        c = scan.get();
    }
    scan.put(c);
    return s;
}
|
||||
|
||||
|
||||
# Reads a CDATA section (<![CDATA[ ... ]]>) and returns its contents verbatim.
# Returns nil if the input doesn't start with a CDATA section. Raises an error
# if the section isn't closed before the end of input.
var parse_cdsect = func {
    if (!scan.skip("<![CDATA["))
        return nil;
    var s = "";
    while (1) {
        # Don't let skip() eat whitespace here: blanks in front of "]]>" are
        # part of the section's contents.
        if (scan.skip("]]>", 0))
            return s;
        var c = scan.get();
        if (c == nil)
            break;
        # CDATA contents are literal text; '&' doesn't start an entity
        # reference here.
        s ~= chr(c);
    }
    error("unfinished CDATA section");
}
|
||||
|
||||
|
||||
# Parses one element including its contents, and reports data and the closing
# tag to the action hash (the opening tag is reported by parse_opening_tag()).
# Returns 1 if an element was parsed, 0 if the input doesn't start with an
# opening tag. Raises an error on mismatched closing tags and on content that
# can't be parsed.
var parse_element = func {
    var open = parse_opening_tag();
    if (open == nil)
        return 0;
    if (open[2]) {
        action.close(open[0], 0);
        return 1;               # tag was self-closing
    }

    var children = 0;
    while (1) {
        if ((var close = parse_closing_tag()) != nil)
            break;

        # Counts what was consumed in this round. If nothing was, the input
        # contains something that no sub-parser accepts (e.g. "<1"), and the
        # loop would never terminate.
        var progress = parse_comment();
        progress += parse_pi();
        if ((var cdata = parse_cdsect()) != nil) {
            action.data(cdata);
            progress += 1;
        }
        if ((var text = parse_rawdata()) != nil) {
            action.data(text);
            progress += 1;
        }
        var found = parse_element();
        children += found;
        progress += found;

        if (!progress)
            error("garbage in element <" ~ open[0] ~ ">");
    }
    if (open[0] != close)
        error("<" ~ open[0] ~ "> closed with <" ~ close ~ ">");
    action.close(close, children);
    return 1;
}
|
||||
|
||||
|
||||
# Parses an opening or self-closing tag with its attributes and reports it via
# action.open(). Returns the vector [name, attr-hash, selfclosing], or nil if
# the input doesn't start with '<' followed by a valid name character. Raises
# an error if the tag isn't properly closed.
var parse_opening_tag = func {
    if (!scan.skip("<"))
        return nil;
    var c = scan.get();
    # get() may return nil; keep that away from istagfirst().
    if (c == nil or !istagfirst(c)) {
        scan.put(c, `<`);       # pushed back in reverse order: '<' is read first
        return nil;
    }
    scan.put(c);
    var name = scan.getname();  # can't be nil
    var attr = {};
    while (1) {
        scan.skip_spaces();
        var n = scan.getname();
        if (n == nil)
            break;
        var v = scan.getassign();
        attr[n] = v;
    }
    # Declared with var, so that the flag stays local to this call.
    var selfclosing = 0;
    if (scan.skip("/>"))
        selfclosing = 1;
    elsif (!scan.skip(">"))
        error("garbage in opening tag");
    action.open(name, attr, selfclosing);
    return [name, attr, selfclosing];
}
|
||||
|
||||
|
||||
# Parses a closing tag (</name>) and returns its name, or nil if the input
# doesn't start with "</". Raises an error if the name or the '>' is missing.
var parse_closing_tag = func {
    if (scan.skip("</")) {
        var tag = scan.getname();
        if (tag == nil)
            error("closing tag without name");
        if (!scan.skip(">"))
            error("closing tag not ended with >");
        return tag;
    }
    return nil;
}
|
||||
|
||||
|
||||
# ACTION HASHES ===================================================================================
|
||||
|
||||
# Action hash that builds a props.Node tree from the parser callbacks.
var tree = {
    # prefix: string prepended to attribute names; nil drops attributes.
    begin : func(prefix) {
        me.node = props.Node.new();
        me.stack = [];          # one data buffer per currently open element
        me.prefix = prefix;
    },
    # Returns the current node.
    end : func {
        return me.node;
    },
    # Appends a new child <name> to the current node, makes it the current
    # node, and stores the attributes as prefixed child nodes.
    open : func(name, attr) {
        append(me.stack, "");
        var siblings = me.node.getChildren(name);
        me.node = me.node.getChild(name, size(siblings), 1);
        if (me.prefix == nil)
            return;
        foreach (var key; keys(attr)) {
            var target = me.node.getNode(me.prefix ~ key, 1);
            target.setValue(attr[key]);
        }
    },
    # Stores the collected data as node value if the element has no child
    # elements, then makes the parent the current node again.
    close : func(name, children) {
        var text = pop(me.stack);
        if (size(text) and !children)
            me.node.setValue(text);
        me.node = me.node.getParent();
    },
    # Appends a data segment to the buffer of the innermost open element.
    data : func(d) {
        me.stack[-1] ~= d;
    },
};
|
||||
|
||||
|
||||
# Action hash that prints the parsed structure while parsing, indented by
# nesting level.
var dump = {
    # prefix: string prepended to attribute names in the output.
    begin : func(prefix = "__") {
        me.prefix = prefix;
        me.level = 0;
    },
    end : func {
    },
    # Prints the opening tag, followed by one <prefix+name>value</prefix+name>
    # line per attribute (sorted by name) one level deeper.
    open : func(name, attr) {
        me.print("<", name, ">");
        me.level += 1;
        foreach (var n; sort(keys(attr), cmp))
            # (the original argument list contained a stray empty argument
            # after "<")
            me.print("<", me.prefix, n, ">", attr[n], "</", me.prefix, n, ">");
    },
    # Prints the closing tag at the level of its opening tag.
    close : func(name) {
        me.level -= 1;
        me.print("</", name, ">");
    },
    # Prints a data segment in single quotes, unless it consists of
    # whitespace only.
    data : func(data) {
        for (var i = 0; i < size(data); i += 1)
            if (!isspace(data[i]))
                return me.print("'", data, "'");
    },
    # Prints the arguments, preceded by one tab per nesting level.
    print : func {
        var s = "";
        for (var i = 0; i < me.level; i += 1)
            s ~= "\t";
        call(print, [s] ~ arg);
    },
};
|
||||
|
||||
|
||||
# Runs parse_document() with the given arguments and returns its result.
# Errors raised by the parser itself (message starts with error_label) are
# printed, and nil is returned; all other errors are re-raised.
var process = func(arg...) {
    var err = [];
    var result = call(parse_document, arg, err);
    if (size(err)) {
        var msg = err[0];
        var from_parser = substr(msg, 0, size(error_label)) == error_label;
        if (!from_parser)
            die(msg);           # rethrow

        print(msg);
        return nil;
    }
    return result;
}
|
||||
|
||||
|
||||
# Scanner that the parse_*() functions and error() read from; set by
# process_string() and process_file() before parsing starts.
var scan = nil;
|
||||
# Action hash whose begin/end/open/close/data methods the parser calls; set by
# process_string() and process_file() before parsing starts.
var action = nil;
|
||||
|
||||
|
||||
# INTERFACE =======================================================================================
|
||||
|
||||
# Parses the XML data in <string>, reporting to the action hash <act>. All
# further arguments are passed on to process(). Returns what process() returns.
var process_string = func(string, act, arg...) {
    action = act;
    scan = StringScanner.new(string);
    return call(process, arg);
}
|
||||
|
||||
|
||||
# Parses the XML file <file>, reporting to the action hash <act>. All further
# arguments are passed on to process(). Returns what process() returns. The
# file name is included in the location that error messages report.
var process_file = func(file, act, arg...) {
    var contents = io.readfile(file);
    scan = StringScanner.new(contents);
    scan.source = "\n in file " ~ file ~ ",";
    action = act;
    return call(process, arg);
}
|
||||
|
||||
|
Loading…
Reference in a new issue