# XML parser for XML files that deviate from the FlightGear standard by
# storing information in attributes, like the crappy Traffic Manager and AI definition
# files. The XML 1.0 standard isn't fully implemented.
#
# Synopsis:  xml.process_string(<xml-data:string>, <action:hash>);
#            xml.process_file(<filepath>, <action:hash>);
#
# Examples:
#            var n = xml.process_string("<foo>123</foo>", xml.tree, "__");
#            var node = xml.process_file("foo/bar.xml", xml.tree, "__");
#            if (node != nil)
#                    props.dump(node);
#
# The <action> interface (xml.tree) is a hash with function members begin(),
# end(), open(), close(), and data(). Its methods are called by the parser:
#
#     begin(...)
#         called once at the beginning; the method gets all arguments but the
#         first two from the xml.process_string() call. In the above example it
#         gets the "__" as arg[0].
#
#     end()
#         called once at the end; its return value is used as return value for
#         xml.process_string()
#
#     open(<tag:string>, <attr:hash>, <empty:bool>)
#         called for every opening tag (empty=0) or self-closing, empty tag
#         (empty=1). <tag> is the tag name, and <attr> is a hash with one
#         name/value string pair per attribute.
#
#     close(<tag:string>, <numchildren:int>)
#         called for every closing tag, with tag name and number of child
#         elements. From the latter it can be determined if the closed tag
#         is a branch or a leaf node. The close() method is also called for
#         self-closing tags, in which case <numchildren> is always 0.
#
#     data(<string>)
#         called for each data segment
#
# Example:
#
#         <foo>123<bar this='is' a="test"/>456</foo>
#
#     would cause these action interface calls:
#
#         open("foo", {}, 0);
#         data("123");
#         open("bar", { this: "is", a: "test" }, 1);
#         close("bar", 0);
#         data("456");
#         close("foo", 1);
#
#
# Predefined are two action hashes:
#
# xml.tree
#
#     Synopsis: <node> = xml.process_string(<xml-string>, xml.tree, <attr-prefix:string>);
#
#     Example:  var node = xml.process_string("<foo>bar</foo>", xml.tree, "attr__");
#
#     This parses the <xml-string> and returns it as props.Node property tree,
#     which can then be processed with the known property methods, or copied
#     to the main property tree: props.copy(node, props.globals.getNode("whatever", 1));
#     Attributes are added as regular nodes, whereby the <attr-prefix> string is
#     prepended to the attribute names. If collisions can be ruled out, then this
#     prefix can be an empty string. If it's nil, then attributes are dropped
#     altogether. FlightGear's standard attributes are *not* considered, as this
#     parser is explicitly for non-standard XML sources. Standard files can
#     be loaded more easily and quickly with fgfs means.
#
# xml.dump
#
#     Example:  xml.process_string("<foo>bar</foo>", xml.dump);
#
#     This dumps the input xml data to the terminal while parsing. It's meant for
#     debugging purposes.
#
#
#
# A minimal interface hash can look like this:
#
#     var do_nothing = {
#         begin : func {},
#         end : func {},
#         open : func {},
#         close : func {},
#         data : func {},
#     };
#
# and would be used as:  xml.process_string("<foo>bar</foo>", do_nothing);


var isspace = func(c) { c == ` ` or c == `\t` or c == `\n` or c == `\r` }
var isletter = func(c) { c >= `a` and c <= `z` or c >= `A` and c <= `Z` }
var isdigit = func(c) { c >= `0` and c <= `9` }
var isalnum = func(c) { isdigit(c) or isletter(c) }

var istagfirst = func(c) { isletter(c) or c == `_` or c == `:` }
var istagother = func(c) { isalnum(c) or c == `_` or c == `:` or c == `-` or c == `.`}

var ctab = { "lt" : `<`, "gt" : `>`, "amp" : `&`, "quot" : `"`, "apos" : `'` };

var error_label = "xml.nas: ";
var error = func(msg) die(error_label ~ msg ~ scan.location());


# SCANNER =========================================================================================


##
# virtual base class: must be derived, adding get() and put()
#
var Scanner = {
	new : func {
		var m = { parents : [Scanner] };
		m.line = 1;
		m.column = 0;
		m.source = " in";
		return m;
	},
	get : func die("get() method not implemented"),
	put : func die("put() method not implemented"),
	skip : func(w, skipspaces = 1) {
		var revert = [];
		if (skipspaces) {
			while (isspace(var c = scan.get()))
				revert = [c] ~ revert;
			scan.put(c);
		}
		for (var i = 0; i < size(w); i += 1) {
			var c = me.get();
			revert = [c] ~ revert;
			if (c != w[i]) {
				foreach (var r; revert)
					me.put(r);
				return 0;
			}
		}
		return 1;
	},
	getname : func {
		var s = "";
		var c = me.get();
		if (!istagfirst(c)) {
			me.put(c);
			return nil;
		}
		s ~= chr(c);
		while (1) {
			c = me.get();
			if (!istagother(c))
				break;
			s ~= chr(c);
		}
		me.put(c);
		return s;
	},
	getassign : func {
		me.skip_spaces();
		if (me.get() != `=`)
			error("equal sign expected in assignment");
		me.skip_spaces();
		var s = me.getstring();
		if (s == nil)
			error("quoted string expected in assignment");
		return s;
	},
	getstring : func(spc = 1) {
		spc and me.skip_spaces();
		var delim = me.get();
		if (delim != `"` and delim != `'`) {
			me.put(delim);
			return nil;
		}
		var s = "";
		while ((var c = me.get()) != nil and c != delim)
			s ~= chr(c == `&` ? me.special() : c);
		if (c != delim)
			error("string not closed with " ~ chr(delim));
		return s;
	},
	special : func {
		var s = "";
		var c = me.get();
		var n = nil;
		if (c == `#`) {
			while ((c = me.get()) != nil and isdigit(c) and c != `;`)
				s ~= chr(c);
			n = num(s);
		} else {
			me.put(c);
			while ((c = me.get()) != nil and c != `;`)
				s ~= chr(c);
		}
		if (c != `;`)
			error("entity reference not closed with ;");
		if (n != nil)
			return n;
		if (!contains(ctab, s))
			error("unknown entity reference");
		return ctab[s];
	},
	skip_spaces : func {
		var n = 0;
		while (isspace(var c = me.get()))
			n += 1;
		me.put(c);
		return n;
	},
	setmark : func(c) {
		if (c == `\n`) {
			me.line += 1;
			me.column = 0;
		} else {
			me.column += 1;
		}
	},
	location : func {
		return me.source ~ " line " ~ me.line ~ ", column " ~ me.column;
	},
	dump : func {
		var s = "";
		while ((var c = me.get()) != nil)
			s ~= chr(c);
		error("REST={" ~ s ~ "}");
	},
};


##
# child class of Scanner class; knows how to read characters from a string,
# and how to push them back for later use
#
var StringScanner = {
	new : func(s) {
		var m = Scanner.new();
		m.parents = [StringScanner] ~ m.parents;
		m.string = s;
		m.pos = 0;
		m.stack = [];
		return m;
	},
	get : func {
		if (size(me.stack))
			return pop(me.stack);
		if (me.pos >= size(me.string))
			return nil;
		var c = me.string[me.pos];
		me.pos += 1;
		me.setmark(c);
		return c;
	},
	put : func {
		foreach (var c; arg)
			append(me.stack, c);
	},
};


# PARSER ==========================================================================================

var parse_document = func(arg...) {
	call(action.begin, arg, action);
	parse_prolog();
	if (!parse_element()) {
		var c = scan.get();
		if (c == nil)
			error("document doesn't contain any data");
		scan.put();
		error("garbage");
	}

	parse_misc();
	scan.skip_spaces();
	if (scan.get() != nil)
		error("trailing garbage");

	return action.end();
}


var parse_prolog = func {
	parse_xmldecl();
	parse_misc();
	parse_doctype();
	parse_misc();
}


var parse_xmldecl = func {
	if (!scan.skip("<?"))
		return;
	if (!scan.skip("xml") or !scan.skip_spaces())
		error("prolog with invalid identifier. xml: expected");
	if (!scan.skip("version"))
		error("prolog without version statement");
	scan.getassign();	# returns lvalue
	if (scan.skip("encoding")) {
		scan.getassign();
	}
	if (scan.skip("standalone")) {
		var s = scan.getassign();
		if (s != "yes" and s != "no")
			error("standalone value must be 'yes' or 'no'");
	}
	if (!scan.skip("?>"))
		error("prolog not closed with ?>");
}


var parse_misc = func {
	while (parse_comment() or parse_pi()) {
	}
}


var parse_comment = func {
	if (!scan.skip("<!--"))
		return 0;
	while (1) {
		if (scan.skip("-->"))
			return 1;
		if (scan.skip("--"))
			error("illegal use of -- in comment");
		scan.get();
	}
	error("unfinished comment");
}


var parse_pi = func {
	if (!scan.skip("<?"))
		return 0;
	while (1) {
		if (scan.skip("?>"))
			return 1;
		scan.get();
	}
	error("unfinished 'processing instruction'");
}


var parse_doctype = func {
	if (!scan.skip("<!"))
		return 0;
	while (1) {
		parse_doctype();

		if (scan.skip(">"))
			return 1;
		scan.get();
	}
	error("unfinished doctype");
}


var parse_rawdata = func {
	var c = scan.get();
	if (c == `<`) {
		scan.put(c);
		return nil;
	}
	
	var s = chr(c);
	while ((c = scan.get()) != `<` and c != nil)
		s ~= chr(c == `&` ? scan.special() : c);
	scan.put(c);
	return s;
}


var parse_cdsect = func {
	if (!scan.skip("<![CDATA["))
		return nil;
	var s = "";
	while (1) {
		if (scan.skip("]]>"))
			return s;
		var c = scan.get();
		if (c == nil)
			break;
		s ~= chr(c == `&` ? scan.special() : c);
	}
	error("unfinished CDATA section");
}


var parse_element = func {
	var open = parse_opening_tag();
	if (open == nil)
		return 0;
	if (open[2]) {
		action.close(open[0], 0);
		return 1;	# tag was self-closing
	}

	var children = 0;
	while (1) {
		if ((var close = parse_closing_tag()) != nil)
			break;
		parse_comment();
		if ((var d = parse_cdsect()) != nil)
			action.data(d);
		if ((var d = parse_rawdata()) != nil)
			action.data(d);
		children += parse_element();
	}
	if (open[0] != close)
		error("<" ~ open[0] ~ "> closed with <" ~ close ~ ">");
	action.close(close, children);
	return 1;
}


var parse_opening_tag = func {
	if (!scan.skip("<"))
		return nil;
	var c = scan.get();
	if (!istagfirst(c)) {
		scan.put(c, `<`);
		return nil;
	}
	scan.put(c);
	var name = scan.getname();	# can't be nil
	var attr = {};
	while (1) {
		scan.skip_spaces();
		var n = scan.getname();
		if (n == nil)
			break;
		var v = scan.getassign();
		attr[n] = v;
	}
	if (scan.skip("/>"))
		selfclosing = 1;
	elsif (scan.skip(">"))
		selfclosing = 0;
	else
		error("garbage in opening tag");
	action.open(name, attr, selfclosing);
	return [name, attr, selfclosing];
}


var parse_closing_tag = func {
	if (!scan.skip("</"))
		return nil;
	var name = scan.getname();
	if (name == nil)
		error("closing tag without name");
	if (!scan.skip(">"))
		error("closing tag not ended with >");
	return name;
}


# ACTION HASHES ===================================================================================

var tree = {
	begin : func(prefix) {
		me.prefix = prefix;
		me.stack = [];
		me.node = props.Node.new();
	},
	end : func {
		return me.node;
	},
	open : func(name, attr) {
		append(me.stack, "");
		var index = size(me.node.getChildren(name));
		me.node = me.node.getChild(name, index, 1);
		if (me.prefix != nil)
			foreach (var n; keys(attr))
				me.node.getNode(me.prefix ~ n, 1).setValue(attr[n]);
	},
	close : func(name, children) {
		var buf = pop(me.stack);
		if (!children and size(buf))
			me.node.setValue(buf);
		me.node = me.node.getParent();
	},
	data : func(d) {
		me.stack[-1] ~= d;
	},
};


var dump = {
	begin : func(prefix = "__") {
		me.prefix = prefix;
		me.level = 0;
	},
	end : func {
	},
	open : func(name, attr) {
		me.print("<", name, ">");
		me.level += 1;
		foreach (var n; sort(keys(attr), cmp))
			me.print("<", , me.prefix, n, ">", attr[n], "</", me.prefix, n, ">");
	},
	close : func(name) {
		me.level -= 1;
		me.print("</", name, ">");
	},
	data : func(data) {
		for (var i = 0; i < size(data); i += 1)
			if (!isspace(data[i]))
				return me.print("'", data, "'");
	},
	print : func {
		var s = "";
		for (var i = 0; i < me.level; i += 1)
			s ~= "\t";
		call(print, [s] ~ arg);
	},
};


var process = func(arg...) {
	var err = [];
	var ret = call(parse_document, arg, err);
	if (!size(err))
		return ret;
	if (substr(err[0], 0, size(error_label)) != error_label)
		die(err[0]);  # rethrow

	print(err[0]);
	return nil;
}


var scan = nil;
var action = nil;


# INTERFACE =======================================================================================

var process_string = func(string, act, arg...) {
	scan = StringScanner.new(string);
	action = act;
	return call(process, arg);
}


var process_file = func(file, act, arg...) {
	scan = StringScanner.new(io.readfile(file));
	scan.source = "\n  in file " ~ file ~ ",";
	action = act;
	return call(process, arg);
}