1
0
Fork 0

Erik HOFMAN: faster and better xmlgrep implementation

This commit is contained in:
mfranz 2008-06-29 13:08:24 +00:00
parent ecd5521bf8
commit 24071a3133
5 changed files with 1217 additions and 251 deletions

View file

@ -1,4 +1,3 @@
noinst_PROGRAMS = xmlgrep
xmlgrep_SOURCES = xmlgrep.cxx
xmlgrep_LDADD = -lsgstructure -lsgprops -lsgmisc -lsgdebug -lsgxml
xmlgrep_SOURCES = xmlgrep.c xml.c xml.h

733
utils/xmlgrep/xml.c Normal file
View file

@ -0,0 +1,733 @@
/* Copyright (c) 2007,2008 by Adalin B.V.
* Copyright (c) 2007,2008 by Erik Hofman
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of (any of) the copyrightholder(s) nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef WIN32
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
/*
 * Bookkeeping for one Windows file mapping:
 *   m - mapping handle returned by CreateFileMapping()
 *   p - view address returned by MapViewOfFile()
 */
typedef struct
{
HANDLE m;
void *p;
} SIMPLE_UNMMAP;
/* The single mapping record used by the mmap()/munmap() shims below; because
 * both macros always pass &un, only one mapping is tracked at a time. */
static SIMPLE_UNMMAP un;
/*
 * simple_mmap: map the file behind a C file descriptor and return a pointer
 * to it. simple_unmmap: release a mapping recorded by simple_mmap.
 */
void *simple_mmap(int, unsigned int, SIMPLE_UNMMAP *);
void simple_unmmap(SIMPLE_UNMMAP *);
/* POSIX-looking shims: only the descriptor (e) and length (b) arguments of
 * mmap() are forwarded, every other argument is discarded. */
#define mmap(a,b,c,d,e,f) simple_mmap((e), (b), &un)
#define munmap(a,b) simple_unmmap(&un)
#else /* !WIN32 */
# include <sys/mman.h>
# include <fcntl.h>
#endif
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * An XML-id: a window onto a region of XML text.
 */
struct _xml_id
{
char *start; /* first byte of the region */
size_t len; /* size of the region in bytes */
int fd; /* descriptor from open() for ids made by xmlOpen(); the other
         * constructors in this file store 0 here */
};
/* File-local helpers, defined further down in this file. */
static char *__xmlCopyNode(char *, size_t, const char *);
static char *__xmlGetNode(char *, size_t, const char *, size_t *);
static char *__xmlFindNextElement(char *, size_t, const char *);
static void *memncasemem(const void *, size_t, const void *, size_t);
/*
 * Open an XML file and map it read-only into memory.
 *
 * @param fn path to the file
 * @return a newly allocated XML-id to be released with xmlClose(), or 0 when
 *         fn is NULL, the file cannot be opened or stat'ed, it is empty (an
 *         empty file cannot be mapped), memory is exhausted or mapping fails.
 *
 * Every failure path releases whatever was acquired before it, so no file
 * descriptor or allocation is leaked.
 */
void *
xmlOpen(const char *fn)
{
    struct _xml_id *id = 0;

    if (fn)
    {
        /* open() returns -1 on failure; descriptor 0 is a valid result */
        int fd = open(fn, O_RDONLY);
        if (fd >= 0)
        {
            struct stat statbuf;

            if ((fstat(fd, &statbuf) == 0) && (statbuf.st_size > 0))
            {
                id = malloc(sizeof(struct _xml_id));
                if (id)
                {
                    void *mm;

                    id->fd = fd;
                    id->len = (size_t)statbuf.st_size;
                    mm = mmap(0, id->len, PROT_READ, MAP_PRIVATE, fd, 0L);
                    id->start = mm;

                    /* POSIX mmap reports failure with MAP_FAILED, the WIN32
                     * replacement (simple_mmap) with a NULL pointer. */
                    if (mm == 0
#ifdef MAP_FAILED
                        || mm == MAP_FAILED
#endif
                       )
                    {
                        free(id);
                        id = 0;
                    }
                }
            }
            if (!id) close(fd);
        }
    }

    return (void*)id;
}
/*
 * Release an XML-id: unmap its region, close its descriptor and free the
 * id itself. A NULL id is ignored.
 *
 * @param id XML-id to release
 */
void
xmlClose(void *id)
{
    struct _xml_id *handle = (struct _xml_id *)id;

    if (!handle)
        return;

    munmap(handle->start, handle->len);
    close(handle->fd);
    free(handle);
}
/*
 * Duplicate the contents of the node at 'path' into a private buffer.
 *
 * The returned XML-id and the copied text live in one allocation (the text
 * directly follows the id structure), so the result is released with a
 * single free().
 *
 * @param id XML-id to search in
 * @param path path to the node
 * @return new XML-id holding the copy, or 0 when an argument is NULL, the
 *         node is not found or memory is exhausted
 */
void *
xmlCopyNode(void *id, char *path)
{
    struct _xml_id *source = (struct _xml_id *)id;
    struct _xml_id *copy;
    size_t nbytes;
    char *content;

    if (!id || !path)
        return (void *)0;

    content = __xmlGetNode(source->start, source->len, path, &nbytes);
    if (!content)
        return (void *)0;

    copy = malloc(sizeof(struct _xml_id) + nbytes);
    if (copy)
    {
        /* the payload starts right behind the header */
        copy->start = (char *)copy + sizeof(struct _xml_id);
        copy->len = nbytes;
        copy->fd = 0;
        memcpy(copy->start, content, nbytes);
    }

    return (void *)copy;
}
/*
 * Locate the node at 'path' and return an XML-id that refers to its
 * contents inside the region of 'id' (nothing is copied).
 *
 * @param id XML-id to search in
 * @param path path to the node
 * @return newly allocated XML-id (release with free()), or 0 when an
 *         argument is NULL, the node is not found or memory is exhausted
 */
void *
xmlGetNode(void *id, const char *path)
{
    struct _xml_id *xsid = 0;

    if (id && path)
    {
        struct _xml_id *xid = (struct _xml_id *)id;
        size_t rlen;
        char *ptr;

        ptr = __xmlGetNode(xid->start, xid->len, path, &rlen);
        if (ptr)
        {
            xsid = malloc(sizeof(struct _xml_id));
            if (xsid)   /* the allocation may fail; never write through NULL */
            {
                xsid->len = rlen;
                xsid->start = ptr;
                xsid->fd = 0;
            }
        }
    }

    return (void *)xsid;
}
void *
xmlGetNextElement(const void *pid, void *id, char *path)
{
struct _xml_id *xpid = (struct _xml_id *)pid;
struct _xml_id *xsid = 0;
if (id && path)
{
struct _xml_id *xid = (struct _xml_id *)id;
size_t rlen, nlen;
char *ptr;
if (xid->len < xpid->len) xid->start += xid->len;
nlen = xpid->len - (xid->start - xpid->start);
ptr = __xmlGetNode(xid->start, nlen, path, &rlen);
if (ptr)
{
xid->len = rlen;
xid->start = ptr;
xsid = xid;
}
}
return (void *)xsid;
}
int
xmlCompareString(const void *id, const char *s)
{
struct _xml_id *xid = (struct _xml_id *)id;
int ret = -1;
if (xid && xid->len && s && (strlen(s) > 0))
{
ret = strncasecmp(xid->start, s, xid->len);
}
return ret;
}
int
xmlCompareNodeString(const void *id, const char *path, const char *s)
{
struct _xml_id *xid = (struct _xml_id *)id;
int ret = -1;
if (xid && xid->len && path && s && (strlen(s) > 0))
{
size_t rlen;
char *str;
str = __xmlGetNode(xid->start, xid->len, path, &rlen);
if (str) ret = strncasecmp(str, s, rlen);
}
return ret;
}
/*
 * Return the contents of the node at 'path' as a newly allocated string
 * with leading and trailing whitespace removed.
 *
 * @param id XML-id to search in
 * @param path path to the node
 * @return NUL-terminated string to be freed by the caller, or 0 when an
 *         argument is NULL, the node is missing or empty, or memory is
 *         exhausted
 */
char *
xmlGetNodeString(void *id, const char *path)
{
    struct _xml_id *xid = (struct _xml_id *)id;
    char *str = 0;

    if (xid && xid->len && path)
    {
        str = __xmlCopyNode(xid->start, xid->len, path);
        if (str)
        {
            char *ps = str;
            char *pe = str + strlen(str);   /* one past the last character */
            size_t slen;

            while ((ps < pe) && isspace((unsigned char)*ps)) ps++;
            /* look at the character before 'pe': starting on the NUL
             * terminator itself would never trim anything */
            while ((pe > ps) && isspace((unsigned char)pe[-1])) pe--;

            slen = (size_t)(pe - ps);
            if (ps > str) memmove(str, ps, slen);
            str[slen] = 0;
        }
    }

    return str;
}
/*
 * Return the contents of the current node as a newly allocated string with
 * leading and trailing whitespace removed.
 *
 * @param id XML-id of the node
 * @return NUL-terminated string to be freed by the caller, or 0 when the id
 *         is NULL or empty or memory is exhausted
 */
char *
xmlGetString(void *id)
{
    struct _xml_id *xid = (struct _xml_id *)id;
    char *str = 0;

    if (xid && xid->len)
    {
        str = malloc(xid->len+1);
        if (str)
        {
            const char *ps = xid->start;
            const char *pe = ps + xid->len;   /* one past the last byte */
            size_t slen;

            while ((ps < pe) && isspace((unsigned char)*ps)) ps++;
            /* test the byte before 'pe' so trailing whitespace really
             * gets removed */
            while ((pe > ps) && isspace((unsigned char)pe[-1])) pe--;

            slen = (size_t)(pe - ps);
            memcpy(str, ps, slen);
            str[slen] = 0;
        }
    }

    return str;
}
/*
 * Copy the whitespace-trimmed contents of the node at 'path' into a caller
 * supplied buffer. The result is always NUL-terminated and truncated to
 * buflen-1 characters when it does not fit.
 *
 * @param id XML-id to search in
 * @param path path to the node
 * @param buffer destination buffer
 * @param buflen size of the destination buffer in bytes
 * @return number of characters stored (excluding the terminator); 0 when an
 *         argument is NULL/zero or the node is missing or blank
 */
unsigned int
xmlCopyString(void *id, const char *path, char *buffer, unsigned int buflen)
{
    struct _xml_id *xid = (struct _xml_id *)id;
    unsigned int ret = 0;

    if (xid && xid->len && path && buffer && buflen)
    {
        size_t nlen = 0;   /* __xmlGetNode stores a size_t through its pointer */
        char *str;

        *buffer = 0;
        str = __xmlGetNode(xid->start, xid->len, path, &nlen);
        if (str)
        {
            const char *ps = str;
            const char *pe = str + nlen;   /* one past the last byte */

            while ((ps < pe) && isspace((unsigned char)*ps)) ps++;
            while ((pe > ps) && isspace((unsigned char)pe[-1])) pe--;

            nlen = (size_t)(pe - ps);
            if (nlen >= buflen) nlen = buflen-1;

            memcpy(buffer, ps, nlen);
            buffer[nlen] = 0;
            ret = (unsigned int)nlen;
        }
    }

    return ret;
}

/*
 * Same operation under the name declared in xml.h (xmlCopyNodeString), so
 * that callers of the header find a definition at link time.
 */
unsigned int
xmlCopyNodeString(void *id, const char *path, char *buffer,
                  const unsigned int buflen)
{
    return xmlCopyString(id, path, buffer, buflen);
}
/*
 * Convert the contents of the node at 'path' to an integer (base 10).
 *
 * @param id XML-id to search in
 * @param path path to the node
 * @return the strtol() result for the node contents, or 0 when an argument
 *         is NULL or the node is missing
 */
long int
xmlGetNodeInt(void *id, const char *path)
{
    struct _xml_id *xid = (struct _xml_id *)id;
    long int li = 0;

    if (path && xid && xid->len)
    {
        /* must be a size_t: __xmlGetNode stores a size_t through this
         * pointer, which overruns an unsigned int where size_t is wider */
        size_t rlen;
        char *str;

        str = __xmlGetNode(xid->start, xid->len, path, &rlen);
        if (str) li = strtol(str, (char **)NULL, 10);
    }

    return li;
}
/*
 * Convert the contents of the current node to an integer (base 10).
 *
 * NOTE(review): strtol() parses from the start of the region and stops at
 * the first non-numeric character; it is not told the region's length.
 *
 * @param id XML-id of the node
 * @return the strtol() result, or 0 when the id is NULL or empty
 */
long int
xmlGetInt(void *id)
{
    struct _xml_id *node = (struct _xml_id *)id;

    if (!node || !node->len)
        return 0;

    return strtol(node->start, (char **)NULL, 10);
}
/*
 * Convert the contents of the node at 'path' to a double.
 *
 * @param id XML-id to search in
 * @param path path to the node
 * @return the strtod() result for the node contents, or 0.0 when an
 *         argument is NULL or the node is missing
 */
double
xmlGetNodeDouble(void *id, const char *path)
{
    struct _xml_id *xid = (struct _xml_id *)id;
    double d = 0.0;

    if (path && xid && xid->len)
    {
        /* must be a size_t: __xmlGetNode stores a size_t through this
         * pointer, which overruns an unsigned int where size_t is wider */
        size_t rlen;
        char *str;

        str = __xmlGetNode(xid->start, xid->len, path, &rlen);
        if (str) d = strtod(str, (char **)NULL);
    }

    return d;
}
/*
 * Convert the contents of the current node to a double.
 *
 * NOTE(review): strtod() parses from the start of the region and stops at
 * the first character that cannot be part of a number; it is not told the
 * region's length.
 *
 * @param id XML-id of the node
 * @return the strtod() result, or 0.0 when the id is NULL or empty
 */
double
xmlGetDouble(void *id)
{
    struct _xml_id *node = (struct _xml_id *)id;

    if (!node || !node->len)
        return 0.0;

    return strtod(node->start, (char **)NULL);
}
/*
 * Count the elements named by the last component of 'path'.
 *
 * When 'path' has several components, everything before the last '/' is
 * resolved first and the count is taken inside that node; otherwise the
 * whole region of 'id' is searched.
 *
 * @param id XML-id to search in
 * @param path path whose last component is the element name to count
 * @return number of elements found; 0 when an argument is NULL, the parent
 *         node is missing or memory is exhausted
 */
unsigned int
xmlGetNumElements(void *id, const char *path)
{
    struct _xml_id *xid = (struct _xml_id *)id;
    unsigned int ret = 0;

    if (xid && xid->len && path)
    {
        const char *pathname = path;
        const char *nname;
        char *p = 0, *next;
        size_t clen = 0;   /* size_t: written by __xmlGetNode */

        if (*pathname == '/') pathname++;

        nname = strrchr(pathname, '/');
        if (nname)
        {
            size_t plen = (size_t)(nname - pathname);
            char *pname = calloc(1, plen+1);
            if (pname)
            {
                /* copy from 'pathname' (leading '/' already skipped) so the
                 * parent path is neither shifted nor truncated */
                memcpy(pname, pathname, plen);
                p = __xmlGetNode(xid->start, xid->len, pname, &clen);
                free(pname);
            }
            nname++;   /* the element name starts behind the '/' */
        }
        else
        {
            nname = pathname;
            p = xid->start;
            clen = xid->len;
        }

        /* Each hit lies inside [p, p+clen); shrink the window as the cursor
         * advances so the search never runs past the end of the region. */
        while ((next = __xmlFindNextElement(p, clen, nname)) != 0)
        {
            clen -= (size_t)(next - p);
            p = next;
            ret++;
        }
    }

    return ret;
}
/*
 * Create an independent copy of an XML-id. The copy refers to the same
 * region as the original, but its descriptor field is reset to 0.
 *
 * @param id XML-id to copy
 * @return newly allocated XML-id, or 0 when id is NULL or memory is
 *         exhausted
 */
void *
xmlMarkId(void *id)
{
    struct _xml_id *mark;

    if (!id)
        return (void *)0;

    mark = malloc(sizeof(struct _xml_id));
    if (mark)
    {
        *mark = *(struct _xml_id *)id;
        mark->fd = 0;
    }

    return (void *)mark;
}
/* -------------------------------------------------------------------------- */
/*
 * Return a NUL-terminated heap copy of the contents of the node at 'path'.
 *
 * @param start first byte of the region to search
 * @param len size of the region in bytes
 * @param path path to the node
 * @return newly allocated string (free() it), or 0 when the node is missing
 *         or empty or memory is exhausted
 */
static char *
__xmlCopyNode(char *start, size_t len, const char *path)
{
    char *p, *ret = 0;
    size_t rlen;

    p = __xmlGetNode(start, len, path, &rlen);
    if (p && rlen)
    {
        /* calloc zero-fills, which supplies the terminating NUL */
        ret = calloc(1, rlen+1);
        if (ret) memcpy(ret, p, rlen);
    }

    return ret;
}
/*
 * Locate the node addressed by 'path' inside the 'len' bytes at 'start'.
 *
 * 'path' is a '/'-separated list of element names. The first name is
 * searched case-insensitively (memncasemem); a hit only counts when the
 * byte in front of it is '<'. The character following the name in the start
 * tag is not examined. For the last path component the function returns a
 * pointer to the text behind the start tag's '>' and stores the number of
 * bytes up to the first matching "</name>" in *rlen. For any other
 * component it recurses with the rest of the path on the text that follows
 * the start tag.
 *
 * Returns 0 when nothing is found; *rlen is left untouched in that case.
 *
 * NOTE(review): the '<' tests read p[-1] and p[-2]; when a hit is at the
 * very beginning of the region this looks like a read in front of 'start'
 * -- confirm.
 */
char *
__xmlGetNode(char *start, size_t len, const char *path, size_t *rlen)
{
char *ret = 0;
if (len)
{
char last_node = 0;
char *ptr, *name;
int plen;
name = (char *)path;
if (*name == '/') name++; /* skip the leading '/' character */
/* 'ptr' ends up at the end of the first path component */
ptr = strchr(name, '/');
if (!ptr)
{
last_node = 1;
ptr = name+strlen(name);
}
plen = ptr - name;
if (plen)
{
char *p, *cur;
cur = start;
/* find the next occurrence of the name that directly follows a '<';
 * 'cur' and 'len' track the still unsearched remainder */
do
{
if ((p = memncasemem(cur, len, name, plen)) != 0)
{
len -= (p + plen) - cur;
cur = p + plen;
}
}
while (p && (*(p-1) != '<'));
if (p)
{
p = cur;
/* advance 'cur' just past the '>' ending the start tag, or stop at the
 * end of the remaining range */
while ((*cur++ != '>') && (cur<(p+len)));
len -= cur - p;
if (last_node)
{
/* 'rptr' marks the first byte of the node's contents */
char *rptr = cur;
/* search for the name again until it is framed as "</name>" */
do
{
if ((p = memncasemem(cur, len, name, plen)) != 0)
{
len -= (p + plen) - cur;
cur = p + plen;
if (*(p-2) == '<' && *(p-1) == '/'
&& *(p+plen) == '>') break;
}
}
while (p);
if (p)
{
/* 'p' points at the name inside "</name>", so the contents end two
 * bytes earlier, at the '<' */
*rlen = p-rptr-2;
ret = rptr;
}
}
else
/* more path components follow: resolve them in the text after this
 * start tag */
ret = __xmlGetNode(cur, len, ptr+1, rlen);
}
}
}
return ret;
}
/*
 * Find the next start tag of element 'name' inside the 'len' bytes at
 * 'start'.
 *
 * The name is searched case-insensitively (memncasemem) and only counts
 * when the byte in front of it is '<'. On success the function returns a
 * pointer to the byte directly behind the name inside the start tag. It
 * then also scans forward for "</name>", but the return value does not
 * depend on whether that end tag was found.
 *
 * Returns 0 when an argument is NULL/zero, 'name' is empty or no start tag
 * is found.
 *
 * NOTE(review): the '<' tests read p[-1] and p[-2]; when a hit is at the
 * very beginning of the region this looks like a read in front of 'start'
 * -- confirm.
 */
char *
__xmlFindNextElement(char *start, size_t len, const char *name)
{
char *ret = 0;
if (start && len && name)
{
unsigned int plen;
plen = strlen(name);
if (plen)
{
char *p, *cur;
cur = start;
/* find the next occurrence of the name that directly follows a '<';
 * 'cur' and 'len' track the still unsearched remainder */
do
{
if ((p = memncasemem(cur, len, name, plen)) != 0)
{
len -= (p + plen) - cur;
cur = p + plen;
}
}
while (p && (*(p-1) != '<'));
if (p)
{
/* 'rptr' (the byte behind the matched name) is the value returned */
char *rptr = cur;
p = cur;
/* advance 'cur' just past the '>' ending the start tag, or stop at the
 * end of the remaining range */
while ((*cur++ != '>') && (cur<(p+len)));
len -= cur - p;
/* scan for the name framed as "</name>" */
do
{
if ((p = memncasemem(cur, len, name, plen)) != 0)
{
len -= (p + plen) - cur;
cur = p + plen;
if (*(p-2) == '<' && *(p-1) == '/' && *(p+plen) == '>')
break;
}
}
while (p);
ret = rptr;
}
}
}
return ret;
}
/* Case-insensitive character comparison. tolower() is used instead of a bit
 * mask so that only letters fold; a mask on bit 5 would also treat pairs
 * like '[' and '{' as equal. */
#define CASECMP(a,b) ( tolower((unsigned char)(a)) == tolower((unsigned char)(b)) )
#define NOCASECMP(a,b) ( !CASECMP((a),(b)) )

/*
 * Case-insensitive memmem(): find the first occurrence of the 'needlelen'
 * bytes at 'needle' inside the 'haystacklen' bytes at 'haystack'.
 *
 * Every start offset is examined, including the last possible one
 * (haystacklen - needlelen), and a failed partial match restarts at the
 * very next offset, so overlapping candidates are not skipped.
 *
 * @return pointer to the first match inside 'haystack', or 0 when there is
 *         none, an argument is NULL, the needle is empty or the needle is
 *         longer than the haystack
 */
static void *
memncasemem(const void *haystack, size_t haystacklen,
            const void *needle, size_t needlelen)
{
    void *rptr = 0;

    if (haystack && needle && (needlelen > 0) && (haystacklen >= needlelen))
    {
        const char *ns = (const char *)needle;
        const char *hs = (const char *)haystack;
        const char *hne = hs + (haystacklen - needlelen); /* last start */

        for (; hs <= hne; hs++)
        {
            size_t i = 0;

            while ((i < needlelen) && CASECMP(hs[i], ns[i]))
                i++;

            if (i == needlelen)
            {
                rptr = (void *)hs;
                break;
            }
        }
    }

    return rptr;
}
#if 0
/*
 * Disabled (#if 0) alternative search routine: a case-sensitive
 * Boyer-Moore-Horspool substring search over raw bytes.
 *
 * Returns a pointer to the first occurrence of the 'nlen' bytes at 'needle'
 * within the 'hlen' bytes at 'haystack', or NULL when there is none or an
 * argument is NULL/zero.
 *
 * NOTE(review): UCHAR_MAX comes from <limits.h>, which is not among the
 * headers this file includes -- add it before enabling this code.
 */
const unsigned char *
boyermoore_horspool_memmem(const unsigned char* haystack, size_t hlen,
const unsigned char* needle, size_t nlen)
{
size_t scan = 0;
size_t bad_char_skip[UCHAR_MAX + 1]; /* Officially called:
* bad character shift */
/* Sanity checks on the parameters */
if (nlen <= 0 || !haystack || !needle)
return NULL;
/* ---- Preprocess ---- */
/* Initialize the table to default value */
/* When a character is encountered that does not occur
* in the needle, we can safely skip ahead for the whole
* length of the needle.
*/
for (scan = 0; scan <= UCHAR_MAX; scan = scan + 1)
bad_char_skip[scan] = nlen;
/* C arrays have the first byte at [0], therefore:
* [nlen - 1] is the last byte of the array. */
size_t last = nlen - 1;
/* Then populate it with the analysis of the needle */
for (scan = 0; scan < last; scan = scan + 1)
bad_char_skip[needle[scan]] = last - scan;
/* ---- Do the matching ---- */
/* Search the haystack, while the needle can still be within it. */
while (hlen >= nlen)
{
/* scan from the end of the needle */
for (scan = last; haystack[scan] == needle[scan]; scan = scan - 1)
if (scan == 0) /* If the first byte matches, we've found it. */
return haystack;
/* otherwise, we need to skip some bytes and start again.
Note that here we are getting the skip value based on the last byte
of needle, no matter where we didn't match. So if needle is: "abcd"
then we are skipping based on 'd' and that value will be 4, and
for "abcdd" we again skip on 'd' but the value will be only 1.
The alternative of pretending that the mismatched character was
the last character is slower in the normal case (Eg. finding
"abcd" in "...azcd..." gives 4 by using 'd' but only
4-2==2 using 'z'. */
hlen -= bad_char_skip[haystack[last]];
haystack += bad_char_skip[haystack[last]];
}
return NULL;
}
#endif
#ifdef WIN32
/* Source:
 * https://mollyrocket.com/forums/viewtopic.php?p=2529
 */
/*
 * Map the whole file behind the C file descriptor 'fd' read-only.
 *
 * @param fd C runtime file descriptor
 * @param length unused; the mapping always covers the entire file
 * @param un receives the mapping handle and view address needed by
 *           simple_unmmap(); may be NULL
 * @return address of the mapped view, or NULL on failure
 */
void *
simple_mmap(int fd, unsigned int length, SIMPLE_UNMMAP *un)
{
    HANDLE f;
    HANDLE m;
    void *p;

    (void)length;

    /* _get_osfhandle() reports failure as -1 (INVALID_HANDLE_VALUE) */
    f = (HANDLE)_get_osfhandle(fd);
    if (!f || (f == INVALID_HANDLE_VALUE)) return NULL;

    m = CreateFileMapping(f, NULL, PAGE_READONLY, 0, 0, NULL);
    if (!m) return NULL;

    p = MapViewOfFile(m, FILE_MAP_READ, 0,0,0);
    if (!p)
    {
        CloseHandle(m);
        return NULL;
    }

    if (un)
    {
        un->m = m;
        un->p = p;
    }

    return p;
}
/*
 * Undo a mapping made by simple_mmap(): unmap the view and close the
 * mapping handle recorded in 'un'. A NULL 'un' is ignored, matching
 * simple_mmap(), which also accepts NULL.
 *
 * @param un record filled in by simple_mmap()
 */
void
simple_unmmap(SIMPLE_UNMMAP *un)
{
    if (!un) return;

    UnmapViewOfFile(un->p);
    CloseHandle(un->m);
}
#endif

191
utils/xmlgrep/xml.h Normal file
View file

@ -0,0 +1,191 @@
/* Copyright (c) 2007, 2008 by Adalin B.V.
* Copyright (c) 2007, 2008 by Erik Hofman
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of (any of) the copyrightholder(s) nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __XML_CONFIG
#define __XML_CONFIG 1
/**
* Open an XML file for processing
*
* @param fname path to the file
* @return XML-id which is used for further processing
*/
void *xmlOpen(const char *);
/**
* Close the XML file after which no further processing is possible
*
* @param xid XML-id
*/
void xmlClose(const void *);
/**
* Locate a subsection of the xml tree for further processing.
* This adds processing speed since the required nodes will only be searched
* in the subsection.
*
* The memory allocated for the XML-subsection-id has to be freed by the
* calling program.
*
* @param xid XML-id
* @param node path to the node containing the subsection
* @return XML-subsection-id for further processing
*/
void *xmlGetNode(const void *, const char *);
/**
* Copy a subsection of the xml tree for further processing.
* This is useful when it's required to process a section of the XML code
* after the file has been closed. The drawback is the added memory
* requirements.
*
* The memory allocated for the XML-subsection-id has to be freed by the
* calling program.
*
* @param xid XML-id
* @param node path to the node containing the subsection
* @return XML-subsection-id for further processing
*/
void *xmlCopyNode(void *, const char *);
/**
* Get the number of elements with the same name from a specified xml path
*
* @param xid XML-id
* @param path path to the xml node
* @return the number count of the nodename
*/
unsigned int xmlGetNumElements(void *, const char *);
/**
* Get the next occurrence of element in the parent node
*
* @param pid XML-id of the parent node of this node
* @param xid XML-id
* @param element name of the element to search for
* @return XML-subsection-id for further processing
*/
void *xmlGetNextElement(const void *, void *, const char *);
/**
* Compare the value of this element to a reference string.
* Comparing is done in a case insensitive way.
*
* @param xid XML-id
* @param s the string to compare to.
* @return an integer less than, equal to, or greater than zero if the value
* of the node is found, respectively, to be less than, to match, or be greater
* than s
*/
int xmlCompareString(const void *, const char *);
/**
* Get a string of characters from a specified xml path
* This function has the advantage of not allocating its own return buffer,
* keeping the memory management to an absolute minimum but the disadvantage
* is that it's unreliable in multithread environments.
*
* @param xid XML-id
* @param path path to the xml node
* @param buffer the buffer to copy the string to
* @param buflen length of the destination buffer
* @return the length of the string
*/
unsigned int xmlCopyNodeString(void *, const char *, char *, const unsigned int);
/**
* Get a string of characters from the current node
* The returned string has to be freed by the calling program.
*
* @param xid XML-id
* @return a newly allocated string containing the contents of the node.
*/
char *xmlGetString(void *);
/**
* Get a string of characters from a specified xml path
* The returned string has to be freed by the calling program.
*
* @param xid XML-id
* @param path path to the xml node
* @return a newly allocated string containing the contents of the node.
*/
char *xmlGetNodeString(void *, const char *);
/**
* Compare the value of a node to a reference string.
* Comparing is done in a case insensitive way.
*
* @param xid XML-id
* @param path path to the xml node to compare to
* @param s the string to compare to.
* @return an integer less than, equal to, or greater than zero if the value
* of the node is found, respectively, to be less than, to match, or be greater
* than s
*/
int xmlCompareNodeString(const void *, const char *, const char *);
/**
* Get the integer value from the current node
*
* @param xid XML-id
* @return the contents of the node converted to an integer value.
*/
long int xmlGetInt(void *);
/**
* Get an integer value from a specified xml path
*
* @param xid XML-id
* @param path path to the xml node
* @return the contents of the node converted to an integer value.
*/
long int xmlGetNodeInt(void *, const char *);
/**
* Get the double value from the current node
*
* @param xid XML-id
* @return the contents of the node converted to a double value.
*/
double xmlGetDouble(void *);
/**
* Get a double value from a specified xml path
*
* @param xid XML-id
* @param path path to the xml node
* @return the contents of the node converted to a double value.
*/
double xmlGetNodeDouble(void *, const char *);
/**
 * Create a copy of an XML-id that can be advanced independently of the
 * original, for instance as the cursor passed to xmlGetNextElement.
 *
 * The memory allocated for the copy has to be freed by the calling program.
 *
 * @param xid XML-id to copy
 * @return a newly allocated copy of the XML-id, or NULL on failure
 */
void *xmlMarkId(void *);
#endif /* __XML_CONFIG */

292
utils/xmlgrep/xmlgrep.c Normal file
View file

@ -0,0 +1,292 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "xml.h"
/* Default search root; main() points _root here when none was given on the
 * command line. */
static const char *_static_root = "/";
/* Number of entries in _filenames. */
static unsigned int _fcount = 0;
/* Heap array of strdup'ed file names collected by parse_option(). */
static char **_filenames = 0;
/* Values of the element, value, root and print options (0 = not given). */
static char *_element = 0;
static char *_value = 0;
static char *_root = 0;
static char *_print = 0;
/* Set by the undocumented list-filenames option; free_and_exit() then
 * prints every file name before freeing it. */
static int print_filenames = 0;
static void free_and_exit(int i);
/* Report an option that lacks its value and terminate through
 * free_and_exit(-1). */
#define SHOW_NOVAL(opt) \
{ \
printf("option '%s' requires a value\n\n", (opt)); \
free_and_exit(-1); \
}
/*
 * Print the usage text to stdout and terminate the program through
 * free_and_exit(0).
 */
void
show_help ()
{
    static const char *const help_text[] =
    {
        "usage: xmlgrep [options] [file ...]\n\n",
        "Options:\n",
        "\t-h\t\tshow this help message\n",
        "\t-e <id>\t\tshow sections that contain this element\n",
        "\t-p <id>\t\tprint this element as the output\n",
        "\t-r <path>\tspecify the XML search root\n",
        "\t-v <string>\tshow sections where one of the elements has this ",
        "value\n\n",
        " To print the contents of the 'type' element of the XML section ",
        "that begins\n at '/printer/output' one would use the following ",
        "syntax:\n\n\txmlgrep -r /printer/output -p type sample.xml\n\n",
        " To filter out sections that contain the 'driver' element with ",
        "'generic' as\n it's value one would issue the following command:",
        "\n\n\txmlgrep -r /printer/output -e driver -v generic sample.xml",
        "\n\n"
    };
    size_t line;

    for (line = 0; line < sizeof(help_text)/sizeof(help_text[0]); line++)
        fputs(help_text[line], stdout);

    free_and_exit(0);
}
/*
 * Release every command line related allocation and terminate the program.
 * When print_filenames is set, each collected file name is printed before
 * it is freed.
 *
 * @param i exit status handed to exit()
 */
void
free_and_exit(int i)
{
    if (_root != _static_root) free(_root);
    free(_element);
    free(_value);
    free(_print);

    if (_filenames)
    {
        /* use a separate counter: reusing 'i' as the loop index would
         * replace the requested exit status with the file count */
        unsigned int n;

        for (n = 0; n < _fcount; n++)
        {
            if (_filenames[n])
            {
                if (print_filenames) printf("%s\n", _filenames[n]);
                free(_filenames[n]);
            }
        }
        free(_filenames);
    }

    exit(i);
}
/*
 * Parse the command line argument args[n].
 *
 * Options may be abbreviated to any prefix ("-r" for "-root"), may start
 * with one or two dashes, and take their value either as "-opt=value" or
 * from the following argument. Anything that does not start with '-' is
 * stored as a file name.
 *
 * @param args argument vector
 * @param n index of the argument to parse
 * @param max number of entries in args
 * @return number of arguments consumed: 2 when an option took its value
 *         from the following argument, 1 otherwise. Errors and the help
 *         option terminate the program through free_and_exit().
 */
int
parse_option(char **args, int n, int max)
{
    char *opt, *arg = 0;
    int consumed = 1;
    int sz;

    opt = args[n];
    if (opt[0] == '-' && opt[1] == '-')
        opt++;

    /* Only options carry an inline value; a '=' inside a file name must be
     * left alone. */
    if (opt[0] == '-' && (arg = strchr(opt, '=')) != NULL)
    {
        *arg++ = 0;
    }
    else if (++n < max)
    {
        /* the value is the next argument, so it is consumed as well */
        arg = args[n];
        consumed = 2;
    }

    if (opt[0] != '-')
    {
        /* Grow the list first and count the entry only once it is stored,
         * so free_and_exit() never walks past the allocated slots. */
        char **ptr = (char **)realloc(_filenames, (_fcount+1)*sizeof(char*));
        if (ptr == 0)
        {
            printf("Out of memory.\n\n");
            free_and_exit(-1);
        }
        _filenames = ptr;

        _filenames[_fcount] = strdup(opt);
        if (_filenames[_fcount] == 0)
        {
            printf("Out of memory.\n\n");
            free_and_exit(-1);
        }
        _fcount++;
        return 1;
    }

    sz = strlen(opt);
    if (strncmp(opt, "-help", sz) == 0)
    {
        show_help();
    }
    else if (strncmp(opt, "-root", sz) == 0)
    {
        if (arg == 0) SHOW_NOVAL(opt);
        free(_root);            /* drop the value of a repeated option */
        _root = strdup(arg);
        return consumed;
    }
    else if (strncmp(opt, "-element", sz) == 0)
    {
        if (arg == 0) SHOW_NOVAL(opt);
        free(_element);
        _element = strdup(arg);
        return consumed;
    }
    else if (strncmp(opt, "-value", sz) == 0)
    {
        if (arg == 0) SHOW_NOVAL(opt);
        free(_value);
        _value = strdup(arg);
        return consumed;
    }
    else if (strncmp(opt, "-print", sz) == 0)
    {
        if (arg == 0) SHOW_NOVAL(opt);
        free(_print);
        _print = strdup(arg);
        return consumed;
    }
    else if (strncmp(opt, "-list-filenames", sz) == 0)
    { /* undocumented test argument */
        print_filenames = 1;
        return 1;
    }
    else
    {
        printf("Unknown option %s\n", opt);
        free_and_exit(-1);
    }

    return 1;
}
/*
 * Walk the '/'-separated path 'tree' below 'xid' and report matches.
 *
 * For every element matching the last path component:
 *  - with the print option, every sub-element of that name is printed;
 *  - otherwise, with both the value and element options, every sub-element
 *    of the given name whose value equals the given value is printed.
 * For the leading path components the function recurses into every
 * matching element. 'tree' is modified temporarily while recursing and
 * restored afterwards.
 *
 * @param num index into _filenames, used to prefix the output
 * @param xid XML-id of the region to search
 * @param tree remaining path to walk
 */
void walk_the_tree(size_t num, void *xid, char *tree)
{
    unsigned int q, no_elements;
    char *elem, *next;

    elem = tree;
    if (*elem == '/') elem++;

    next = strchr(elem, '/');
    if (!next) /* last node from the tree */
    {
        void *elem_id = xmlMarkId(xid);

        no_elements = xmlGetNumElements(xid, elem);
        for (q=0; q<no_elements; q++)
        {
            void *node_id = xmlGetNextElement(xid, elem_id, elem);
            if (!node_id) continue;

            if (_print)
            {
                unsigned int i, no_nodes;
                void *xmid;

                xmid = xmlMarkId(node_id);
                no_nodes = xmlGetNumElements(node_id, _print);
                for (i=0; i<no_nodes; i++)
                {
                    if (xmlGetNextElement(node_id, xmid, _print) != 0)
                    {
                        char *value = xmlGetString(xmid);
                        if (value)
                        {
                            printf("%s: <%s>%s</%s>\n",
                                   _filenames[num], _print, value, _print);
                            free(value);
                        }
                    }
                }
                free(xmid);
            }
            else if (_value && _element)
            {
                unsigned int i, no_nodes;
                void *xmid;

                xmid = xmlMarkId(node_id);
                no_nodes = xmlGetNumElements(node_id, _element);
                for (i=0; i<no_nodes; i++)
                {
                    /* only compare when the cursor really moved on to a
                     * further element */
                    if ((xmlGetNextElement(node_id, xmid, _element) != 0)
                        && (xmlCompareString(xmid, _value) == 0))
                    {
                        printf("%s: <%s>%s</%s>\n",
                               _filenames[num], _element, _value, _element);
                    }
                }
                free(xmid);
            }
            /* value without element, or element without value: nothing is
             * reported */
        }
        free(elem_id);
    }
    else /* walk the rest of the tree */
    {
        void *xmid;

        xmid = xmlMarkId(xid);
        if (xmid)
        {
            *next++ = 0;   /* temporarily cut the path behind 'elem' */

            no_elements = xmlGetNumElements(xid, elem);
            for (q=0; q<no_elements; q++)
            {
                void *elem_id = xmlGetNextElement(xid, xmid, elem);
                /* an element that could not be located has nothing below
                 * it to walk */
                if (elem_id) walk_the_tree(num, elem_id, next);
            }

            *--next = '/'; /* restore the path */

            free(xmid);
        }
        else printf("Error executing xmlMarkId\n");
    }
}
/*
 * Open file number 'num' from _filenames, search it below _root and close
 * it again. A file that cannot be opened is reported on stderr.
 *
 * @param num index into _filenames
 */
void grep_file(unsigned num)
{
    void *xrid = xmlOpen(_filenames[num]);

    if (xrid)
    {
        walk_the_tree(num, xrid, _root);
        /* Close the id that xmlOpen() returned: it holds the real file
         * descriptor. A copy made by xmlMarkId() carries descriptor 0, so
         * closing the copy would close stdin and leak the file. */
        xmlClose(xrid);
    }
    else
    {
        fprintf(stderr, "Error reading file '%s'\n", _filenames[num]);
    }
}
/*
 * Program entry: show the help text when called without arguments,
 * otherwise parse all arguments, fall back to the default search root when
 * none was given, search every listed file and exit through
 * free_and_exit(0).
 */
int
main (int argc, char **argv)
{
    int idx = 1;

    if (argc == 1)
        show_help();

    /* parse_option reports how many arguments it consumed */
    while (idx < argc)
        idx += parse_option(argv, idx, argc);

    if (_root == 0) (_root = (char *)_static_root);

    for (idx = 0; idx < _fcount; idx++)
        grep_file(idx);

    free_and_exit(0);
    return 0;
}

View file

@ -1,249 +0,0 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <simgear/props/props.hxx>
#include <simgear/props/props_io.hxx>
#include <simgear/structure/exception.hxx>
/* Number of entries in _filenames. */
unsigned int _fcount = 0;
/* Heap array of strdup'ed file names collected by parse_option(). */
char **_filenames = 0;
/* Values of the element, value, root and print options (0 = not given). */
char *_element = 0;
char *_value = 0;
char *_root = 0;
char *_print = 0;
/* Set by the undocumented list-filenames option; free_and_exit() then
 * prints every file name before freeing it. */
int print_filenames = 0;
/* Set to a non-zero value to compile in the progress output on stderr. */
#define DEBUG 0
void free_and_exit(int i);
/* Report an option that lacks its value and terminate through
 * free_and_exit(-1). */
#define SHOW_NOVAL(opt) \
{ \
printf("option '%s' requires a value\n\n", (opt)); \
free_and_exit(-1); \
}
/*
 * Print the usage text to stdout and terminate the program through
 * free_and_exit(0).
 */
void show_help ()
{
    static const char *const help_text[] =
    {
        "usage: xmlgrep [options] [file ...]\n\n",
        "Options:\n",
        "\t-h\t\tshow this help message\n",
        "\t-e <id>\t\tshow sections that contain this element\n",
        "\t-p <id>\t\tprint this element as the output\n",
        "\t-r <path>\tspecify the XML search root\n",
        "\t-v <string>\tshow sections where on of the elements has this value \n",
        "\n",
        " To print the contents of the 'type' element of the XML section ",
        "that begins\n at '/printer/output' one would use the following ",
        "syntax:\n\n\txmlgrep -r /printer/output -p type sample.xml\n\n",
        " To filter out sections that contain the 'driver' element with ",
        "'generic' as\n it's value one would issue the following command:\n",
        "\n\txmlgrep -r /printer/output -e driver -v generic -p type ",
        "sample.xml\n\n"
    };
    size_t line;

    for (line = 0; line < sizeof(help_text)/sizeof(help_text[0]); line++)
        fputs(help_text[line], stdout);

    free_and_exit(0);
}
void free_and_exit(int i)
{
if (_root) free(_root);
if (_value) free(_value);
if (_element) free(_element);
if (_filenames)
{
for (i=0; i < _fcount; i++) {
if (_filenames[i]) {
if (print_filenames) printf("%s\n", _filenames[i]);
free(_filenames[i]);
}
}
free(_filenames);
}
exit(i);
}
/*
 * Parse the command line argument args[n].
 *
 * Options may be abbreviated to any prefix ("-r" for "-root"), may start
 * with one or two dashes, and take their value either as "-opt=value" or
 * from the following argument (which must not start with '-'). Anything
 * that does not start with '-' is stored as a file name.
 *
 * @param args argument vector
 * @param n index of the argument to parse
 * @param max number of entries in args
 * @return number of arguments consumed: 2 when an option took its value
 *         from the following argument, 1 otherwise. Errors and the help
 *         option terminate the program through free_and_exit().
 */
int parse_option(char **args, int n, int max) {
    char *opt, *arg = 0;
    int consumed = 1;
    int sz;

    opt = args[n];
    if (opt[0] == '-' && opt[1] == '-')
        opt++;

    /* Only options carry an inline value; a '=' inside a file name must be
     * left alone. */
    if (opt[0] == '-' && (arg = strchr(opt, '=')) != NULL)
        *arg++ = 0;
    else if (++n < max)
    {
        arg = args[n];
        if (arg && arg[0] == '-')
            arg = 0;
        else
            consumed = 2;   /* the value is the next argument */
    }

#if DEBUG
    fprintf(stderr, "processing '%s'='%s'\n", opt, arg ? arg : "NULL");
#endif

    if (opt[0] != '-') {
        /* Grow the list first and count the entry only once it is stored,
         * so free_and_exit() never walks past the allocated slots. */
        char **ptr = (char **)realloc(_filenames, (_fcount+1)*sizeof(char*));
        if (ptr == 0) {
            printf("Out of memory.\n\n");
            free_and_exit(-1);
        }
        _filenames = ptr;

        _filenames[_fcount] = strdup(opt);
        if (_filenames[_fcount] == 0) {
            printf("Out of memory.\n\n");
            free_and_exit(-1);
        }
#if DEBUG
        fprintf(stderr, "\tadding filenames[%i]='%s'\n", (int)_fcount,
                _filenames[_fcount]);
#endif
        _fcount++;
        return 1;
    }

    sz = strlen(opt);
    if (strncmp(opt, "-help", sz) == 0) {
        show_help();
    }
    else if (strncmp(opt, "-root", sz) == 0) {
        if (arg == 0) SHOW_NOVAL(opt);
        free(_root);            /* drop the value of a repeated option */
        _root = strdup(arg);
#if DEBUG
        fprintf(stderr, "\troot=%s\n", _root);
#endif
        return consumed;
    }
    else if (strncmp(opt, "-element", sz) == 0) {
        if (arg == 0) SHOW_NOVAL(opt);
        free(_element);
        _element = strdup(arg);
#if DEBUG
        fprintf(stderr, "\telement=%s\n", _element);
#endif
        return consumed;
    }
    else if (strncmp(opt, "-value", sz) == 0) {
        if (arg == 0) SHOW_NOVAL(opt);
        free(_value);
        _value = strdup(arg);
#if DEBUG
        fprintf(stderr, "\tvalue=%s\n", _value);
#endif
        return consumed;
    }
    else if (strncmp(opt, "-print", sz) == 0) {
        if (arg == 0) SHOW_NOVAL(opt);
        free(_print);
        _print = strdup(arg);
#if DEBUG
        fprintf(stderr, "\tprint=%s\n", _print);
#endif
        return consumed;
    }
    /* undocumented test argument */
    else if (strncmp(opt, "-list-filenames", sz) == 0) {
        print_filenames = 1;
        return 1;
    }
    else {
        printf("Unknown option %s\n", opt);
        free_and_exit(-1);
    }

    return 1;
}
/*
 * Read file number 'num' from _filenames into a property tree and, when
 * both the element and value options are set and the element below the
 * search root has the requested value, print the element named by the
 * print option.
 *
 * A file that cannot be read is reported on stderr and skipped.
 *
 * @param num index into _filenames
 */
void grep_file(unsigned num)
{
    SGPropertyNode root, *path;

#if DEBUG
    fprintf(stderr, "Reading filenames[%i]: %s ... ", num, _filenames[num]);
#endif
    try {
        readProperties(_filenames[num], &root);
    } catch (const sg_exception &e) {
        fprintf(stderr, "Error reading file '%s'\n", _filenames[num]);
        // free_and_exit(-1);
        return;
    }
#if DEBUG
    fprintf(stderr, "done.\n");
#endif

    /* _root stays NULL when no root option was given */
    if (_root && (path = root.getNode(_root, false)) != NULL)
    {
        SGPropertyNode *elem;

        if (_element && _value)
        {
            if ((elem = path->getNode(_element, false)) != NULL)
            {
                /* strcmp returns an int: equality is 0, not NULL */
                if (strcmp(elem->getStringValue(), _value) == 0)
                {
                    /* nothing can be printed without a print option */
                    SGPropertyNode *print =
                        _print ? path->getNode(_print, false) : NULL;
                    if (print)
                    {
                        printf("%s: <%s>%s</%s>\n", _filenames[num],
                               _print, print->getStringValue(), _print);
                    }
                }
            }
        }
        else if (_element)
        {
        }
        else if (_value)
        {
        }
    }
#if DEBUG
    else
        fprintf(stderr," No root node specified.\n");
#endif
}
/*
 * Run grep_file() on every file name collected from the command line, in
 * the order given.
 */
inline void grep_files()
{
#if DEBUG
    fprintf(stderr, "Reading files ...\n");
#endif
    int index = 0;
    while (index < _fcount)
    {
        grep_file(index);
        ++index;
    }
}
/*
 * Program entry: show the help text when called without arguments,
 * otherwise parse all arguments, search every listed file and exit through
 * free_and_exit(0).
 */
int
main (int argc, char **argv)
{
    if (argc == 1)
        show_help();

    /* parse_option reports how many arguments it consumed */
    int idx = 1;
    while (idx < argc)
    {
        int used = parse_option(argv, idx, argc);
        idx += used;
#if DEBUG
        fprintf(stderr, "%i arguments processed.\n", used);
#endif
    }

    grep_files();

    free_and_exit(0);
    return 0;
}