1
0
Fork 0

* reorganize the code to be able to skip comment sections

* deprecate __xmlFindNextElement and use __xmlGetNode instead
* xmlGetNextElement now returns char* instead of void* for future use
* add preliminary support for wildcards in the search path ('*' and '?')
This commit is contained in:
ehofman 2008-07-06 11:34:50 +00:00
parent 67ca3c0307
commit 6e6e8bbefc
4 changed files with 230 additions and 227 deletions

View file

@ -1,3 +1,9 @@
06-07-2008
* reorganize the code to be able to skip comment sections
* deprecate __xmlFindNextElement and use __xmlGetNode instead
* xmlGetNextElement now returns char* instead of void* for future use
* add preliminary support for wildcards in the search path ('*' and '?')
01-07-2008 01-07-2008
* fix a problem caused by removing the last unnecessary alloc * fix a problem caused by removing the last unnecessary alloc
* strip leading-, and trailing spaces from the string before comparing * strip leading-, and trailing spaces from the string before comparing

View file

@ -1,5 +1,5 @@
/* Copyright (c) 2007,2008 by Adalin B.V. /* Copyright (c) 2007, 2008 by Adalin B.V.
* Copyright (c) 2007,2008 by Erik Hofman * Copyright (c) 2007, 2008 by Erik Hofman
* All rights reserved. * All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
@ -60,6 +60,7 @@ void simple_unmmap(SIMPLE_UNMMAP *);
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
struct _xml_id struct _xml_id
{ {
char *start; char *start;
@ -69,8 +70,17 @@ struct _xml_id
static char *__xmlCopyNode(char *, size_t, const char *); static char *__xmlCopyNode(char *, size_t, const char *);
static char *__xmlGetNode(char *, size_t, const char *, size_t *); static char *__xmlGetNode(char *, size_t, const char *, size_t *);
static char *__xmlFindNextElement(char *, size_t, const char *); static void *__xml_memmem(const void *, size_t, const void *, size_t);
static void *memncasemem(const void *, size_t, const void *, size_t); static void *__xml_memncasecmp(void *, size_t, void **, size_t *);
#define PRINT(a, b, c) { \
size_t q, len=(((b)>(c)) ? (c) : (b)); \
if (a) { \
printf("(%i) '", len); \
for (q=0; q<len; q++) printf("%c", (a)[q]); \
printf("'\n"); \
} else printf("NULL pointer at line %i\n", __LINE__); \
}
void * void *
xmlOpen(char *fn) xmlOpen(char *fn)
@ -169,11 +179,11 @@ xmlGetNode(void *id, char *path)
return (void *)xsid; return (void *)xsid;
} }
void * const char *
xmlGetNextElement(const void *pid, void *id, char *path) xmlGetNextElement(const void *pid, void *id, char *path)
{ {
struct _xml_id *xpid = (struct _xml_id *)pid; struct _xml_id *xpid = (struct _xml_id *)pid;
struct _xml_id *xsid = 0; const char *ret;
if (id && path) if (id && path)
{ {
@ -190,11 +200,13 @@ xmlGetNextElement(const void *pid, void *id, char *path)
{ {
xid->len = rlen; xid->len = rlen;
xid->start = ptr; xid->start = ptr;
xsid = xid; ret = path;
} }
else ret = 0;
} }
else ret = 0;
return (void *)xsid; return ret;
} }
int int
@ -443,8 +455,13 @@ xmlGetNumElements(void *id, const char *path)
clen = xid->len; clen = xid->len;
} }
while ((p = __xmlFindNextElement(p, clen, nname)) != 0) do
ret++; {
unsigned int slen = strlen(nname);
p = __xmlGetNode(p, clen, nname, &slen);
if (p) ret++;
}
while (p);
} }
return ret; return ret;
@ -494,47 +511,110 @@ __xmlGetNode(char *start, size_t len, const char *path, size_t *rlen)
if (len && rlen && *rlen) if (len && rlen && *rlen)
{ {
size_t elem_len, newpath_len;
char *newpath, *element;
char last_node = 0; char last_node = 0;
char *ptr, *name;
size_t plen, slen;
slen = *rlen; newpath_len = *rlen;
name = (char *)path; element = (char *)path;
if (*name == '/') if (*element == '/')
{ {
name++; /* skip the leading '/' character */ element++; /* skip the leading '/' character */
slen--; newpath_len--;
} }
ptr = strchr(name, '/'); newpath = strchr(element, '/');
if (!ptr) if (!newpath)
{ {
last_node = 1; last_node = 1;
plen = slen; elem_len = newpath_len;
} }
else else
{ {
plen = ptr++ - name; elem_len = newpath++ - element;
slen -= (ptr - name); newpath_len -= (newpath - element);
} }
if (plen) if (elem_len)
{ {
char *p, *cur; char *p, *cur;
size_t newlen;
void *newelem;
cur = start; cur = p = start;
do do
{ {
if ((p = memncasemem(cur, len, name, plen)) != 0) len -= cur - p;
p = memchr(cur, '<', len);
if (p)
{ {
len -= (p + plen) - cur; p++;
cur = p + plen; if (p >= (cur+len)) return 0;
len -= p - cur;
cur = p;
/* skip comments */
if (memcmp(cur, "!--", 3) == 0)
{
if (len < 6) return 0;
cur += 3;
len -= 3;
do
{
p = memchr(cur, '-', len);
if (p)
{
len -= p - cur;
if ((len > 3) && (memcmp(cur, "-->", 3) == 0))
{
p += 3;
len -= 3;
break;
}
cur = p+1;
}
else return 0;
}
while (p && (len > 2));
if (!p || (len < 2)) return 0;
}
else if (*cur == '?')
{
if (len < 3) return 0;
cur++;
len--;
p = memchr(cur, '?', len);
if (!p || *(p+1) != '>') return 0;
p += 2;
len -= (p - cur);
}
else
{
newlen = elem_len;
newelem = element;
cur = __xml_memncasecmp(p, len, &newelem, &newlen);
if (cur)
{
break;
}
cur = p + elem_len;
}
} }
} }
while (p && (*(p-1) != '<')); while (p);
if (p) if (cur && p)
{ {
len -= elem_len;
p = cur; p = cur;
while ((*cur++ != '>') && (cur<(p+len))); while ((*cur++ != '>') && (cur<(p+len)));
len -= cur - p; len -= cur - p;
@ -544,26 +624,28 @@ __xmlGetNode(char *start, size_t len, const char *path, size_t *rlen)
char *rptr = cur; char *rptr = cur;
do do
{ {
if ((p = memncasemem(cur, len, name, plen)) != 0) if ((p = __xml_memmem(cur, len, "</", 2)) != 0)
{ {
len -= (p + plen) - cur; char *r;
cur = p + plen;
if (*(p-2) == '<' && *(p-1) == '/' len -= (p + 2) - cur;
&& *(p+plen) == '>') break; cur = p + 2;
r = __xml_memncasecmp(cur, len, &newelem, &newlen);
if (r && *r == '>') break;
} }
} }
while (p); while (p);
if (p) if (p)
{ {
*rlen = p-rptr-2; *rlen = p-rptr;
ret = rptr; ret = rptr;
} }
} }
else else
{ {
*rlen = slen; *rlen = newpath_len;
ret = __xmlGetNode(cur, len, ptr, rlen); ret = __xmlGetNode(cur, len, newpath, rlen);
} }
} }
} }
@ -572,156 +654,89 @@ __xmlGetNode(char *start, size_t len, const char *path, size_t *rlen)
return ret; return ret;
} }
char *
__xmlFindNextElement(char *start, size_t len, const char *name)
{
char *ret = 0;
if (start && len && name)
{
unsigned int plen;
plen = strlen(name);
if (plen)
{
char *p, *cur;
cur = start;
do
{
if ((p = memncasemem(cur, len, name, plen)) != 0)
{
len -= (p + plen) - cur;
cur = p + plen;
}
}
while (p && (*(p-1) != '<'));
if (p)
{
char *rptr = cur;
p = cur;
while ((*cur++ != '>') && (cur<(p+len)));
len -= cur - p;
do
{
if ((p = memncasemem(cur, len, name, plen)) != 0)
{
len -= (p + plen) - cur;
cur = p + plen;
if (*(p-2) == '<' && *(p-1) == '/' && *(p+plen) == '>')
break;
}
}
while (p);
ret = rptr;
}
}
}
return ret;
}
#define CASECMP(a,b) ( ((a) & 0xdf) == ((b) & 0xdf) )
#define NOCASECMP(a,b) ( ((a)^(b)) & 0xdf ) #define NOCASECMP(a,b) ( ((a)^(b)) & 0xdf )
void * void *
memncasemem(const void *haystack, size_t haystacklen, __xml_memmem(const void *haystack, size_t haystacklen,
const void *needle, size_t needlelen) const void *needle, size_t needlelen)
{ {
void *rptr = 0; void *rptr = 0;
if (haystack && needle && (needlelen > 0) && (haystacklen >= needlelen)) if (haystack && needle && (needlelen > 0) && (haystacklen >= needlelen))
{ {
const char *ne = (const char *)needle + needlelen; char *ns, *hs, *ptr;
const char *he = (const char *)haystack + haystacklen;
const char *hne = he - needlelen; hs = (char *)haystack;
char *ns, *hs = (char *)haystack; ns = (char *)needle;
do do
{ {
rptr = 0; ptr = memchr(hs, *ns, haystacklen);
ns = (char *)needle; if (ptr)
while((hs <= hne) && NOCASECMP(*hs,*ns))
hs++;
if (hs < hne)
{ {
rptr = hs; haystacklen -= (ptr - hs);
while((hs < he) && (ns < ne) && !NOCASECMP(*hs,*ns))
if (haystacklen < needlelen) break;
if (memcmp(ptr, needle, needlelen) == 0)
{ {
hs++; rptr = ptr;
ns++; break;
} }
hs = ptr+1;
} }
else break; else break;
} }
while (ns < ne); while (haystacklen > needlelen);
} }
return rptr; return rptr;
} }
#if 0 void *
const unsigned char * __xml_memncasecmp(void *haystack, size_t haystacklen,
boyermoore_horspool_memmem(const unsigned char* haystack, size_t hlen, void **needle, size_t *needlelen)
const unsigned char* needle, size_t nlen)
{ {
size_t scan = 0; void *rptr = 0;
size_t bad_char_skip[UCHAR_MAX + 1]; /* Officially called:
* bad character shift */ if (haystack && needle && needlelen && (*needlelen > 0)
&& (haystacklen >= *needlelen))
/* Sanity checks on the parameters */
if (nlen <= 0 || !haystack || !needle)
return NULL;
/* ---- Preprocess ---- */
/* Initialize the table to default value */
/* When a character is encountered that does not occur
* in the needle, we can safely skip ahead for the whole
* length of the needle.
*/
for (scan = 0; scan <= UCHAR_MAX; scan = scan + 1)
bad_char_skip[scan] = nlen;
/* C arrays have the first byte at [0], therefore:
* [nlen - 1] is the last byte of the array. */
size_t last = nlen - 1;
/* Then populate it with the analysis of the needle */
for (scan = 0; scan < last; scan = scan + 1)
bad_char_skip[needle[scan]] = last - scan;
/* ---- Do the matching ---- */
/* Search the haystack, while the needle can still be within it. */
while (hlen >= nlen)
{ {
/* scan from the end of the needle */ char *ns, *hs;
for (scan = last; haystack[scan] == needle[scan]; scan = scan - 1) size_t i;
if (scan == 0) /* If the first byte matches, we've found it. */
return haystack; ns = (char *)*needle;
hs = (char *)haystack;
/* otherwise, we need to skip some bytes and start again.
Note that here we are getting the skip value based on the last byte /* search for everything */
of needle, no matter where we didn't match. So if needle is: "abcd" if ((*ns == '*') && (*needlelen == 1))
then we are skipping based on 'd' and that value will be 4, and {
for "abcdd" we again skip on 'd' but the value will be only 1. char *he = hs + haystacklen;
The alternative of pretending that the mismatched character was
the last character is slower in the normal case (Eg. finding while ((hs < he) && (*hs != ' ') && (*hs != '>')) hs++;
"abcd" in "...azcd..." gives 4 by using 'd' but only *needle = (void *)haystack;
4-2==2 using 'z'. */ *needlelen = hs - (char *)haystack;
hlen -= bad_char_skip[haystack[last]]; rptr = hs;
haystack += bad_char_skip[haystack[last]]; }
else
{
size_t nlen = *needlelen;
for (i=0; i<nlen; i++)
{
if (NOCASECMP(*hs,*ns) && (*ns != '?'))
break;
hs++;
ns++;
}
if (i == nlen) rptr = hs;
}
} }
return NULL; return rptr;
} }
#endif
#ifdef WIN32 #ifdef WIN32
/* Source: /* Source:

View file

@ -84,13 +84,14 @@ unsigned int xmlGetNumElements(void *, const char *);
/** /**
* Get the next occurrence of element in the parent node * Get the next occurrence of element in the parent node
* The return value should never be altered or freed by the caller
* *
* @param pid XML-id of the parent node of this node * @param pid XML-id of the parent node of this node
* @param xid XML-id * @param xid XML-id
* @param element name of the element to search for * @param element name of the element to search for
* @return XML-subsection-id for further processing * @return name of the element or NULL if unsuccessful
*/ */
void *xmlGetNextElement(const void *, void *, const char *); const char *xmlGetNextElement(const void *, void *, const char *);
/** /**
* Compare the value of this element to a reference string. * Compare the value of this element to a reference string.

View file

@ -5,6 +5,7 @@
#include "xml.h" #include "xml.h"
static const char *_static_root = "/"; static const char *_static_root = "/";
static const char *_static_element = "*";
static unsigned int _fcount = 0; static unsigned int _fcount = 0;
static char **_filenames = 0; static char **_filenames = 0;
static char *_element = 0; static char *_element = 0;
@ -46,7 +47,7 @@ void
free_and_exit(int i) free_and_exit(int i)
{ {
if (_root && _root != _static_root) free(_root); if (_root && _root != _static_root) free(_root);
if (_element) free(_element); if (_element && _element != _static_element) free(_element);
if (_value) free(_value); if (_value) free(_value);
if (_print) free(_print); if (_print) free(_print);
if (_filenames) if (_filenames)
@ -153,92 +154,71 @@ parse_option(char **args, int n, int max)
void walk_the_tree(size_t num, void *xid, char *tree) void walk_the_tree(size_t num, void *xid, char *tree)
{ {
unsigned int q, no_elements; unsigned int i, no_elements;
char *elem, *next;
elem = tree; if (!tree) /* last node from the tree */
if (*elem == '/') elem++;
next = strchr(elem, '/');
if (!next) /* last node from the tree */
{ {
void *elem_id = xmlMarkId(xid); void *xmid = xmlMarkId(xid);
if (xmid && _print)
no_elements = xmlGetNumElements(xid, elem);
for (q=0; q<no_elements; q++)
{ {
void *node_id = xmlGetNextElement(xid, elem_id, elem); no_elements = xmlGetNumElements(xmid, _print);
if (node_id && _print) for (i=0; i<no_elements; i++)
{ {
unsigned int i, no_nodes; if (xmlGetNextElement(xid, xmid, _print) != 0)
void *xmid;
xmid = xmlMarkId(node_id);
no_nodes = xmlGetNumElements(node_id, _print);
for (i=0; i<no_nodes; i++)
{ {
if (xmlGetNextElement(node_id, xmid, _print) != 0) char *value = xmlGetString(xmid);
if (value)
{ {
char *value = xmlGetString(xmid); printf("%s: <%s>%s</%s>\n",
if (value) _filenames[num], _print, value, _print);
{ free(value);
printf("%s: <%s>%s</%s>\n",
_filenames[num], _print, value, _print);
free(value);
}
} }
} }
free(xmid);
}
else if (node_id && _value)
{
if (_element)
{
unsigned int i, no_nodes;
void *xmid;
xmid = xmlMarkId(node_id);
no_nodes = xmlGetNumElements(node_id, _element);
for (i=0; i<no_nodes; i++)
{
xmlGetNextElement(node_id, xmid, _element);
if (xmlCompareString(xmid, _value) == 0)
{
printf("%s: <%s>%s</%s>\n",
_filenames[num], _element, _value, _element);
}
}
free(xmid);
}
else
{
}
}
else if (node_id && _element)
{
} }
free(xmid);
} }
free(elem_id); else if (xmid && _value)
{
no_elements = xmlGetNumElements(xmid, _element);
for (i=0; i<no_elements; i++)
{
if ((xmlGetNextElement(xid, xmid, _element) != 0)
&& (xmlCompareString(xmid, _value) == 0))
{
printf("%s: <%s>%s</%s>\n",
_filenames[num], _element, _value, _element);
}
}
free(xmid);
}
else if (xmid && _element)
{
}
else printf("Error executing xmlMarkId\n");
} }
else /* walk the rest of the tree */ else if (xid) /* walk the rest of the tree */
{ {
char *elem, *next;
void *xmid; void *xmid;
elem = tree;
if (*elem == '/') elem++;
next = strchr(elem, '/');
xmid = xmlMarkId(xid); xmid = xmlMarkId(xid);
if (xmid) if (xmid)
{ {
*next++ = 0; if (next) *next++ = 0;
no_elements = xmlGetNumElements(xid, elem); no_elements = xmlGetNumElements(xid, elem);
for (q=0; q<no_elements; q++) for (i=0; i<no_elements; i++)
{ {
void *elem_id = xmlGetNextElement(xid, xmid, elem); if (xmlGetNextElement(xid, xmid, elem) != 0)
walk_the_tree(num, elem_id, next); walk_the_tree(num, xmid, next);
} }
*--next = '/'; if (next) *--next = '/';
free(xmid); free(xmid);
} }
@ -280,7 +260,8 @@ main (int argc, char **argv)
i += ret; i += ret;
} }
if (_root == 0) (_root = (char *)_static_root); if (_root == 0) _root = (char *)_static_root;
if (_element == 0) _element = (char *)_static_element;
for (i=0; i<_fcount; i++) for (i=0; i<_fcount; i++)
grep_file(i); grep_file(i);