[Monetdb-developers] [Monetdb-pf-checkins] pathfinder/src/sqlhelpers/xmlshred encoding.c, , 1.15, 1.16 hash.c, , 1.7, 1.8

Sjoerd Mullender sjoerd at acm.org
Thu Jan 10 10:40:59 CET 2008


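Why did you move the include of pf_config.h below the system headers?
Generally, the *config.h files need to be included *first*, because the
feature macros they define can change how the headers that follow are
processed.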

Torsten Grust wrote:
> Update of /cvsroot/monetdb/pathfinder/src/sqlhelpers/xmlshred
> In directory sc8-pr-cvs16.sourceforge.net:/tmp/cvs-serv10014
> 
> Modified Files:
> 	encoding.c hash.c 
> Log Message:
> -- pfshred now requires SAX2 (namespace URI processing)
> 
> -- Local name and URI hash tables operate with xmlChar* values
> 
> 
> 
> Index: encoding.c
> ===================================================================
> RCS file: /cvsroot/monetdb/pathfinder/src/sqlhelpers/xmlshred/encoding.c,v
> retrieving revision 1.15
> retrieving revision 1.16
> diff -u -d -r1.15 -r1.16
> --- encoding.c	9 Jan 2008 09:16:05 -0000	1.15
> +++ encoding.c	10 Jan 2008 09:02:16 -0000	1.16
> @@ -25,13 +25,14 @@
>   * $Id$
>   */
>  
> -#include "pf_config.h"
>  #include <stdio.h>
>  #include <string.h>
>  
>  /* libxml SAX2 parser internals */
>  #include "libxml/parserInternals.h"
>  
> +#include "pf_config.h"
> +
>  #include "encoding.h"
>  #include "guides.h"
>  #include "oops.h"
> @@ -40,6 +41,9 @@
>  
>  #include <assert.h>
>  
> +#ifndef HAVE_SAX2
> + #error "libxml2 SAX2 interface required to compile the XML shredder `pfshred'"
> +#endif
>  
>  FILE *out;
>  FILE *out_attr;
> @@ -256,15 +260,14 @@
>     
>      if (!localname)
>          return -1;
> +                     
> +    localname_id = hashtable_find (localname_hash, localname);
>  
> -    localname_id = hashtable_find (localname_hash, (char *) localname);
> -
> -    /* key not found */
>      if (NOKEY (localname_id)) {
> -        /* create a new name id */
> +        /* key not found, create a new name id */
>          localname_id = global_localname_id++;
> -        /* add the pair into the hashtable */
> -        hashtable_insert (localname_hash, (char *) localname, localname_id);
> +        /* add the (localname, localname_id) pair into the hash table */
> +        hashtable_insert (localname_hash, localname, localname_id);
>          /* print the name binding if necessary */
>          if (shredstate.names_separate)
>              fprintf (out_names, "%i, \"%s\"\n", localname_id, (char*) localname);
> @@ -282,14 +285,13 @@
>      if (!URI)
>          return -1;
>          
> -    uri_id = hashtable_find (uris_hash, (char *) URI);
> +    uri_id = hashtable_find (uris_hash, URI);
>  
> -    /* key not found */
>      if (NOKEY (uri_id)) {
> -        /* create a new URI id */
> +        /* key not found, create a new URI id */
>          uri_id = global_uri_id++;
> -        /* add the pair into the hashtable */
> -        hashtable_insert (uris_hash, (char *) URI, uri_id);
> +        /* add the (URI, uri_id) pair to the hash table */
> +        hashtable_insert (uris_hash, URI, uri_id);
>          /* print the URI binding if necessary */
>          if (shredstate.names_separate)
>              fprintf (out_uris, "%i, \"%s\"\n", uri_id, (char*) URI);
> @@ -313,7 +315,8 @@
>  
>      /* check if tagname is larger than TAG_SIZE characters */
>      if (localname && xmlStrlen (localname) > TAG_SIZE)
> -        BAILOUT ("attribute local name `%s' exceeds %u characters", localname, TAG_SIZE);
> +        BAILOUT ("attribute local name `%s' exceeds %u characters", 
> +                 localname, TAG_SIZE);
>      
>      if (URI && xmlStrlen (URI) > TAG_SIZE)
>          BAILOUT ("namespace URI `%s' exceeds length of %u characters", 
> @@ -686,7 +689,7 @@
>  report (void)
>  {
>      if (text_stripped > 0) {
> -        fprintf (err, "%u values were stripped to %u "
> +        fprintf (err, "%u text node/attribute values were stripped to %u "
>                        "character(s).\n", text_stripped, text_size);
>      }
>  }
> 
> Index: hash.c
> ===================================================================
> RCS file: /cvsroot/monetdb/pathfinder/src/sqlhelpers/xmlshred/hash.c,v
> retrieving revision 1.7
> retrieving revision 1.8
> diff -u -d -r1.7 -r1.8
> --- hash.c	8 Jan 2008 10:00:07 -0000	1.7
> +++ hash.c	10 Jan 2008 09:02:16 -0000	1.8
> @@ -25,39 +25,39 @@
>   * $Id$
>   */
>  
> -#include "pf_config.h"
> -#include "hash.h"
> -#include "shred_helper.h"
>  #include <string.h>
>  #include <unistd.h>
>  #include <assert.h>
>  
> +#include "pf_config.h"
> +#include "hash.h"
> +#include "shred_helper.h"
> +
>  /* We use a seperate chaining strategy to
> - * mantain our hash_table,
> - * So each bucket is a chained list itself,
> - * to handle possible collisions.
> + * mantain our hash table, so each bucket is a chained list itself
> + * to handle collisions.
>   */
>  struct bucket_t {
> -    char *key;      /**< key as string */
> -    int id;         /**< name_id */
> -    bucket_t* next; /**< next bucket in our list */
> +    xmlChar  *key;      /**< key (elem/attr name or namespace URI) */
> +    int       id;       /**< name_id */
> +    bucket_t *next;     /**< next bucket in overflow chain */
>  };
>  
> -/* size of the hashtable */
> +/* hashtable size */
>  #define PRIME 113
>  
>  /**
> - * Lookup an id in a given bucket using it associated key.
> + * Lookup an id in a given bucket using its associated key.
>   */
>  static int
> -find_id (bucket_t *bucket, char *key)
> +find_id (bucket_t *bucket, const xmlChar *key)
>  {
>      bucket_t *cur_bucket = bucket;
>      
>      assert (key);
>  
>      while (cur_bucket)
> -        if (strcmp (cur_bucket->key, key) == 0)
> +        if (xmlStrcmp (cur_bucket->key, key) == 0)
>              return cur_bucket->id;
>          else
>              cur_bucket = cur_bucket->next;
> @@ -69,16 +69,16 @@
>   * Attach an (id, key) pair to a given bucket list.
>   */
>  static bucket_t *
> -bucket_insert (bucket_t *bucket, char *key, int id)
> +bucket_insert (bucket_t *bucket, const xmlChar *key, int id)
>  {
>      int ident = find_id (bucket, key);
>      
> -    /* no key found */
>      if (NOKEY (ident)) {
> +        /* no key found */
>          bucket_t *newbucket = (bucket_t*) malloc (sizeof (bucket_t));
>  
>          newbucket->id = id;
> -        newbucket->key = strndup (key, strlen(key));
> +        newbucket->key = xmlStrdup (key);
>  
>          /* add new bucket to the front of list */
>          newbucket->next = bucket;
> @@ -95,13 +95,12 @@
>   * Create the hash value for a given key.
>   */
>  static int
> -find_hash_bucket (char *key)
> +find_hash_bucket (const xmlChar *key)
>  {   
>      assert (key);
>      
> -    size_t len = strlen (key);
> -    /* keys have at least length 1 */
> -    /* assert (len > 0); */
> +    size_t len = xmlStrlen (key);
> +
>      /* build a hash out of the first and the last character
>         and the length of the key */
>      return (key[0] * key[MAX(0,len-1)] * len) % PRIME;
> @@ -113,7 +112,9 @@
>  hashtable_t
>  new_hashtable (void)
>  {
> -    hashtable_t ht = malloc (PRIME * sizeof (bucket_t));
> +    hashtable_t ht;
> +    
> +    ht = (hashtable_t) malloc (PRIME * sizeof (bucket_t));
>      
>      /* initialize the hash table */
>      for (unsigned int i = 0; i < PRIME; i++)
> @@ -126,24 +127,25 @@
>   * Insert key and id into hashtable.
>   */
>  void
> -hashtable_insert (hashtable_t hash_table, char *key, int id)
> +hashtable_insert (hashtable_t hash_table, const xmlChar *key, int id)
>  {
>      int hashkey;
>      
> -    assert (hash_table && key);
> -    
> +    assert (hash_table);
> +    assert (key);
> +
>      hashkey = find_hash_bucket (key);
>      hash_table[hashkey] = bucket_insert (hash_table[hashkey], key, id);
> -    return;
>  }
>  
>  /**
>   * Find element in hashtable. 
>   */
>  int
> -hashtable_find (hashtable_t hash_table, char *key)
> +hashtable_find (hashtable_t hash_table, const xmlChar *key)
>  {
> -    assert (key);
> +    assert (key); 
> +    
>      return find_id (hash_table[find_hash_bucket (key)], key);
>  }
>  
> @@ -156,7 +158,6 @@
>      bucket_t *bucket, *free_bucket;
>      
>      assert (hash_table);
> -    if (!hash_table) return;
>  
>      for (int i = 0; i < PRIME; i++) {
>          bucket = hash_table[i];
> @@ -165,9 +166,11 @@
>              free_bucket = bucket;
>              bucket = bucket->next;
>              /* free the copied hash key */
> -            if (free_bucket->key) free (free_bucket->key);
> +            if (free_bucket->key) 
> +                xmlFree (free_bucket->key);
>              free (free_bucket);
>          }
>     }
> +
>     free(hash_table);
>  }
> 
> 
> -------------------------------------------------------------------------
> Check out the new SourceForge.net Marketplace.
> It's the best place to buy or sell services for
> just about anything Open Source.
> http://ad.doubleclick.net/clk;164216239;13503038;w?http://sf.net/marketplace
> _______________________________________________
> Monetdb-pf-checkins mailing list
> Monetdb-pf-checkins at lists.sourceforge.net
> https://lists.sourceforge.net/lists/listinfo/monetdb-pf-checkins


-- 
Sjoerd Mullender

-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 369 bytes
Desc: OpenPGP digital signature
URL: <http://www.monetdb.org/pipermail/developers-list/attachments/20080110/8fa1f421/attachment.sig>


More information about the developers-list mailing list