www.delorie.com/archives/browse.cgi   search  
Mail Archives: djgpp-workers/2005/01/29/11:19:17

X-Authentication-Warning: delorie.com: mail set sender to djgpp-workers-bounces using -f
From: <ams AT ludd DOT ltu DOT se>
Message-Id: <200501291618.j0TGIMHA025530@speedy.ludd.ltu.se>
Subject: hcreate() and friends (LONG)
To: DJGPP-WORKERS <djgpp-workers AT delorie DOT com>
Date: Sat, 29 Jan 2005 17:18:22 +0100 (CET)
X-Mailer: ELM [version 2.4ME+ PL78 (25)]
MIME-Version: 1.0
X-ltu-MailScanner-Information: Please contact the ISP for more information
X-ltu-MailScanner: Found to be clean
X-ltu-MailScanner-SpamScore: s
X-MailScanner-From: ams AT ludd DOT ltu DOT se
Reply-To: djgpp-workers AT delorie DOT com

Hello.

Here's my implementation of hcreate(), hdestroy() and hsearch() plus
the hash function _hash2v().

Some points:

* I'm adding the functional category "search" to libc's
documentation. I've put hcreate(), hdestroy() and hsearch() in that
category. I'll move lfind(), lsearch(), insque() and remque() to that
catetory if you're ok with the new category.

* I put the hash function hsearch() uses in posix/search/ and
<search.h> as its whole reason for being is that hsearch() calls
it. (Originally it was a static function in hcreate.c but I thought
that there's no reason for hiding it.) Is that ok, or do you perfer to
have it somewhere else?


Right,

						MartinS

Index: djgpp/include/search.h
===================================================================
RCS file: /cvs/djgpp/djgpp/include/search.h,v
retrieving revision 1.5
diff -p -u -r1.5 search.h
--- djgpp/include/search.h	15 Jan 2005 19:16:15 -0000	1.5
+++ djgpp/include/search.h	29 Jan 2005 16:05:42 -0000
@@ -24,19 +24,37 @@ __DJ_size_t
 
 #ifndef __STRICT_ANSI__
 
-void * lfind(const void *_key, void *_base, size_t *_nelp, size_t _width,
-	     int(*_compar)(const void *, const void *));
-void * lsearch(const void *_key, void *_base, size_t *_nelp, size_t _width,
-	       int(*_compar)(const void *, const void *));
+/* ENTRY type for hsearch(). */
+typedef struct entry {
+  char    *key;
+  void    *data;
+} ENTRY;
+
+/* ACTION type for hsearch(). */
+typedef enum {
+  FIND,
+  ENTER
+} ACTION;
+
+
+int	hcreate(size_t _nel);
+void	hdestroy(void);
+ENTRY *	hsearch(ENTRY _item, ACTION _action);
+void *	lfind(const void *_key, void *_base, size_t *_nelp, size_t _width,
+	      int(*_compar)(const void *, const void *));
+void *	lsearch(const void *_key, void *_base, size_t *_nelp, size_t _width,
+		int(*_compar)(const void *, const void *));
 
 #ifndef _POSIX_SOURCE
 
+/* qelem type for insque and remque. */
 typedef struct qelem {
   struct qelem *q_forw;
   struct qelem *q_back;
   char q_data[0];
 } qelem;
 
+unsigned long _hash2v(unsigned char *s, unsigned long *v2);
 void insque(struct qelem *_elem, struct qelem *_pred);
 void remque(struct qelem *_elem);
 
Index: djgpp/src/docs/kb/wc204.txi
===================================================================
RCS file: /cvs/djgpp/djgpp/src/docs/kb/wc204.txi,v
retrieving revision 1.173
diff -p -u -r1.173 wc204.txi
--- djgpp/src/docs/kb/wc204.txi	15 Jan 2005 19:16:16 -0000	1.173
+++ djgpp/src/docs/kb/wc204.txi	29 Jan 2005 16:05:45 -0000
@@ -1057,11 +1057,48 @@ The C99 file @file{stdbool.h} was added.
 @findex asctime_r AT r{ added}
 @findex ctime_r AT r{ added}
 @findex gmtime_r AT r{ added}
+@findex hcreate AT r{ added}
+@findex hdestroy AT r{ added}
+@findex hsearch AT r{ added}
 @findex lfind AT r{ added}
 @findex lsearch AT r{ added}
 @findex localtime_r AT r{ added}
 @findex strtok_r AT r{ added}
 @findex strerror_r AT r{ added}
 The POSIX functions @code{asctime_r}, @code{ctime_r}, @code{gmtime_r},
-@code{lfind}, @code{lsearch}, @code{localtime_r}, @code{strtok_r} and
+@code{hcreate}, @code{hdestroy}, @code{hsearch}, @code{lfind},
+@code{lsearch}, @code{localtime_r}, @code{strtok_r} and
 @code{strerror_r} were added.  
+
+@findex _hash2v AT r{ added}
+Hash function @code{_hash2v} used by @code{hsearch} was added.  
+
Index: djgpp/src/libc/posix/search/_hash2v.c
===================================================================
RCS file: djgpp/src/libc/posix/search/_hash2v.c
diff -N djgpp/src/libc/posix/search/_hash2v.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ djgpp/src/libc/posix/search/_hash2v.c	29 Jan 2005 16:05:47 -0000
@@ -0,0 +1,252 @@
+/*
+ * File _hash2v.c.
+ *
+ * From <http://burtleburtle.net/bob/hash/doobs.html>.
+ * Two lines from below quoted: 
+ * "By Bob Jenkins, 1996.  bob_jenkins AT burtleburtle DOT net.  You may use this
+ * code any way you wish, private, educational, or commercial.  It's free."
+ *
+ * Modifications by Martin Str@"omberg <ams AT ludd DOT ltu DOT se>:
+ *   - unnecessary code removed
+ *   - ub1 and ub4 typedefs converted into unsigned char and unsigned long
+ *   - proper C89 parameters
+ *   - detect length while calculating
+ *   - fixed initial value (0)
+ *   - generate a second hash value (b)
+ *   - #include**s added
+ *
+ */
+
+#include <search.h>
+
+/*
+--------------------------------------------------------------------
+mix -- mix 3 32-bit values reversibly.
+For every delta with one or two bits set, and the deltas of all three
+  high bits or all three low bits, whether the original value of a,b,c
+  is almost all zero or is uniformly distributed,
+* If mix() is run forward or backward, at least 32 bits in a,b,c
+  have at least 1/4 probability of changing.
+* If mix() is run forward, every bit of c will change between 1/3 and
+  2/3 of the time.  (Well, 22/100 and 78/100 for some 2-bit deltas.)
+mix() was built out of 36 single-cycle latency instructions in a 
+  structure that could supported 2x parallelism, like so:
+      a -= b; 
+      a -= c; x = (c>>13);
+      b -= c; a ^= x;
+      b -= a; x = (a<<8);
+      c -= a; b ^= x;
+      c -= b; x = (b>>13);
+      ...
+  Unfortunately, superscalar Pentiums and Sparcs can't take advantage 
+  of that parallelism.  They've also turned some of those single-cycle
+  latency instructions into multi-cycle latency instructions.  Still,
+  this is the fastest good hash I could find.  There were about 2^^68
+  to choose from.  I only looked at a billion or so.
+--------------------------------------------------------------------
+*/
+#define mix(a,b,c) \
+{ \
+  a -= b; a -= c; a ^= (c>>13); \
+  b -= c; b -= a; b ^= (a<<8); \
+  c -= a; c -= b; c ^= (b>>13); \
+  a -= b; a -= c; a ^= (c>>12);  \
+  b -= c; b -= a; b ^= (a<<16); \
+  c -= a; c -= b; c ^= (b>>5); \
+  a -= b; a -= c; a ^= (c>>3);  \
+  b -= c; b -= a; b ^= (a<<10); \
+  c -= a; c -= b; c ^= (b>>15); \
+}
+
+unsigned long _hash2v( unsigned char *k, unsigned long *v2 )
+{
+  int cont, which;
+  unsigned long a, b, c, len;
+
+  /* Set up the internal state */
+  len = 0;
+  a = b = 0x9e3779b9;  /* the golden ratio; an arbitrary value */
+  c = 0;         /* No previous hash value. */
+
+   /*---------------------------------------- handle most of the key */
+  cont = 1;
+  while( cont )
+  {
+    for( which = 0; which < 12; which++ )
+    {
+      if( !k[which] )
+      {
+	cont = 0;
+	break;
+      }
+    }
+    if( cont )
+    {
+      a += (k[0] +((unsigned long)k[1]<<8) +((unsigned long)k[2]<<16)
+	    +((unsigned long)k[3]<<24));
+      b += (k[4] +((unsigned long)k[5]<<8) +((unsigned long)k[6]<<16)
+	    +((unsigned long)k[7]<<24));
+      c += (k[8] +((unsigned long)k[9]<<8) +((unsigned long)k[10]<<16)
+	    +((unsigned long)k[11]<<24));
+      mix(a,b,c);
+      k += 12; len += 12;
+    }
+  }
+
+  len += which;
+
+   /*------------------------------------- handle the last 11 bytes */
+  c += len;
+  switch(which)              /* all the case statements fall through */
+  {
+  case 11: c+=((unsigned long)k[10]<<24);
+  case 10: c+=((unsigned long)k[9]<<16);
+  case 9 : c+=((unsigned long)k[8]<<8);
+    /* the first byte of c is reserved for the length */
+  case 8 : b+=((unsigned long)k[7]<<24);
+  case 7 : b+=((unsigned long)k[6]<<16);
+  case 6 : b+=((unsigned long)k[5]<<8);
+  case 5 : b+=k[4];
+  case 4 : a+=((unsigned long)k[3]<<24);
+  case 3 : a+=((unsigned long)k[2]<<16);
+  case 2 : a+=((unsigned long)k[1]<<8);
+  case 1 : a+=k[0];
+    /* case 0: nothing left to add */
+  }
+  mix(a,b,c);
+  /*-------------------------------------------- report the result */
+
+  *v2 = b;
+  return c;
+}
+
+
+#ifdef TEST
+/*
+ * Original hash function code.
+ *
+ */
+
+typedef  unsigned long  int  ub4;   /* unsigned 4-byte quantities */
+typedef  unsigned       char ub1;   /* unsigned 1-byte quantities */
+
+/*
+--------------------------------------------------------------------
+hash() -- hash a variable-length key into a 32-bit value
+  k       : the key (the unaligned variable-length array of bytes)
+  len     : the length of the key, counting by bytes
+  initval : can be any 4-byte value
+Returns a 32-bit value.  Every bit of the key affects every bit of
+the return value.  Every 1-bit and 2-bit delta achieves avalanche.
+About 6*len+35 instructions.
+
+The best hash table sizes are powers of 2.  There is no need to do
+mod a prime (mod is sooo slow!).  If you need less than 32 bits,
+use a bitmask.  For example, if you need only 10 bits, do
+  h = (h & hashmask(10));
+In which case, the hash table should have hashsize(10) elements.
+
+If you are hashing n strings (ub1 **)k, do it like this:
+  for (i=0, h=0; i<n; ++i) h = hash( k[i], len[i], h);
+
+By Bob Jenkins, 1996.  bob_jenkins AT burtleburtle DOT net.  You may use this
+code any way you wish, private, educational, or commercial.  It's free.
+
+See http://burtleburtle.net/bob/hash/evahash.html
+Use for hash table lookup, or anything where one collision in 2^^32 is
+acceptable.  Do NOT use for cryptographic purposes.
+--------------------------------------------------------------------
+*/
+
+ub4 hash( k, length, initval)
+register ub1 *k;        /* the key */
+register ub4  length;   /* the length of the key */
+register ub4  initval;  /* the previous hash, or an arbitrary value */
+{
+   register ub4 a,b,c,len;
+
+   /* Set up the internal state */
+   len = length;
+   a = b = 0x9e3779b9;  /* the golden ratio; an arbitrary value */
+   c = initval;         /* the previous hash value */
+
+   /*---------------------------------------- handle most of the key */
+   while (len >= 12)
+   {
+      a += (k[0] +((ub4)k[1]<<8) +((ub4)k[2]<<16) +((ub4)k[3]<<24));
+      b += (k[4] +((ub4)k[5]<<8) +((ub4)k[6]<<16) +((ub4)k[7]<<24));
+      c += (k[8] +((ub4)k[9]<<8) +((ub4)k[10]<<16)+((ub4)k[11]<<24));
+      mix(a,b,c);
+      k += 12; len -= 12;
+   }
+
+   /*------------------------------------- handle the last 11 bytes */
+   c += length;
+   switch(len)              /* all the case statements fall through */
+   {
+   case 11: c+=((ub4)k[10]<<24);
+   case 10: c+=((ub4)k[9]<<16);
+   case 9 : c+=((ub4)k[8]<<8);
+      /* the first byte of c is reserved for the length */
+   case 8 : b+=((ub4)k[7]<<24);
+   case 7 : b+=((ub4)k[6]<<16);
+   case 6 : b+=((ub4)k[5]<<8);
+   case 5 : b+=k[4];
+   case 4 : a+=((ub4)k[3]<<24);
+   case 3 : a+=((ub4)k[2]<<16);
+   case 2 : a+=((ub4)k[1]<<8);
+   case 1 : a+=k[0];
+     /* case 0: nothing left to add */
+   }
+   mix(a,b,c);
+   /*-------------------------------------------- report the result */
+   return c;
+}
+
+
+/* Code to make sure that _hash2v() behaves identically to hash(). */
+
+#include <errno.h>
+#include <stdio.h>
+
+#define LEN 4096
+
+int main(int argc, char *argv[])
+{
+  char *ret;
+  char s[LEN];
+  FILE *f;
+  size_t len;
+  unsigned long hash_v, hash2v_v, junk;
+
+  if( argc != 2 )
+  {
+    fprintf(stderr, "Run like this: '%s <file name>'.\n", argv[0]);
+    return 1;
+  }
+
+  f = fopen(argv[1], "r");
+  if( !f )
+  {
+    fprintf(stderr, "Failed to open '%s', errno = %d.\n", argv[1], errno);
+    return 3;
+  }
+
+  ret = fgets(s, LEN, f);
+  while( ret )
+  {
+    len = strlen(s);
+    hash_v = hash(s, len, 0);
+    hash2v_v = _hash2v(s, &junk);
+    if( hash_v != hash2v_v )
+    {
+      printf("hash = 0x%08lx != 0x%08lx = hash2v\n", hash_v, hash2v_v);
+    }
+
+    ret = fgets(s, LEN, f);
+  }
+
+  return 0;
+}
+
+#endif
Index: djgpp/src/libc/posix/search/_hash2v.txh
===================================================================
RCS file: djgpp/src/libc/posix/search/_hash2v.txh
diff -N djgpp/src/libc/posix/search/_hash2v.txh
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ djgpp/src/libc/posix/search/_hash2v.txh	29 Jan 2005 16:05:47 -0000
@@ -0,0 +1,56 @@
+@ignore
+ * File _hash2v.txh.
+ *
+ * Copyright (C) 2005 Martin Str@"omberg <ams AT ludd DOT luth DOT se>.
+ *
+ * This software may be used freely so long as this copyright notice is
+ * left intact. There is no warranty on this software.
+ *
+@end ignore
+
+@node _hash2v, misc
+@findex _hash2v
+@subheading Syntax
+
+@example
+#include <search.h>
+
+unsigned long _hash2v(unsigned char *s, unsigned long *v2)
+@end example
+
+@subheading Description
+
+This function generate two hash values from the string @var{s}.  It
+returns the first and sets *@var{v2} to the second.  
+
+@subheading Return Value
+
+The first hash value.
+
+@subheading Portability
+
+@portability !ansi, !posix
+
+@subheading Example
+
+@example
+#include <search.h>
+#include <stdio.h>
+
+
+int main(void)
+@{
+  unsigned int hash_value1, hash_value2;
+
+  hash_value1 = _hash2v("", &hash_value2);
+  printf("_hash2v generated the values 0x%08lx and 0x%08lx from \"\" (the empty string).\n",
+         hash_value1, hash_value2);
+  hash_value1 = _hash2v("a", &hash_value2);
+  printf("_hash2v generated the values 0x%08lx and 0x%08lx from \"a\".\n"
+         hash_value1, hash_value2);
+
+  return 0;
+@}
+
+@end example
+
Index: djgpp/src/libc/posix/search/hcreate.c
===================================================================
RCS file: djgpp/src/libc/posix/search/hcreate.c
diff -N djgpp/src/libc/posix/search/hcreate.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ djgpp/src/libc/posix/search/hcreate.c	29 Jan 2005 16:05:47 -0000
@@ -0,0 +1,105 @@
+/*
+ * File hcreate.c.
+ *
+ * Copyright (C) 2005 Martin Str@"omberg <ams AT ludd DOT ltu DOT se>.
+ *
+ * This software may be used freely so long as this copyright notice is
+ * left intact. There is no warranty on this software.
+ *
+ */
+
+#include <search.h>
+#include <stdlib.h>
+
+/*
+ * It's impossible to have hash table with more than 0x20000000 elements
+ * as sizeof(ENTRY) == 8.
+ */
+#define MAX_BUCKETS 0x20000000
+
+static ENTRY *hash = NULL;
+static size_t buckets = 0;
+static size_t mask = -1;
+
+int hcreate( size_t nel )
+{
+  size_t i;
+
+  /* Fail anything > MAX_BUCKETS. */
+  if( MAX_BUCKETS < nel )
+  {
+    return 0;
+  }
+
+  /* Increase with 1/3. */
+  nel += nel/3;
+
+  /* Decrease to MAX_BUCKETS if necessary. */
+  if( MAX_BUCKETS < nel )
+  {
+    nel = MAX_BUCKETS;
+  }
+
+  /* Minimum is 128 elements. Algorithm doesn't depend on this, though. */
+  i = 7;
+  if( nel < 0x80 )
+  {
+    nel = 0x80;
+  }
+
+  /* Round up to next power-of-2. */
+  while( (1U<<i) < nel )
+  {
+    i++;
+  }
+  
+  buckets = 1U<<i;
+  mask = buckets-1;
+
+  hash = calloc(buckets, sizeof(ENTRY));
+
+  return NULL != hash;
+}
+
+void hdestroy(void)
+{
+  free( hash );
+  hash = NULL;
+}
+
+ENTRY *hsearch(ENTRY item, ACTION action)
+{
+  unsigned long hash_value, hash_value_start, increment;
+
+  hash_value = _hash2v(item.key, &increment);
+  hash_value &= mask;
+  hash_value_start = hash_value;
+  increment = 2*increment+1; /* Make sure increment is odd. */
+
+  do {
+    if( ! hash[hash_value].key )
+    {
+      if( FIND == action )
+      {
+	return NULL;
+      }
+      else
+      {
+	hash[hash_value] = item;
+	return &hash[hash_value];
+      }
+    }
+    else
+    {
+      if( ! strcmp(hash[hash_value].key, item.key) )
+      {
+	return &hash[hash_value];
+      }
+    
+      hash_value += increment;
+      hash_value &= mask;
+    }
+  } while( hash_value != hash_value_start );
+
+  return NULL;
+}
Index: djgpp/src/libc/posix/search/hcreate.txh
===================================================================
RCS file: djgpp/src/libc/posix/search/hcreate.txh
diff -N djgpp/src/libc/posix/search/hcreate.txh
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ djgpp/src/libc/posix/search/hcreate.txh	29 Jan 2005 16:05:47 -0000
@@ -0,0 +1,204 @@
+@ignore
+ * File hcreate.txh.
+ *
+ * Copyright (C) 2005 Martin Str@"omberg <ams AT ludd DOT luth DOT se>.
+ *
+ * This software may be used freely so long as this copyright notice is
+ * left intact. There is no warranty on this software.
+ *
+@end ignore
+
+@node hcreate, search
+@findex hcreate
+@subheading Syntax
+
+@example
+#include <search.h>
+
+int hcreate(size_t nel)
+@end example
+
+@subheading Description
+
+This function creates a hash table that has room for @var{nel}
+elements.  
+
+There can only be at most one hash table in use at any time.  
+
+See also @code{hdestroy} (@pxref{hdestroy}) and @code{hsearch}
+(@pxref{hsearch}).  
+
+@subheading Return Value
+
+Non-zero if successful, or zero if the hash table couldn't be
+created.  
+
+@subheading Portability
+
+@portability !ansi, posix
+
+@subheading Example
+
+@example
+#include <search.h>
+#include <stdio.h>
+
+#define N_ELS	1000
+
+int main(void)
+@{
+
+  if( hcreate(N_ELS) )
+  @{
+    printf("Hash table created successfully.\n");
+  @}
+  else
+  @{
+    printf("Failed to create hash table.\n");
+  @}
+
+  return 0;
+@}
+
+@end example
+
+@c ----------------------------------------------------------------------
+@node hdestroy, search
+@findex hdestroy
+@subheading Syntax
+
+@example
+#include <search.h>
+
+void hdestroy(void)
+@end example
+
+@subheading Description
+
+This function destroys the hash table previously created by
+a call to @code{hcreate} (@pxref{hcreate}).  
+
+See also @code{hsearch} (@pxref{hsearch}).  
+
+@subheading Return Value
+
+None.
+
+@subheading Portability
+
+@portability !ansi, posix
+
+@subheading Example
+
+@example
+#include <search.h>
+
+#define N_ELS 10000
+
+int main(void)
+@{
+
+  if( hcreate(N_ELS) )
+  @{
+    printf("Hash table created successfully.\n");
+
+    /*
+       Insert elements into the hash table and look them up.
+       ...
+     */
+
+    hdestroy();
+  @}
+  else
+  @{
+    printf("Failed to create hash table.\n");
+  @}
+
+  return 0;
+@}
+
+@end example
+
+@c ----------------------------------------------------------------------
+@node hsearch, search
+@findex hsearch
+@subheading Syntax
+
+@example
+#include <search.h>
+
+ENTRY * hsearch(ENTRY item, ACTION action);
+@end example
+
+@subheading Description
+
+This function searches the hash table previously created by a call to
+@code{hcreate} (@pxref{hcreate}) for the key @var{item.key}.  If the
+key is found a pointer to that entry is returned.  If the key is not
+found and action is ENTER, @var{item} is added to the hash table and a
+pointer to the newly added entry is returned.  If the key is not found
+and action is FIND, @code{NULL} is returned.  
+
+The comparison function used is @code{strcmp}.  
+
+See also @code{hdestroy} (@pxref{hdestroy}).  
+
+@subheading Return Value
+
+@code{NULL} if action is ENTER but hash table is full.  
+A pointer to the found or inserted item, if action is ENTER.  
+
+A pointer to the found item or @code{NULL}, if action is FIND.  
+
+@subheading Portability
+
+@portability !ansi, posix
+
+@subheading Example
+
+@example
+#include <search.h>
+
+#define N_ELS 10000
+
+int main(void)
+@{
+  ENTRY item, *item_p;
+
+  if( ! hcreate(N_ELS) )
+  @{
+    printf("Hash table could not be created.\n");
+    return 1;
+  @}
+
+  /* Fill hash. */
+  while( /* element to insert */ )
+  @{
+    item.key = /* allocate space for key and setting it */ 
+    item.data = /* allocate space for data and setting it */
+    item_p = hsearch(item, ENTER);
+    if( ! item_p )
+    @{
+      printf("Out of memory.\n");
+      return 1;
+    @}
+  @}
+
+  /* Look up element. */
+  item.key = /* some key */
+  item_p = hsearch(item, FIND)
+  if( item_p )
+  @{
+    printf("Found key '%s' with data pointing to %p.\n", item.key, item.data);
+  @}
+  else
+  @{
+    printf("Can't find key '%s'.\n", item.key);
+  @}
+
+  hdestroy();
+
+  return 0;
+@}
+
+@end example
Index: djgpp/src/libc/posix/search/makefile
===================================================================
RCS file: /cvs/djgpp/djgpp/src/libc/posix/search/makefile,v
retrieving revision 1.1
diff -p -u -r1.1 makefile
--- djgpp/src/libc/posix/search/makefile	15 Jan 2005 19:16:16 -0000	1.1
+++ djgpp/src/libc/posix/search/makefile	29 Jan 2005 16:05:47 -0000
@@ -2,6 +2,7 @@
 
 TOP=../..
 
+SRC += _hash2v.c
 SRC += hcreate.c
 SRC += lfind.c
 
Index: djgpp/tests/libc/posix/search/_hash2v.c
===================================================================
RCS file: djgpp/tests/libc/posix/search/_hash2v.c
diff -N djgpp/tests/libc/posix/search/_hash2v.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ djgpp/tests/libc/posix/search/_hash2v.c	29 Jan 2005 16:05:47 -0000
@@ -0,0 +1,159 @@
+/*
+ * File _hash2v.c.
+ *
+ * Copyright (C) 2005 Martin Str@"omberg <ams AT ludd DOT ltu DOT se>.
+ *
+ * This software may be used freely so long as this copyright notice is
+ * left intact. There is no warranty on this software.
+ *
+ */
+
+#include <errno.h>
+#include <math.h>
+#include <search.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#define LEN 4096
+#define MAX_BUCKETS 0x20000000
+
+int main(int argc, char *argv[])
+{
+  char *ret;
+  char s[LEN];
+  FILE *f;
+  long double chi2, corr, p_inv, s2_h, s2_h2;
+  size_t buckets, i, len, lines, mask, v, v2;
+  size_t *h, *h2;
+
+  if( argc != 3 )
+  {
+    fprintf(stderr, "Run like this: '%s <buckets> <file name>'.\n"
+	    "Make sure that the lines are unique (run the files throught 'sort -u' e.g.),\n"
+	    "otherwise the results will not be correct.\n", argv[0]);
+    return 1;
+  }
+
+  buckets = strtoul(argv[1], NULL, 0);
+
+  /* This number of bucket finding _must_ give the same number of buckets
+     as the one in hcreate(). */
+  buckets += buckets/3;
+  i = 7;
+  if( buckets < 0x80 )
+  {
+    buckets = 0x80;
+  }
+  if( MAX_BUCKETS < buckets )
+  {
+    buckets = MAX_BUCKETS;
+    i = 26;
+  }
+
+  while( (1<<i) < buckets )
+  {
+    i++;
+  }
+  buckets = 1<<i;
+  mask = buckets-1;
+  printf("Using %lu buckets (mask 0x%08lx).\n", buckets, mask);
+
+  h = calloc(buckets, sizeof(size_t));
+  if( !h )
+  {
+    fprintf(stderr, "Out of memory while trying to allocate 0x%lx bytes.\n", buckets*sizeof(size_t));
+    return 2;
+  }
+  h2 = calloc(buckets, sizeof(size_t));
+  if( !h2 )
+  {
+    fprintf(stderr, "Out of memory while trying to allocate 0x%lx bytes.\n", buckets*sizeof(size_t));
+    return 2;
+  }
+
+  f = fopen(argv[2], "r");
+  if( !f )
+  {
+    fprintf(stderr, "Failed to open '%s', errno = %d.\n", argv[2], errno);
+    return 3;
+  }
+
+  corr = 0;
+  s2_h = 0;
+  s2_h2 = 0;
+  lines = 0;
+  ret = fgets(s, LEN, f);
+  while( ret )
+  {
+    len = strlen(s);
+    if( LEN-1 <= len )
+    {
+      fprintf(stderr, "Warning: Found line (%ld) with length at least %d (if longer\n"
+	      "it's been splitted).\n", lines, LEN-1);
+    }
+    lines++;
+    v = _hash2v(s, &v2)&mask;
+    v2 &= mask;
+
+#if 0
+    printf("v=0x%08lx, v2=0x%08lx\n", v, v2);
+#endif
+
+    /* For chi2 test. */
+    h[v]++;
+    if( ! h[v] )
+    {
+      fprintf(stderr, "Overflow while increasing bucket 0x%08lx.\n", v);
+      return 4;
+    }
+    h2[v2]++;
+    if( ! h2[v2] )
+    {
+      fprintf(stderr, "Overflow while increasing bucket 0x%08lx.\n", v2);
+      return 4;
+    }
+
+    /* For correlation coefficent. */
+    s2_h += (v-((long double)buckets)/2)*(v-((long double)buckets)/2);
+    s2_h2 += (v2-((long double)buckets)/2)*(v2-((long double)buckets)/2);
+    corr += (v-((long double)buckets)/2)*(v2-((long double)buckets)/2);
+
+    ret = fgets(s, LEN, f);
+  }
+
+  printf("Hashed %ld lines.\n", lines);
+
+  /* Calculate chi2 value for the first hash value. */
+  p_inv = buckets;
+
+  chi2 = 0;
+  for( i = 0; i < buckets; i++ )
+  {
+    chi2 += p_inv*h[i]*h[i]/lines;
+  }
+  chi2 -= lines;
+  printf("Chi2 of first hash value = %Lf. Expected %ld+-%f (%f..%f).\n"
+	 , chi2, buckets, 3*sqrt(buckets), buckets-3*sqrt(buckets)
+	 , buckets+3*sqrt(buckets));
+
+  /* Calculate chi2 value for the second hash value. */
+  chi2 = 0;
+  for( i = 0; i < buckets; i++ )
+  {
+    chi2 += p_inv*h2[i]*h2[i]/lines;
+  }
+  chi2 -= lines;
+  printf("Chi2 of second hash value = %Lf. Expected %ld+-%f (%f..%f).\n"
+	 , chi2, buckets, 3*sqrt(buckets), buckets-3*sqrt(buckets)
+	 , buckets+3*sqrt(buckets));
+
+  /* Calculate correlation coefficient. */
+  s2_h /= lines-1;
+  s2_h2 /= lines-1;
+  corr /= lines-1;
+
+  printf("Correlation coefficient = %Lf.\n", corr/sqrt(s2_h)/sqrt(s2_h2));
+  
+
+  return 0;
+}
Index: djgpp/tests/libc/posix/search/hcreate.c
===================================================================
RCS file: djgpp/tests/libc/posix/search/hcreate.c
diff -N djgpp/tests/libc/posix/search/hcreate.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ djgpp/tests/libc/posix/search/hcreate.c	29 Jan 2005 16:05:47 -0000
@@ -0,0 +1,201 @@
+/*
+ * File hcreate.c.
+ *
+ * Copyright (C) 2005 Martin Str@"omberg <ams AT ludd DOT ltu DOT se>.
+ *
+ * This software may be used freely so long as this copyright notice is
+ * left intact. There is no warranty on this software.
+ *
+ */
+
+#include <errno.h>
+#include <search.h>
+#include <stdio.h>
+#include <stdlib.h>
+/*#include <string.h>*/
+
+
+#define BUF_LEN 2048
+#define COMPARE_FUN  my_p_strcmp
+
+
+char buffer[BUF_LEN];
+char *program_name;
+char **list;
+size_t n_list = 0;
+size_t size_list = 128;
+
+
+static int my_p_strcmp( const void *el1, const void *el2)
+{
+  return strcmp( *(const char **)el1, *(const char **)el2 );
+}
+
+
+static void print_help_and_exit(int exit_value)
+{
+  fprintf(stderr, "Run like this: '%s <hash size> <input file>'\n", program_name);
+  exit( exit_value );
+}
+
+
+/* 
+ * Makes sure that s is in list.
+ * Returns:
+ * 0 if s already was in list
+ * 1 if s is new
+ */
+static int list_add(char **p)
+{
+  size_t old_n_list;
+
+  if( size_list <= n_list+1 )
+  {
+    char **new_list;
+    
+    new_list = realloc(list, 2*size_list*sizeof(*list));
+    if( ! new_list )
+    {
+      fprintf(stderr, "Failed to allocate memory (%ld bytes) for check list.\n",
+	      2*size_list*sizeof(*list));
+      print_help_and_exit(4);
+    }
+
+    list = new_list;
+    size_list *= 2;
+  }
+
+  old_n_list = n_list;
+  lsearch(p, list, &n_list, sizeof(char *), COMPARE_FUN);
+  
+  return old_n_list != n_list;
+}
+
+
+int main(int argc, char *argv[])
+{
+  char ch, *retp, scratch[32];
+  ENTRY *e_p, item;
+  FILE *in;
+  int buckets, ret;
+  size_t len, lines;
+
+  program_name = argv[0];
+
+  if( argc != 3 )
+  {
+    print_help_and_exit(1);
+  }
+  
+  ret = sscanf(argv[1], "%i%c", &buckets, &ch);
+  if( ret != 1 )
+  {
+    print_help_and_exit(2);
+  }
+
+  if( buckets <= 0 )
+  {
+    fprintf(stderr, "%d buckets makes no sense. Using 1000.\n", buckets);
+    buckets = 1000;
+  }
+
+  in = fopen(argv[2], "r");
+  if( ! in )
+  {
+    fprintf(stderr, "Failed to open input file '%s', errno = %d.\n", 
+	    argv[2], errno);
+    print_help_and_exit(3);
+  }
+
+  list = malloc(size_list*sizeof(*list));
+  if( ! list )
+  {
+    fprintf(stderr, "Failed to allocate memory for check list.\n");
+    return 4;
+  }
+
+  if( ! hcreate(buckets) )
+  {
+    fprintf(stderr, "hcreate(%d) failed.\n", buckets);
+    return 5;
+  }
+
+  item.data = NULL;
+
+  /* Check for non-existing elements. */
+  item.key = scratch;
+  item.key[0] = 0;
+  e_p = hsearch(item, FIND);
+  if( e_p )
+  {
+    printf("ERROR: found element with key '%s' but hash is empty.\n",
+	   item.key);
+    return 42;
+  }
+  strcpy(item.key, "Baba Yaga");
+  e_p = hsearch(item, FIND);
+  if( e_p )
+  {
+    printf("ERROR: found element with key '%s' but hash is empty.\n",
+           item.key);
+    return 42;
+  }
+
+  /* Read file. */
+  lines = 0;
+  retp = fgets(buffer, BUF_LEN, in);
+  while( retp )
+  {
+    lines++;
+
+    /* Insert every line. */
+    len = strlen(buffer);
+    item.key = malloc((len+1)*sizeof(char));
+    if( ! item.key )
+    {
+      fprintf(stderr, "Out of memory after inserting %lu elements from %lu lines.\n",
+	      n_list, lines);
+      return 4;
+    }
+    strcpy(item.key, buffer);
+    ret = list_add(&item.key);
+    e_p = hsearch(item, FIND);
+    if( ret )
+    {
+      if( e_p )
+      {
+	printf("ERROR at the %luth element from %lu lines: new element\n"
+	       "('%s') not in list, but found in hash.\n",
+	       n_list, lines, item.key);
+	return 42;
+      }
+      e_p = hsearch(item, ENTER);
+      if( ! e_p )
+      {
+	printf("Hash full when inserting the %luth element from %lu lines (new element is\n"
+	       "'%s').\n",
+	       n_list, lines, item.key);
+	return 0;
+      }
+    }
+    else
+    {
+      if( ! e_p )
+      {
+	printf("ERROR at the %luth element from %lu lines: old element\n"
+               "('%s') in list, but not found in hash.\n",
+	       n_list, lines, item.key);
+	return 42;
+      }
+    }
+
+    retp = fgets(buffer, BUF_LEN, in);
+  }
+
+  /* Check for non-existing element. */
+
+  printf("Test done. Inserted %lu elements from %lu lines.\n",
+	 n_list, lines);
+  
+  return 0;
+}
Index: djgpp/tests/libc/posix/search/makefile
===================================================================
RCS file: /cvs/djgpp/djgpp/tests/libc/posix/search/makefile,v
retrieving revision 1.1
diff -p -u -r1.1 makefile
--- djgpp/tests/libc/posix/search/makefile	15 Jan 2005 19:16:16 -0000	1.1
+++ djgpp/tests/libc/posix/search/makefile	29 Jan 2005 16:05:47 -0000
@@ -1,5 +1,8 @@
 TOP=../..
 
+SRC += _hash2v.c
+SRC += hcreate.c
+SRC += hfill.c
 SRC += lfind.c
 SRC += lfind2.c
 

- Raw text -


  webmaster     delorie software   privacy  
  Copyright © 2019   by DJ Delorie     Updated Jul 2019