[muddle] [PATCH 1/1] Implement OBLISTs.

keziahw at gmail.com keziahw at gmail.com
Fri Feb 2 12:34:54 PST 2018


From: Kaz Wesley <kaz at lambdaverse.org>

Signed-off-by: Kaz Wesley <kaz at lambdaverse.org>
---
 src/Makefile.am  |  4 ++-
 src/alloc.c      | 15 +++++++++
 src/alloc.h      |  5 ++-
 src/atom.c       | 34 +++++++++++++++++++
 src/atom.h       | 23 ++++++++-----
 src/eval.c       |  1 +
 src/hash.h       | 45 ++++++++++++++++++++++++++
 src/main.c       | 10 ++++++
 src/object.c     |  5 +++
 src/object.h     | 65 +++++++++++++++++++++++++++----------
 src/oblist.c     | 99 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/oblist.h     | 39 ++++++++++++++++++++++
 src/print.c      | 36 +++++++++++++++++++++
 src/read.c       |  3 +-
 src/test_oblists |  9 ++++++
 15 files changed, 365 insertions(+), 28 deletions(-)
 create mode 100644 src/hash.h
 create mode 100644 src/oblist.c
 create mode 100644 src/oblist.h
 create mode 100755 src/test_oblists

diff --git a/src/Makefile.am b/src/Makefile.am
index 922b9b6..68f749c 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -1,3 +1,5 @@
 bin_PROGRAMS = muddle
-muddle_SOURCES = main.c read.c eval.c print.c alloc.c object.c atom.c
+muddle_SOURCES = main.c read.c eval.c print.c alloc.c object.c atom.c oblist.c
 muddle_CFLAGS = -Wall -Wno-unused-function -Werror=implicit-function-declaration -Werror=incompatible-pointer-types
+
+TESTS = test_oblists
diff --git a/src/alloc.c b/src/alloc.c
index ad55630..c3243d9 100644
--- a/src/alloc.c
+++ b/src/alloc.c
@@ -17,6 +17,7 @@ License along with this file. If not, see
 */
 
 #include "alloc.h"
+#include "atom.h"
 #include "object.h"
 
 extern pool_object *pool;
@@ -86,3 +87,17 @@ heap_copy_array_rev (const object * objs, uint32_t len)
     }
   return p;
 }
+
+uv_val *
+UV_VAL (heap_ptr p)
+{
+  assert (p > 0);
+  return (uv_val*)&vhp_base[p];
+}
+
+atom_body *
+ATOM_BODY (heap_ptr p)
+{
+  assert (p);
+  return (atom_body*)(&vhp_base[p]);
+}
diff --git a/src/alloc.h b/src/alloc.h
index 58ee808..bd7c46b 100644
--- a/src/alloc.h
+++ b/src/alloc.h
@@ -28,16 +28,19 @@ typedef int32_t heap_ptr;
 
 typedef union pool_object pool_object;
 typedef union object object;
+typedef union uv_val uv_val;
 
 pool_object *POOL_OBJECT (pool_ptr p);
 object *HEAP_OBJECT (heap_ptr p);
+uv_val *UV_VAL (heap_ptr p);
 
 pool_ptr pool_alloc (uint32_t len);
 heap_ptr heap_alloc (uint32_t len);
 inline static heap_ptr
 heap_alloc_uv (uint32_t len)
 {
-  return heap_alloc ((len + 1) >> 1);
+  // divide by 2 (rounding up), then add one for dope
+  return heap_alloc (((len + 1) >> 1) + 1);
 }
 
 // given a headerless array of objects of known size,
diff --git a/src/atom.c b/src/atom.c
index 38cadc0..13dab03 100644
--- a/src/atom.c
+++ b/src/atom.c
@@ -18,3 +18,37 @@ License along with this file. If not, see
 
 #include "alloc.h"
 #include "atom.h"
+
+#include <string.h>
+
+typedef struct atom_body
+{
+  evaltype type;		// UNBOUND/LOCI
+  // bindid
+  // value ptr
+  // oblist ptr
+  // type ptr
+  char pname[];
+} atom_body;
+
+atom_object atom_create (const char * name, uint32_t namelen)
+{
+  // C-compatible strings for simplicity
+  namelen += 1;
+  heap_ptr body = atom_body_alloc (namelen);
+  atom_body *content = (atom_body *)HEAP_OBJECT (body);
+  memcpy(&content->pname, name, namelen-1);
+  content->pname[namelen-1] = '\0';
+  atom_object new = new_atom (body, namelen);
+  return new;
+}
+
+heap_ptr atom_body_alloc (uint32_t namelen)
+{
+  return heap_alloc_uv ((sizeof (atom_body) + namelen + 63) / 64);
+}
+
+const char * atom_pname (atom_object o)
+{
+  return ATOM_BODY (o.val.body)->pname;
+}
diff --git a/src/atom.h b/src/atom.h
index 5b71ad3..dad084e 100644
--- a/src/atom.h
+++ b/src/atom.h
@@ -21,14 +21,21 @@ License along with this file. If not, see
 
 #include "object.h"
 
-typedef struct
+typedef struct atom_body atom_body;
+
+// Return an atom with a newly-allocated body.
+atom_object atom_create (const char * name, uint32_t namelen);
+
+atom_body * ATOM_BODY (heap_ptr p);
+
+const char * atom_pname (atom_object o);
+
+inline static uint32_t
+atom_namelen (atom_object body)
 {
-  evaltype type;		// UNBOUND/LOCI
-  // bindid
-  // value ptr
-  // oblist ptr
-  // type ptr
-  const char pname[];
-} atom_body;
+  return body.val.namelen;
+}
+
+heap_ptr atom_body_alloc (uint32_t ct);
 
 #endif
diff --git a/src/eval.c b/src/eval.c
index f322946..7d28057 100644
--- a/src/eval.c
+++ b/src/eval.c
@@ -141,6 +141,7 @@ eval ()
     {
     case EVALTYPE_FIX32:
     case EVALTYPE_FIX64:
+    case EVALTYPE_ATOM:
       RETURN (cf->args.body[0]);
     case EVALTYPE_LIST:
       // Handle `head` now; then iterate on `.rest`.
diff --git a/src/hash.h b/src/hash.h
new file mode 100644
index 0000000..5534110
--- /dev/null
+++ b/src/hash.h
@@ -0,0 +1,45 @@
+/*
+Copyright (C) 2018 Keziah Wesley
+
+You can redistribute and/or modify this file under the terms of the
+GNU Affero General Public License as published by the Free Software
+Foundation, either version 3 of the License, or (at your option) any
+later version.
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public
+License along with this file. If not, see
+<http://www.gnu.org/licenses/>.
+*/
+
+#ifndef HASH_H
+#define HASH_H
+
+#include <stddef.h>
+
+// Very fast, non collision-resistant hash
+
+inline static uint32_t
+fnv_32a_init()
+{
+  return 0x811c9dc5;
+}
+
+inline static uint32_t
+fnv_32a_extend(const void *buf, size_t len, uint32_t hval)
+{
+  unsigned char *bp = (unsigned char *)buf;
+  unsigned char *be = bp + len;
+  while (bp < be) {
+    hval ^= (uint32_t)*bp++;
+    /* multiply by the 32 bit FNV magic prime mod 2^32 */
+    hval += (hval<<1) + (hval<<4) + (hval<<7) + (hval<<8) + (hval<<24);
+  }
+  return hval;
+}
+
+#endif // HASH_H
diff --git a/src/main.c b/src/main.c
index 6645f6e..a75964d 100644
--- a/src/main.c
+++ b/src/main.c
@@ -20,6 +20,7 @@ License along with this file. If not, see
 #include "eval.h"
 #include "print.h"
 #include "object.h"
+#include "oblist.h"
 
 #include <stdio.h>
 #include <sys/mman.h>
@@ -31,6 +32,9 @@ pool_ptr ptop;
 object *vhp_base;
 heap_ptr vhp;
 
+// oblists (move to ASOCs once implemented)
+uvector_object root;
+
 // TODO: store these in current PROCESS
 frame *cf;
 object ret;
@@ -86,6 +90,7 @@ main ()
       // mock GC (no object persistence)
       ptop = 1;
       vhp = 1;
+      root = oblist_create (13);
       // terminate input
       assert (buf[n - 1] == '\n');
       buf[n - 1] = '\0';
@@ -112,6 +117,11 @@ main ()
       // Print the thing
       print_object (&ret);
       printf ("\n");
+      /*
+      // debugging oblists...
+      print_object ((object*) &root);
+      printf ("\n");
+      */
       // Loop!
     }
 
diff --git a/src/object.c b/src/object.c
index ff65f41..1e77049 100644
--- a/src/object.c
+++ b/src/object.c
@@ -64,3 +64,8 @@ static object rest(const object *lst) {
     return o;
 }
 */
+
+dope_object * uv_dope (const uvector_object *o)
+{
+  return (dope_object*) &HEAP_OBJECT (o->val.body)[(o->val.len + 1) / 2 + 1];
+}
diff --git a/src/object.h b/src/object.h
index ec322b5..38b2882 100644
--- a/src/object.h
+++ b/src/object.h
@@ -192,7 +192,7 @@ typedef struct
 
 typedef struct
 {
-  alignas (8) uint32_t _pad;
+  alignas (8) uint32_t namelen;
   heap_ptr body;
 } atom_val;
 typedef struct
@@ -221,6 +221,18 @@ typedef struct
   uint32_t gc;
 } dope_object;
 
+/// Value half of a poolable object, for storage in a uvector.
+typedef union uv_val
+{
+  fix32_val fix32;
+  fix64_val fix64;
+  list_val list;
+  vector_val vector;
+  uvector_val uvector;
+  subr_val subr;
+  atom_val atom;
+} uv_val;
+
 /// Object of a type that can be stored in the pool.
 /// NB. a pool_object* can point outside the pool; contrast with pool_ptr.
 typedef union pool_object
@@ -231,7 +243,7 @@ typedef union pool_object
     // NB. never take the address of these type-punned fields!
     alignas (16) evaltype type;
     pool_ptr rest;
-    opaque64 val;
+    uv_val val;
   };
   /// objects of statically known type
   fix32_object fix32;
@@ -242,18 +254,6 @@ typedef union pool_object
   atom_object atom;
 } pool_object;
 
-/// Value half of a poolable object, for storage in a uvector.
-typedef union
-{
-  fix32_val fix32;
-  fix64_val fix64;
-  list_val list;
-  vector_val vector;
-  uvector_val uvector;
-  subr_val subr;
-  atom_val atom;
-} uv_val;
-
 union object
 {
   /// any object has a type
@@ -313,6 +313,7 @@ new_list (pool_ptr head)
   ,};
 }
 
+// TODO: take a dope_object like uvector
 static inline vector_object
 new_vector (heap_ptr body, uint32_t length)
 {
@@ -329,7 +330,7 @@ new_uvector (heap_ptr body, uint32_t length)
 {
   return (uvector_object)
   {
-    .type = EVALTYPE_VECTOR,.rest = 0,.val = (uvector_val)
+    .type = EVALTYPE_UVECTOR,.rest = 0,.val = (uvector_val)
     {
     .len = length,.body = body}
   };
@@ -355,22 +356,45 @@ new_subr (void (*fn) ())
 }
 
 static inline atom_object
-new_atom (pool_ptr body)
+new_atom (pool_ptr body, uint32_t namelen)
 {
   return (atom_object)
   {
     .type = EVALTYPE_ATOM,.rest = 0,.val = (atom_val)
     {
-    .body = body}
+      .body = body,.namelen = namelen}
   };
 }
 
+static inline dope_object
+new_dope (uint32_t len, evaltype type)
+{
+  return (dope_object)
+    {
+      .type = type,.grow = 0,.len = len,.gc = 0};
+}
+
 /**
 Common object operations.
 */
 
 uint32_t list_length (const list_object * o);
 
+dope_object * uv_dope (const uvector_object * o);
+
+static inline evaltype
+utype (const uvector_object * o)
+{
+  return uv_dope (o)->type;
+}
+
+// Change the EVALTYPE of an object. New type must have same PRIMTYPE.
+static inline void chtype (object *o, evaltype type)
+{
+  assert (TYPEPRIM_EQ (o->type, type));
+  o->type = type;
+}
+
 /**
 Checked downcasts.
 */
@@ -403,4 +427,11 @@ as_pool (object * p)
   return (pool_object *) p;
 }
 
+static inline atom_object *
+as_atom (object * o)
+{
+  assert (TYPEPRIM_EQ (o->type, EVALTYPE_ATOM));
+  return &o->atom;
+}
+
 #endif // OBJECT_H
diff --git a/src/oblist.c b/src/oblist.c
new file mode 100644
index 0000000..99f483e
--- /dev/null
+++ b/src/oblist.c
@@ -0,0 +1,99 @@
+/*
+Copyright (C) 2017-2018 Keziah Wesley
+
+You can redistribute and/or modify this file under the terms of the
+GNU Affero General Public License as published by the Free Software
+Foundation, either version 3 of the License, or (at your option) any
+later version.
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public
+License along with this file. If not, see
+<http://www.gnu.org/licenses/>.
+*/
+
+#include "alloc.h"
+#include "atom.h"
+#include "hash.h"
+#include "object.h"
+#include "oblist.h"
+
+#include <assert.h>
+#include <string.h>
+
+uvector_object
+oblist_create (uint32_t buckct)
+{
+  heap_ptr body = heap_alloc_uv (buckct);
+  memset (UV_VAL (body), '\0', buckct * sizeof (uv_val));
+  uvector_object oblist = new_uvector(body, buckct);
+  *uv_dope (&oblist) = new_dope (buckct, EVALTYPE_LIST);
+  chtype ((object*)&oblist, EVALTYPE_OBLIST);
+  return oblist;
+}
+
+// TODO: define for other numeric types, move to general utility library
+inline static uint32_t u32_min (uint32_t x, uint32_t y) { return (x < y) ? x : y; }
+#define MIN(x, y)                                 \
+  _Generic((x+y),                                  \
+           uint32_t: u32_min(x, y)		   \
+          )
+
+// debugging:
+#include <stdio.h>
+atom_object
+oblist_find_or_insert (uvector_object oblist, const char * name, uint32_t namelen)
+{
+  assert(oblist.val.len);
+  uint32_t hash = fnv_32a_extend(name, namelen, fnv_32a_init());
+  uint32_t nbucket = hash % oblist.val.len;
+  list_val * bucket = &UV_VAL (oblist.val.body)[nbucket].list;
+  // linear search in the bucket
+  pool_ptr * prev = &bucket->head;
+  pool_ptr tail = 0;
+  while (*prev)
+  {
+    atom_object * other = &POOL_OBJECT (*prev)->atom;
+    int cmp = strncmp(name, atom_pname(*other), MIN (namelen, atom_namelen(*other)));
+    if (cmp < 0)
+      {
+	tail = *prev;
+	break;
+      }
+    else if (!cmp)
+      return *other;
+    prev = &POOL_OBJECT (*prev)->rest;
+  }
+  // Either:
+  // - reached end of list without finding; tail==0
+  // - found where it should be in the list; tail is the rest (if any)
+  *prev = pool_alloc (1);
+  atom_object new = atom_create (name, namelen);
+  POOL_OBJECT (*prev)->atom = new;
+  POOL_OBJECT (*prev)->atom.rest = tail;
+  return new;
+}
+
+/*
+object
+oblist_lookup (uvector_object oblist, string_object name)
+{
+}
+*/
+
+#if 0
+atom_object
+intern_atom (const char *p, int len)
+{
+  // TODO: look up in existing oblists first
+
+  uint32_t lenobjs = (len + sizeof (object) - 1) / sizeof (object);
+  //heap_ptr body = heap_alloc (sizeof (atom_body) + lenobjs);
+  heap_ptr body = atom_body_alloc (1);
+  return new_atom (0);
+}
+#endif
diff --git a/src/oblist.h b/src/oblist.h
new file mode 100644
index 0000000..18badd5
--- /dev/null
+++ b/src/oblist.h
@@ -0,0 +1,39 @@
+/*
+Copyright (C) 2017-2018 Keziah Wesley
+
+You can redistribute and/or modify this file under the terms of the
+GNU Affero General Public License as published by the Free Software
+Foundation, either version 3 of the License, or (at your option) any
+later version.
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public
+License along with this file. If not, see
+<http://www.gnu.org/licenses/>.
+*/
+
+#ifndef OBLIST_H
+#define OBLIST_H
+
+#include "object.h"
+
+// UVECTOR of LISTs of ATOMs
+// each LIST is a hash bucket
+// each LIST is sorted by PNAME
+// each LIST is headed by a LOSE sentry object
+
+// for now, 1 global oblist
+extern uvector_object root;
+
+uvector_object oblist_create (uint32_t buckct);
+
+atom_object oblist_find_or_insert (uvector_object oblist, const char * name, uint32_t namelen);
+
+// returns a ATOM object or #FALSE ()
+//object oblist_lookup (uvector_object oblist, const char * name, uint32_t namelen);
+
+#endif
diff --git a/src/print.c b/src/print.c
index 3a5866a..fe1f663 100644
--- a/src/print.c
+++ b/src/print.c
@@ -16,6 +16,7 @@ License along with this file. If not, see
 <http://www.gnu.org/licenses/>.
 */
 
+#include "atom.h"
 #include "print.h"
 #include "object.h"
 
@@ -39,6 +40,28 @@ print_vector_body (const vector_object * o)
     }
 }
 
+static void
+print_uvector_body (const uvector_object * o)
+{
+  const uv_val *p = UV_VAL (o->val.body);
+  if (!p)
+    return;
+  pool_object x;
+  x.type = utype (o);
+  x.rest = 0;
+  if (o->val.len)
+    {
+      x.val = p[0];
+      print_object ((object*) &x);
+    }
+  for (uint32_t i = 1; i < o->val.len; i++)
+    {
+      printf (" ");
+      x.val = p[i];
+      print_object ((object*) &x);
+    }
+}
+
 static void
 print_list_body (const list_object * o)
 {
@@ -79,7 +102,20 @@ print_object (const object * o)
       print_vector_body (&o->vector);
       printf ("]");
       break;
+    case EVALTYPE_OBLIST:
+      // for now, handle non-primtype print as special case
+      printf ("#OBLIST ");
+      // FALLTHROUGH
+    case EVALTYPE_UVECTOR:
+      printf ("![");
+      print_uvector_body (&o->uvector);
+      printf ("!]");
+      break;
+    case EVALTYPE_ATOM:
+      printf ("%s", atom_pname(o->atom));
+      break;
     default:
+      fprintf (stderr, "Tried to print the unprintable: 0x%x\n", o->type);
       assert (0 && "I don't know how to print that");
     }
 }
diff --git a/src/read.c b/src/read.c
index b8570f3..0f6a63d 100644
--- a/src/read.c
+++ b/src/read.c
@@ -18,6 +18,7 @@ License along with this file. If not, see
 
 #include "read.h"
 #include "object.h"
+#include "oblist.h"
 
 #include <assert.h>
 #include <stdio.h>
@@ -262,7 +263,7 @@ read_token (const char *p, reader_stack * st)
 	n = count_pname (p);
 	if (n > 0)
 	  {
-	    (--(st->pos))->atom = new_atom (0);
+	    (--(st->pos))->atom = oblist_find_or_insert (root, p, n);
 	    st->framelen++;
 	    return p + n;
 	  }
diff --git a/src/test_oblists b/src/test_oblists
new file mode 100755
index 0000000..7ce5674
--- /dev/null
+++ b/src/test_oblists
@@ -0,0 +1,9 @@
+#!/bin/sh
+
+assert_eq () {
+    [ "$1" = "$2" ] || ( echo Assertion failed: "\"$1\"" = "\"$2\""; exit 1 )
+}
+
+assert_eq foo "$(echo foo | ./muddle)"
+assert_eq '(foo bar baz)' "$(echo '(foo bar baz)' | ./muddle)"
+assert_eq '(foo foo foo)' "$(echo '(foo foo foo)' | ./muddle)"
-- 
2.15.0




More information about the muddle mailing list