faster startup 8/n, copy-on-write

Previous Topic Next Topic
 
classic Classic list List threaded Threaded
2 messages Options
Reply | Threaded
Open this post in threaded view
|

faster startup 8/n, copy-on-write

Paolo Bonzini
This patch enables GNU Smalltalk to use copy-on-write for the image file
whenever possible.  It is damn small compared to what it achieves!

Note that most of the previous speedup work was also a prerequisite for
this patch, since copy-on-write requires e.g. the OOP table's address to
not change between save and load.  The speedup achieved is 16ms on my
machine, or about 30%.

The additional benefit is that the parts of the image file that are not
modified can be shared by all the instances of GNU Smalltalk.

We now have another leader in the profile, namely _gst_intern_string.
So, further speedups can be obtained by doing less work to reset the
global variables of the VM.  For example, we need
_gst_hashed_collection_class when creating the classes because it is a
superclass of _gst_dictionary_class, but not when reloading the image.
Same for the primitive numbers; in some way it should be possible to not
look them up.  Actually only ~30 symbols, plus the symbols in sym.c,
should be looked up (right now we are at ~600 overall).

Paolo

2006-12-22  Paolo Bonzini  <[hidden email]>
 
        * libgst/gstpriv.h: Reserve three fewer bits for runtime flags, add
        F_LOADED.
        * libgst/oop.c: The address of loaded objects never changes, and
        they're always old.  Never compact loaded objects.  Never free them either.
        * libgst/print.c: Print loaded OOPs correctly.
        * libgst/save.c: Define and use buffer_advance.  Try reusing the
        mmap-ed area in load_normal_oops; free buffer in load_normal_oops
        only if copy-on-write is not used.  Mmap the whole file in a
        single step using MAP_PRIVATE, and only do the "read" method
        in buffer_fill.  Rename use_mmap to buf_used_mmap.  Save objects with
        the old bit set and without the F_RUNTIME flags.


--- orig/libgst/gstpriv.h
+++ mod/libgst/gstpriv.h
@@ -256,7 +256,7 @@ enum {
 
   /* The grouping of all the flags which are not valid across image
      saves and loads.  */
-  F_RUNTIME = 0xFFF000U,
+  F_RUNTIME = 0xFF8000U,
 
   /* Set if the OOP is currently unused.  */
   F_FREE = 0x10U,
@@ -286,6 +286,10 @@ enum {
      and contexts whose receiver is untrusted.  */
   F_UNTRUSTED = 0x800U,
 
+  /* Set for objects that were loaded from the image.  We never
+     garbage collect their contents, only the OOPs.  */
+  F_LOADED = 0x1000U,
+
   /* Set to the number of bytes unused in an object with byte-sized
      instance variables.  Note that this field and the following one
      should be initialized only by INIT_UNALIGNED_OBJECT (not really


--- orig/libgst/oop.c
+++ mod/libgst/oop.c
@@ -747,18 +747,22 @@ _gst_make_oop_fixed (OOP oop)
   if (oop->flags & F_FIXED)
     return;
 
-  size = SIZE_TO_BYTES (TO_INT(oop->object->objSize));
-  newObj = (gst_object) _gst_mem_alloc (_gst_mem.fixed, size);
-  if (!newObj)
-    abort ();
+  if ((oop->flags & F_LOADED) == 0)
+    {
+      size = SIZE_TO_BYTES (TO_INT(oop->object->objSize));
+      newObj = (gst_object) _gst_mem_alloc (_gst_mem.fixed, size);
+      if (!newObj)
+        abort ();
 
-  memcpy (newObj, oop->object, size);
-  if (oop->flags & F_OLD)
-    _gst_mem_free (_gst_mem.old, oop->object);
-  else
-    _gst_mem.numOldOOPs++;
+      memcpy (newObj, oop->object, size);
+      if ((oop->flags & F_OLD) == 0)
+ _gst_mem.numOldOOPs++;
+      else
+        _gst_mem_free (_gst_mem.old, oop->object);
+
+      oop->object = newObj;
+    }
 
-  oop->object = newObj;
   oop->flags &= ~(F_SPACES | F_POOLED);
   oop->flags |= F_OLD | F_FIXED;
 }
@@ -778,7 +782,6 @@ _gst_tenure_oop (OOP oop)
         abort ();
 
       memcpy (newObj, oop->object, size);
-
       _gst_mem.numOldOOPs++;
 
       oop->object = newObj;
@@ -1051,7 +1054,7 @@ _gst_compact (size_t new_heap_limit)
        oop < &_gst_mem.ot[_gst_mem.ot_size]; oop++)
     {
       PREFETCH_LOOP (oop, PREF_READ | PREF_NTA);
-      if ((oop->flags & (F_OLD | F_FIXED)) == F_OLD)
+      if ((oop->flags & (F_OLD | F_FIXED | F_LOADED)) == F_OLD)
         {
           gst_object new;
           size_t size = SIZE_TO_BYTES (TO_INT (oop->object->objSize));
@@ -1423,14 +1426,16 @@ _gst_sweep_oop (OOP oop)
       _gst_mem.numOldOOPs--;
       stats.reclaimedOldSpaceBytesSinceLastGlobalGC +=
  SIZE_TO_BYTES (TO_INT (OOP_TO_OBJ (oop)->objSize));
-      _gst_mem_free (_gst_mem.fixed, oop->object);
+      if ((oop->flags & F_LOADED) == 0)
+        _gst_mem_free (_gst_mem.fixed, oop->object);
     }
   else if UNCOMMON (oop->flags & F_OLD)
     {
       _gst_mem.numOldOOPs--;
       stats.reclaimedOldSpaceBytesSinceLastGlobalGC +=
  SIZE_TO_BYTES (TO_INT (OOP_TO_OBJ (oop)->objSize));
-      _gst_mem_free (_gst_mem.old, oop->object);
+      if ((oop->flags & F_LOADED) == 0)
+        _gst_mem_free (_gst_mem.old, oop->object);
     }
 
   oop->flags = F_FREE;


--- orig/libgst/print.c
+++ mod/libgst/print.c
@@ -329,9 +329,10 @@ _gst_display_oop_short (OOP oop)
             oop->flags & F_WEAK ? "Weak" :
             oop->flags & F_EPHEMERON ? "Ephemeron" : "",
 
-            oop->flags & _gst_mem.active_flag ? "To-space" :
             oop->flags & F_FIXED ? "Fixed" :
-            oop->flags & F_OLD ? "Old" : "From-space",
+            oop->flags & F_LOADED ? "Permanent" :
+            oop->flags & F_OLD ? "Old" :
+            oop->flags & _gst_mem.active_flag ? "To-space" : "From-space",
 
     IS_EDEN_ADDR (oop->object) ? "Eden" :
     IS_SURVIVOR_ADDR (oop->object, 0) ? "Surv (Even)" :
@@ -359,9 +360,10 @@ _gst_display_oop (OOP oop)
             oop->flags & F_WEAK ? "Weak" :
             oop->flags & F_EPHEMERON ? "Ephemeron" : "",
 
-            oop->flags & _gst_mem.active_flag ? "To-space" :
             oop->flags & F_FIXED ? "Fixed" :
-            oop->flags & F_OLD ? "Old" : "From-space",
+            oop->flags & F_LOADED ? "Permanent" :
+            oop->flags & F_OLD ? "Old" :
+            oop->flags & _gst_mem.active_flag ? "To-space" : "From-space",
 
     IS_EDEN_ADDR (oop->object) ? "Eden" :
     IS_SURVIVOR_ADDR (oop->object, 0) ? "Surv (Even)" :


--- orig/libgst/save.c
+++ mod/libgst/save.c
@@ -132,7 +132,7 @@ static off_t file_size;
 static off_t file_pos;
 
 /* Whether we are using mmap to read the file.  */
-static mst_Boolean use_mmap;
+static mst_Boolean buf_used_mmap;
 
 
 /* This function establishes a buffer of size NUMBYTES for writes.  */
@@ -155,6 +155,12 @@ static void buffer_read_init (int imageF
 /* This function frees the buffer used for reads.  */
 static void buffer_read_free (int imageFd);
 
+/* This function, which only works if memory-mapped I/O is used, advances
+   the buffer pointer by NUMBYTES and returns the pointer to the previous
+   value of the buffer pointer.  */
+static inline PTR buffer_advance (int imageFd,
+  int numBytes);
+
 /* This function buffers reads from the image file whose descriptor
    is IMAGEFD.  Memory-mapped I/O is used is possible.  */
 static void buffer_read (int imageFd,
@@ -333,9 +339,9 @@ make_oop_table_to_be_saved (struct save_
     {
       if (IS_OOP_VALID_GC (oop))
  {
-  int numPointers;
-          myOOPTable[i].flags = oop->flags;
-  numPointers = NUM_OOPS (oop->object);
+  int numPointers = NUM_OOPS (oop->object);
+
+          myOOPTable[i].flags = (oop->flags & ~F_RUNTIME) | F_OLD;
 
   /* Cache the number of indexed instance variables.  We prefer
      to do more work upon saving (done once) than upon loading
@@ -437,8 +443,6 @@ _gst_load_from_file (const char *fileNam
   imageFd = _gst_open_file (fileName, "r");
   loaded = (imageFd >= 0) && load_snapshot (imageFd);
 
-  buffer_read_free (imageFd);
-
   close (imageFd);
   return (loaded);
 }
@@ -545,8 +549,11 @@ load_normal_oops (int imageFd)
   OOP oop;
   gst_object object;
   int i;
+  mst_Boolean use_copy_on_write
+    = buf_used_mmap && ~wrong_endianness && ot_delta == 0;
 
-  /* Now walk the oop table.  Start fixing the byte order.  */
+  /* Now walk the oop table.  Load the data (or get the addresses from the
+     mmap-ed area) and fix the byte order.  */
 
   _gst_mem.last_allocated_oop = &_gst_mem.ot[num_used_oops - 1];
   PREFETCH_START (_gst_mem.ot, PREF_WRITE | PREF_NTA);
@@ -570,32 +577,40 @@ load_normal_oops (int imageFd)
  to create new-space objects.  The solution is not however as neat
  as possible.  */
 
-      flags &= ~(F_SPACES | F_POOLED | F_RUNTIME);
-      flags |= F_OLD;
-
       _gst_mem.numOldOOPs++;
       size = sizeof (PTR) * (size_t) oop->object;
-      if (flags & F_FIXED)
+      if (use_copy_on_write)
  {
-  _gst_mem.numFixedOOPs++;
-          object = (gst_object) _gst_mem_alloc (_gst_mem.fixed, size);
+  oop->flags |= F_LOADED;
+  object = buffer_advance (imageFd, size);
  }
-      else
-        object = (gst_object) _gst_mem_alloc (_gst_mem.old, size);
 
-      buffer_read (imageFd, object, size);
-      if UNCOMMON (wrong_endianness)
- fixup_byte_order (object,
-  (oop->flags & F_BYTE)
-  ? OBJ_HEADER_SIZE_WORDS
-  : size / sizeof (PTR));
+      else
+ {
+  if (flags & F_FIXED)
+    {
+      _gst_mem.numFixedOOPs++;
+              object = (gst_object) _gst_mem_alloc (_gst_mem.fixed, size);
+    }
+          else
+            object = (gst_object) _gst_mem_alloc (_gst_mem.old, size);
 
-      if (object->objSize != FROM_INT ((size_t) oop->object))
- abort ();
+          buffer_read (imageFd, object, size);
+          if UNCOMMON (wrong_endianness)
+    fixup_byte_order (object,
+      (oop->flags & F_BYTE)
+      ? OBJ_HEADER_SIZE_WORDS
+      : size / sizeof (PTR));
+
+  /* Would be nice, but causes us to touch every page and lose most
+     of the startup-time benefits of copy-on-write.  So we only
+     do it in the slow case, anyway.  */
+  if (object->objSize != FROM_INT ((size_t) oop->object))
+    abort ();
+        }
 
       /* Remove flags that are invalid after an image has been loaded.  */
       oop->object = object;
-      oop->flags = flags;
 
       if (flags & F_WEAK)
  _gst_make_oop_weak (oop);
@@ -615,6 +630,9 @@ load_normal_oops (int imageFd)
           classOOP = OOP_ABSOLUTE (object->objClass);
   fixup_byte_order (object->data, CLASS_FIXED_FIELDS (classOOP));
  }
+
+  if (!use_copy_on_write)
+    buffer_read_free (imageFd);
 }
 
 
@@ -775,67 +793,53 @@ void
 buffer_fill (int imageFd)
 {
   buf_pos = 0;
-  if (use_mmap)
-    {
-#ifndef WIN32
-      if (buf)
- _gst_osmem_free (buf, buf_size);
-
-      buf = mmap (NULL, buf_size, PROT_READ, MAP_SHARED, imageFd, file_pos);
-
-      if (buf != (PTR) -1)
- {
-#ifdef HAVE_MADVISE
-#ifdef MADV_WILLNEED
-  madvise (buf, buf_size, MADV_WILLNEED);
-#endif
-
-  /* Ahem... this madvise causes a kernel OOPS on my machine!  */
-#if 0
-#ifdef MADV_SEQUENTIAL
-  madvise (buf, buf_size, MADV_SEQUENTIAL);
-#endif
-#endif
-
-#endif /* HAVE_MADVISE */
-  return;
- }
-#endif /* !WIN32 */
-
-      /* First non-mmaped input operation.  Allocate the buffer.  */
-      buf = xmalloc (buf_size);
-      use_mmap = false;
-    }
-
-  /* Cannot mmap the file, use read(2).  */
   read (imageFd, buf, buf_size);
 }
 
 void
 buffer_read_init (int imageFd, int numBytes)
 {
-  if (numBytes)
+  struct stat st;
+  fstat (imageFd, &st);
+  file_size = st.st_size;
+  file_pos = 0;
+
+#ifndef WIN32
+  buf = mmap (NULL, file_size, PROT_READ, MAP_PRIVATE, imageFd, 0);
+
+  if (buf != (PTR) -1)
     {
-      struct stat st;
-      fstat (imageFd, &st);
-      file_size = st.st_size;
-      file_pos = 0;
-      buf_size = numBytes;
-      use_mmap = true;
-      buf = NULL;
-      buffer_fill (imageFd);
+      buf_size = file_size;
+      buf_used_mmap = true;
+      return;
     }
+#endif /* !WIN32 */
+
+  /* Non-mmaped input.  */
+  buf_used_mmap = false;
+  buf_size = numBytes;
+  buf = xmalloc (buf_size);
+  buffer_fill (imageFd);
 }
 
 void
 buffer_read_free (int imageFd)
 {
-  if (use_mmap)
+  if (buf_used_mmap)
     _gst_osmem_free (buf, buf_size);
   else
     xfree (buf);
 }
 
+PTR
+buffer_advance (int imageFd,
+        int numBytes)
+{
+  PTR current_pos = buf + buf_pos;
+  buf_pos += numBytes;
+  return current_pos;
+}
+
 void
 buffer_read (int imageFd,
      PTR pdata,
@@ -843,15 +847,6 @@ buffer_read (int imageFd,
 {
   char *data = (char *) pdata;
 
-#if 0
-  /* Avoid triggering a SIGBUS.  Unnecessary, and wastes time.  */
-  if UNCOMMON (numBytes > file_size - file_pos)
-    {
-      memzero (data + file_size - file_pos, numBytes - (file_size - file_pos));
-      numBytes = file_size - file_pos;
-    }
-#endif
-
   if UNCOMMON (numBytes > buf_size - buf_pos)
     {
       memcpy (data, buf + buf_pos, buf_size - buf_pos);




_______________________________________________
help-smalltalk mailing list
[hidden email]
http://lists.gnu.org/mailman/listinfo/help-smalltalk
Reply | Threaded
Open this post in threaded view
|

Re: faster startup 8/n, copy-on-write

Jim White-8
_______________________________________________
help-smalltalk mailing list
[hidden email]
http://lists.gnu.org/mailman/listinfo/help-smalltalk

sigsegv.sparc64.diff (352 bytes) Download Attachment
lex.sparc64.diff (454 bytes) Download Attachment