[Monetdb-developers] shredding loop-lifted fn:doc() into one collection

Stefan Manegold Stefan.Manegold at cwi.nl
Sun Oct 15 13:52:39 CEST 2006


Peter,

our fix of Friday night to shred all documents of a loop-lifted fn:doc()
call into one single collection (as opposed to creating one collection per
document) --- not checked in yet; see attached "pathfinder.mx.LLdoc.PATCH."
for details --- seems to work fine (at least correct; speed/performance
needs to be analyzed in detail) for file-URLs (i.e., documents that indeed
get cached persistently).

However, for non-file URLs (i.e., documents that do not get cached and hence
stay transient), it did/does not work, basically because the documents are
shredded into the "transient" collection and are (hence?) not assigned unique
document IDs --- the documents do not end up in the doc_* BATs.

A HACK(?) to use the (unified) temporary document-/collection-ID for all
documents (see attached "pathfinder.mx.LLdoc.FIX.") seems to work ---
however, I have no idea whether this is what was intended...

More "live" on Monday.

Stefan

-- 
| Dr. Stefan Manegold | mailto:Stefan.Manegold at cwi.nl |
| CWI,  P.O.Box 94079 | http://www.cwi.nl/~manegold/  |
| 1090 GB Amsterdam   | Tel.: +31 (20) 592-4212       |
| The Netherlands     | Fax : +31 (20) 592-4312       |
-------------- next part --------------
--- runtime/pathfinder.mx	7 Oct 2006 01:21:52 -0000	1.248
+++ runtime/pathfinder.mx	13 Oct 2006 18:45:15 -0000
@@ -1622,28 +1622,27 @@
         selidx_names := selidx_names.mirror().leftfetchjoin(idx_names);
         var selidx_unames := selidx_names.reverse().kunique().reverse();
         var commitBAT := bat(void,str);
-        selidx_unames at batloop() {
-            # note that due to caching rules, some temporary documents (e.g. file URLs) become persistent
-            var ts       := idx_timestamps.find($h);
-            var doCommit := not(isnil(ts));
-            var colname  := "::" + str(wsid) + "::" + $t; # unique name contains ws identfier (MT unique)
-            var name     := bat(void,str).append(colname).seqbase(0 at 0).access(BAT_READ);
-            var location := bat(void,str).append($t).seqbase(0 at 0).access(BAT_READ);
-            var docBAT   := bat(str,bat); 
-            var coll_oid := shred_into_docBAT(docBAT, location, name, colname, oid_nil, empty_runtime, doCommit, lng(0), ts, stream_nil, wsid);
-            var cont     := ws_opencoll(ws, docBAT, colname, coll_oid);
-            if (doCommit) commitBAT.append([bbpname](docBAT).tmark(0 at 0));
-            var idx_repl := selidx_names.uselect($t);
-            idx_colname.replace(idx_repl.project(colname));
-            idx_repl     := idx_repl.project(coll_oid);
-            idx_coll.replace(idx_repl);
-            idx_doc.replace(idx_repl);
-        }
+        var docBAT    := bat(str,bat);
+        var locations := selidx_unames.tmark(0 at 0);
+        var names     := [+]("::" + str(wsid) + "::",  locations);
+        var colname   := names.fetch(0);
+        var ts        := selidx_unames.mirror().leftfetchjoin(idx_timestamps).min();
+        var doCommit  := not(isnil(ts));
+        var coll_oid  := shred_into_docBAT(docBAT, locations, names, colname, oid_nil, empty_runtime, doCommit, lng(0), ts, stream_nil, wsid); 
+        var cont      := ws_opencoll(ws, docBAT, colname, coll_oid);
+        if (doCommit) commitBAT.append([bbpname](docBAT).tmark(0 at 0));
+        
         # commit the new collections 
         pf_checkpoint(commitBAT);
         lock_set(pf_short);
         doc_undo.delete(doc_undo.select(wsid)); # if these remain, ws_destroy() would remove the new documents
+        var doc_oids := names.leftjoin(doc_name.reverse());
         lock_unset(pf_short);
+
+        # set new colname, coll_oid, and doc_oid
+        idx_colname.replace(selidx_names.project(colname));
+        idx_coll.replace(selidx_names.project(coll_oid));
+        idx_doc.replace(selidx_names.leftjoin(locations.reverse()).leftjoin(doc_oids));
     }
 
     # add in bulk all documents to the ws (could be thousands of them!!)
-------------- next part --------------
--- runtime/pathfinder.mx.LLdoc.	2006-10-13 20:44:53.000000000 +0200
+++ runtime/pathfinder.mx.LLdoc2.	2006-10-15 13:37:31.000000000 +0200
@@ -1634,9 +1634,14 @@
         
         # commit the new collections 
         pf_checkpoint(commitBAT);
+        var doc_oids;
         lock_set(pf_short);
         doc_undo.delete(doc_undo.select(wsid)); # if these remain, ws_destroy() would remove the new documents
-        var doc_oids := names.leftjoin(doc_name.reverse());
+        if (doCommit) {
+        	doc_oids := names.leftjoin(doc_name.reverse());
+        } else {
+        	doc_oids := names.project(coll_oid);
+        }
         lock_unset(pf_short);
 
         # set new colname, coll_oid, and doc_oid


More information about the developers-list mailing list