Re: [Monetdb-developers] [Monetdb-pf-checkins] pathfinder/modules/pftijah nexi.c, 1.49, 1.50 pftijah_tokenize.l, 1.12, 1.13 pftijah_util.mx, 1.2, 1.3 serialize_pftijah.mx, 1.41, 1.42
On 2007-02-27 16:43, Jan Flokstra wrote:
Update of /cvsroot/monetdb/pathfinder/modules/pftijah In directory sc8-pr-cvs7.sourceforge.net:/tmp/cvs-serv27686
Modified Files: nexi.c pftijah_tokenize.l pftijah_util.mx serialize_pftijah.mx Log Message:
- repair BBP refcount bug for BAT
Is this a fix which also applies to the stable branch?
Reimplement the direct BAT access methods in pftijah serialization for more speed (and clarity).
Start optimizing the pftijah tokenizer. The flex functions are called once per handle_characters() call. This leads to 2 mallocs per call. I tried to do without the mallocs, but this caused a lot of strange results :-) I am now planning to craft the lexer by hand. The first small experiment shows there is a lot to gain there (25% speedup in indexing time).
Index: serialize_pftijah.mx
RCS file: /cvsroot/monetdb/pathfinder/modules/pftijah/serialize_pftijah.mx,v retrieving revision 1.41 retrieving revision 1.42 diff -u -d -r1.41 -r1.42 --- serialize_pftijah.mx 23 Feb 2007 15:11:07 -0000 1.41 +++ serialize_pftijah.mx 27 Feb 2007 15:43:37 -0000 1.42 @@ -31,8 +31,8 @@
extern int handleTijahTerm(struct tijahContextStruct *ctx, char* term);
-extern int useFlexScanner(char* buf, int len, struct tijahContextStruct* tjCtx); /* FLEX */ -extern char* flexScanOneTerm(char* buf, int len); +extern int useFlexScanner(char* buf, struct tijahContextStruct* tjCtx); /* FLEX */ +extern char* flexScanOneTerm(char* buf);
extern char* normalizeTerm(struct tijahContextStruct *ctx, char* term );
@@ -70,15 +70,10 @@ typedef struct dbat_struct { const char* name; BAT* bat;
- int oid_mark;
- int max_i;
- int max_sz;
- bit dflt; /* fill with default value during extend */
- int dflt_int; /* the default int value */
- chr dflt_chr; /* the default chr value */
- oid dflt_oid; /* the default oid value */
- /* */
union { /* cast to perform direct indexex insert in [void,any] BATs */
- oid raw_max;
- oid seqbase;
- oid seq_max;
union { /* cast to perform direct indexe insert in [void,any] BATs */ void* voidCAST; /* the basecast */ chr* chrCAST; /* cast for [void,chr] BAT */ int* intCAST; /* cast for [void,int] BAT */
@@ -89,7 +84,6 @@ int dbat_init(const char* name, dbat* dbat, BAT* b) { dbat->name = name; dbat->bat = b;
- dbat->dflt = FALSE; if ( dbat->bat->htype != TYPE_void ) { stream_printf(GDKerr,"ERROR: dbat_init(%s) non void BAT\n",dbat->name); return 0;
@@ -98,31 +92,25 @@ stream_printf(GDKerr,"ERROR: dbat_init(%s) unknown ttype(%d)\n",dbat->name,dbat->bat->ttype); return 0; }
dbat->oid_mark = b->hseqbase;
- dbat->max_i = dbat->max_sz = BATcount(dbat->bat);
dbat->seqbase = (oid)b->hseqbase;
- dbat->raw_max = (oid)BATcount(dbat->bat);
- dbat->seq_max = dbat->raw_max + dbat->seqbase; dbat->cast.voidCAST = (void*)BUNfirst(dbat->bat); /* */ return 1;
}
-int dbat_finalize(dbat* dbat) {
BAT* b = dbat->bat;
+int dbat_finalize(dbat* dbat, int topidx) { void* top;
BAT* b = dbat->bat;
int bottomTop = dbat->max_i;
- if ( 0 ) stream_printf(GDKout,"dbat_finalize(size=%d)\n",dbat->max_i);
- topidx -= (int)dbat->seqbase;
int bottomTop = topidx; switch( b->ttype ) { case TYPE_int : top = &dbat->cast.intCAST[bottomTop]; break;
case TYPE_chr: {
b->batBuns->free = dbat->max_i;
BATsetcount(b, dbat->max_i);
b->tsorted = 0;
b->batDirty = TRUE; /* VERY important this one */
return 1;
} case TYPE_oid: top = &dbat->cast.oidCAST[bottomTop]; break;
@@ -137,7 +125,7 @@ /* */ dbat->name = NULL; dbat->bat = NULL;
- dbat->max_i = dbat->max_sz = 0;
- dbat->raw_max = dbat->seqbase = 0; /* */ return 1;
} @@ -145,9 +133,14 @@ #define MINCHUNK 8192 #define MAXCHUNK 67108864
-int dbat_extend(dbat* dbat, int i_mark) {
- /* CHECK THIS if ( i_mark ) i = i_mark - dbat->oid_mark; */
- size_t newsize = MAX(MIN(MAX(MINCHUNK,dbat->max_sz*2),(size_t)(dbat->max_sz+MAXCHUNK)),(size_t)i_mark);
+int dbat_extend(dbat* dbat, oid min_i /*raw-index*/, size_t forced_size) {
size_t newsize;
if ( forced_size ) {
newsize = forced_size;
} else {
newsize = MAX(MIN(MAX(MINCHUNK,dbat->raw_max*2),(size_t)(dbat->raw_max+MAXCHUNK)),(size_t)min_i);
}
/* first check if the number of BUN's < INT_MAX. If this was the case
- and the previous time INT_MAX was returned this means the BAT cannot
@@ -156,94 +149,34 @@ if ( newsize > INT_MAX ) { newsize = INT_MAX;
- if ( dbat->max_sz == INT_MAX ) {
- if ( dbat->raw_max == INT_MAX ) { GDKerror("dbat_extend: BATextend["%s"](size>INT_MAX) fails\n","incomplete"); return -1; } }
- if ( 0 ) { stream_printf(GDKout,"dbat_extend[%s](%d -> %d)\n",dbat->name,dbat->max_sz,newsize); }
- dbat->max_sz= newsize;
+#if 0
- stream_printf(GDKout,"dbat_extend[%s](%d -> %d)\n",dbat->name,dbat->raw_max,newsize);
+#endif
- dbat->raw_max= newsize;
- dbat->seq_max = dbat->raw_max + dbat->seqbase; if ( !(dbat->bat = BATextend(dbat->bat,newsize)) ) { GDKerror("dbat_extend: BATextend["%s"](to %d) fails\n","incomplete",newsize); return -1; } dbat->cast.voidCAST = (void*)BUNfirst(dbat->bat);
- /*
* now check if there's a default value handler used
*
*/
- if ( dbat->dflt ) {
switch( dbat->bat->ttype ) {
case TYPE_int : {
int v = dbat->dflt_int;
int *to = &dbat->cast.intCAST[dbat->max_sz];
for(register int *p = &dbat->cast.intCAST[dbat->max_i]; p<to;)
*p++ = v;
break;
}
case TYPE_chr: {
chr v = dbat->dflt_chr;
chr *to = &dbat->cast.chrCAST[dbat->max_sz];
for(register chr *p = &dbat->cast.chrCAST[dbat->max_i]; p<to;)
*p++ = v;
break;
}
case TYPE_oid: {
oid v = dbat->dflt_oid;
oid *to = &dbat->cast.oidCAST[dbat->max_sz];
for(register oid *p = &dbat->cast.oidCAST[dbat->max_i]; p<to;)
*p++ = v;
break;
}
default:
GDKerror("dbat_extend: bad ttype\n");
return -1;
}
- }
- /* */ return 1;
}
int dbat_sizeHint(dbat* dbat, int sizeHint_mark) {
int sizeHint = sizeHint_mark - dbat->oid_mark;
- int estimate = dbat->max_i + sizeHint;
- return dbat_extend(dbat, estimate);
-}
-INLINE static int dbat_set_oid(dbat* dbat, int pos_mark, oid v) {
- register int pos;
int sizeHint = sizeHint_mark - dbat->seqbase;
- int estimate = dbat->raw_max + sizeHint;
- if ( (pos=pos_mark - dbat->oid_mark) < dbat->max_i ) {
dbat->cast.oidCAST[pos] = v;
return 1;
- } else {
if ( pos >= dbat->max_sz ) {
if ( dbat_extend(dbat,pos) < 0 )
return -1;
}
dbat->max_i = pos + 1;
dbat->cast.oidCAST[pos] = v;
return 1;
- }
- return dbat_extend(dbat, estimate, 0);
}
-INLINE static int dbat_set_int(dbat* dbat, int pos_mark, int v) {
- register int pos;
+#define dbat_set_oid(DBAT,I,V) (DBAT)->cast.oidCAST[I-(DBAT)->seqbase] = V
- if ( (pos=pos_mark - dbat->oid_mark) < dbat->max_i ) {
dbat->cast.intCAST[pos] = v;
return 1;
- } else {
if ( pos >= dbat->max_sz ) {
if ( dbat_extend(dbat,pos) < 0 )
return -1;
}
dbat->max_i = pos + 1;
dbat->cast.intCAST[pos] = v;
return 1;
- }
-} +#define dbat_set_int(DBAT,I,V) (DBAT)->cast.intCAST[I-(DBAT)->seqbase] = V
/************************************************
@@ -315,9 +248,6 @@
/************************************************
- First the temporary shredder for Tijah by JF
*/
INLINE static oid @@ -328,15 +258,15 @@ BUN bun;
HASHfnd_str(bun, tjctx->hm_globalTag, (str)t);
- if ( bun )
- /* if ( (bun = BUNfnd(tjctx->hm_globalTag,t)) ) OLD */
- if ( bun ) { return *(oid*)BUNtail(tjctx->hm_globalTag,bun);
- else {
if ( !BUNins(tjctx->b_globalTag, &tjctx->n_globalTag, (str)t, FALSE) ) {
- } else {
if ( BUNins(tjctx->b_globalTag, &tjctx->n_globalTag, (str)t, FALSE) ) {
return tjctx->n_globalTag++;
} else { GDKerror("INSERT OF \"%s\" in globalTag fails.\n"); return oid_nil;
} else
return tjctx->n_globalTag++;
}}
#endif } @@ -349,10 +279,7 @@ BUN bun;
HASHfnd_str(bun, tjctx->hm_globalTerm, (str)t);
- if ( bun ) {
return *(oid*)BUNtail(tjctx->hm_globalTerm,bun);
- } else
return oid_nil;
- return ( bun ? *(oid*)BUNtail(tjctx->hm_globalTerm,bun) : oid_nil );
}
INLINE static oid @@ -366,22 +293,35 @@ if ( bun ) return *(oid*)BUNtail(tjctx->hm_globalTerm,bun); else {
if ( !BUNins(tjctx->b_globalTerm, &tjctx->n_globalTerm, (str)t, FALSE)){
if ( BUNins(tjctx->b_globalTerm, &tjctx->n_globalTerm, (str)t, FALSE)){
return tjctx->n_globalTerm++;
} else { GDKerror("INSERT OF \"%s\" in globalTerm fails.\n"); return oid_nil;
} else
return tjctx->n_globalTerm++;
}}
#endif }
-#define tj_add2plane(TJCTX,O) \
- ((dbat_set_oid(&(TJCTX)->dbat_collPre, (TJCTX)->tijahPre, O) < 0) \
? oid_nil : ((oid)(TJCTX)->tijahPre++))
+INLINE oid tj_extend_plane(struct tijahContextStruct *tjctx) {
- oid base = tjctx->tijahPre - tjctx->dbat_collPre.seqbase;
-#define insertPreSize(TJCTX,POS,SIZE) \
- dbat_set_int(&TJCTX->dbat_collSize,(int)POS,SIZE)
- if ( base >= tjctx->dbat_collPre.raw_max ) {
- if ( dbat_extend(&tjctx->dbat_collPre,base, 0) < 0 )
return oid_nil;
- /* IMPORTANT: the size of the two bats is synchronized by the use
* of the forced size (last) parameter of dbat_extend
*/
- if ( dbat_extend(&tjctx->dbat_collSize,base,tjctx->dbat_collPre.raw_max) < 0 )
return oid_nil;
- }
- return tjctx->tijahPre++;
+}
+#define tj_newPre(TJCTX) \
- (((TJCTX)->tijahPre < (TJCTX)->dbat_collPre.seq_max) \
- ? \
- ((oid)(TJCTX)->tijahPre++) : tj_extend_plane(TJCTX))
int handleTijahTerm(struct tijahContextStruct *tjctx, char* term) { @@ -397,13 +337,13 @@ } } if ( (termOid = tj_termOid(tjctx, term)) == oid_nil )
return 0;
} if ( termOid ) { /* term is not a stopword */return -1;
if ( (tjPre = tj_add2plane(tjctx, termOid)) == oid_nil )
if ( (tjPre = tj_newPre(tjctx) ) == oid_nil ) return 0;
if ( insertPreSize(tjctx,tjPre,0) < 0 )
return -1;
dbat_set_oid(&tjctx->dbat_collPre, tjPre, termOid);
dbat_set_int(&tjctx->dbat_collSize,(int)tjPre,0);
#ifdef TJ_TRACE if ( TJ_TRACE ) stream_printf(GDKout,"C[%s]:TERM: "%s", termoid=%d, tjPre=%d\n",tjctx->name,term,termOid,tjPre); #endif @@ -416,40 +356,13 @@ return 1; }
-/************
- The part where the Strings from Pathfinder are shredded into words
- by Tijah. The USE_FLEX macro determines if the strings is shredded
- by Hennings fancy flex scanner or Jan's simple strtok() scanner.
- */
-const char* obsoleteNexiChars = " \t\n\r,:;&*%$#!@=";
-int -useStrtokScanner(tjCtx* tjctx, char* s) -{
- char *t;
- int sz = 0;
-#ifdef TJ_TRACE
- if (TJ_TRACE) stream_printf(GDKout,"C[%s]:CHARACTERS:\n",tjctx->name);
-#endif
- if ( (t = strtok(s,obsoleteNexiChars)) ) do {
- /* not the empty string here */
if ( handleTijahTerm(tjctx,t) < 0 )
return -1;
sz++;
- } while ( (t=strtok(NULL,obsoleteNexiChars)) );
- return 1;
-}
/************************************************
- Now the real output handlers
*/
-#ifdef notused +#if 0 static int handle_sizeHint(XqueryCtx* ctx, int hinted_size) { tjCtx* tjctx = (tjCtx*)ctx->driverWs; @@ -502,14 +415,12 @@ return (str)str_nil; }
-#define GUESSFORCE FALSE
/*
- Replace the value of a collection parameter int the collection parameter
- bat
*/ static int replaceCollParam(tjCtx* tjctx, str param, str val) {
- return ( BUNreplace(tjctx->b_collParam,param,val,GUESSFORCE) != NULL );
- return ( BUNreplace(tjctx->b_collParam,param,val,FALSE) != NULL );
}
static BAT* @@ -894,10 +805,10 @@ /* if ( DOEMIT(tjctx) ) { */ if ( (termOid = tj_tagOid(tjctx, name)) == oid_nil ) return 0;
if ( (tjPre = tj_add2plane(tjctx, termOid)) == oid_nil )
if ( (tjPre = tj_newPre(tjctx) ) == oid_nil ) return 0;
dbat_set_oid(&tjctx->dbat_collPre, tjPre, termOid); if ( tj_pushTag(tjctx,tjPre) < 0 ) return 0;
if ( 0 ) stream_printf(GDKout,"C[%s]:startElement: \"%s\", termoid=%d, Tijah pre#=%d, Pathfinder pre#=%d\n",tjctx->name,name,termOid,tjPre,pre);
#ifdef TJ_TRACE if ( TJ_TRACE ) stream_printf(GDKout,"C[%s]:startElement: "%s", termoid=%d, Tijah pre#=%d, Pathfinder pre#=%d\n",tjctx->name,name,termOid,tjPre,pre); #endif @@ -913,8 +824,7 @@ --tjctx->doc_height; oid start = tj_popTag(tjctx); /* oid of the first node of the element */ int size = tjctx->tijahPre - start - 1; /* the Tijah element size */
- if ( insertPreSize(tjctx,start,size) < 0 )
return 0;
- dbat_set_int(&tjctx->dbat_collSize,(int)start,size);
#ifdef TJ_TRACE if ( TJ_TRACE ) stream_printf(GDKout,"C[%s]:empty_endElement: "%s"\n", tjctx->name,""); #endif @@ -934,8 +844,7 @@ /* if ( DOEMIT(tjctx) ) { */ oid start = tj_popTag(tjctx); /* oid of the first node of the element */ int size = tjctx->tijahPre - start - 1; /* the Tijah element size */
- if ( insertPreSize(tjctx,start,size) < 0 )
return 0;
- dbat_set_int(&tjctx->dbat_collSize,(int)start,size);
#ifdef TJ_TRACE if ( TJ_TRACE ) stream_printf(GDKout,"C[%s]:endElement: "%s"\n", tjctx->name,name); #endif @@ -944,8 +853,6 @@ return 1; }
-#define USE_FLEX 1
/**
- Output generation handler. Handles equivalent of * SAX characters() event.
*/ @@ -954,28 +861,23 @@ EMPTY_CHECK; tjCtx* tjctx = (tjCtx*)ctx->driverWs;
- register char* p = (char*)ch;
- while( *p && isspace(*p) ) p++;
- if ( !*p )
return 1;
#ifdef TJ_TRACE
- if ( TJ_TRACE ) stream_printf(GDKout,"C[%s]:handle_characters(%s) start\n",tjctx->name, (char*)ch);
- if ( TJ_TRACE ) stream_printf(GDKout,"C[%s]:handle_characters(%s) start\n",tjctx->name, p);
#endif
if ( DOEMIT(tjctx) ) {
-#ifdef USE_FLEX
return useFlexScanner((char*)ch,strlen((char*)ch),tjctx);
-#else
return useStrtokScanner(tjctx,(char*)ch);
-#endif
} return 1;return useFlexScanner(p,tjctx);
}
char* normalizeTerm(struct tijahContextStruct *tjctx, char* term ) { char *res; -#ifdef USE_FLEX
res = flexScanOneTerm((char*)term,strlen((char*)term));
-#else
- res = strtok(term,obsoleteNexiChars);
-#endif
- /* INCOMPLETE, should make shure tijahContext is always avail. here */
res = flexScanOneTerm((char*)term); if ( res && tjctx && tjctx->stemCtx->stem) { if ( !(res = (char*)tjctx->stemCtx->stem(tjctx->stemCtx,(char*)res)) ) { /* must be a stopword */
@@ -986,14 +888,6 @@ }
int CMDtj_normalizeTerm(char** res, str term, str stemmer) { -//Leave tokenization disabled for now -// char* tokenized; -//#ifdef USE_FLEX -// tokenized = flexScanOneTerm(term,strlen(term)); -//#else -// tokenized = strtok(term,obsoleteNexiChars); -//#endif
tjStemCtx* stemCtx = getStemmingContext( stemmer );
if ( stemCtx->stem ) {
@@ -1123,13 +1017,9 @@ #ifdef TJ_TRACE if ( TJ_TRACE ) stream_printf(GDKout,"C[%s]:FINISH INDEXING\n",tjctx->name); #endif
/* feature not used anymore ????? */
- if ( 0 /* ROEL CHANGE VIRTUAL ROOT SIZE HERE */ )
insertPreSize(tjctx,0,tjctx->tijahPre - 1);
- if ( dbat_finalize(&tjctx->dbat_collPre) < 0 )
- if ( dbat_finalize(&tjctx->dbat_collPre, tjctx->tijahPre) < 0 ) return GDK_FAIL;
- if ( dbat_finalize(&tjctx->dbat_collSize) < 0 )
- if ( dbat_finalize(&tjctx->dbat_collSize, tjctx->tijahPre) < 0 ) return GDK_FAIL;
#ifdef TJ_TRACE if ( TJ_TRACE ) stream_printf(GDKout,"C[%s]:FINALIZED DIRECT BATS\n",tjctx->name);
Index: pftijah_tokenize.l
RCS file: /cvsroot/monetdb/pathfinder/modules/pftijah/pftijah_tokenize.l,v retrieving revision 1.12 retrieving revision 1.13 diff -u -d -r1.12 -r1.13 --- pftijah_tokenize.l 9 Jan 2007 15:44:39 -0000 1.12 +++ pftijah_tokenize.l 27 Feb 2007 15:43:37 -0000 1.13 @@ -115,7 +115,40 @@
%%
-int useFlexScanner(char* buf, int len, struct tijahContextStruct* tjCtx) { +int OPT0useFlexScanner(char* buf, struct tijahContextStruct* tjCtx) {
- /* UPDATE: this delivers very strange testset results and should not be
- used I think.
- */
- /* This is an optimized version of the flex scanner which does not copy the
- input buffer. The only strange thing about this interface is that it
- requires 2 YY_END_OF_BUFFER_CHAR (eg. 0) at the end of the buffer. The
- size of the buffer is inclusive the 2 0's.
- The last zero is toggled with its original value to prevent corruption
- of memory management tables. This was for me the only way to prevent
- copying here.
- */
- int len = strlen(buf);
- char remember = buf[len+1];
- buf[len+1] = YY_END_OF_BUFFER_CHAR;
- YY_BUFFER_STATE myBuf = yy_scan_buffer(buf, len+2);
- if ( !myBuf ) {
stream_printf(GDKout,"# useFlexScanner: unable to get setup non-copy buffer.");
return 0;
- }
- while ( pftijah_tokenizelex() ) {
/* stream_printf(GDKout,"# scan(%s).\n",pftijah_tokenizetext); */
if ( !handleTijahTerm(tjCtx, pftijah_tokenizetext) )
return 0;
- }
- yy_delete_buffer(myBuf);
- buf[len+1] = remember;
- return 1;
+}
+int useFlexScanner(char* buf, struct tijahContextStruct* tjCtx) {
- // the original
- int len = strlen(buf); YY_BUFFER_STATE myBuf = yy_scan_bytes(buf, len); while (pftijah_tokenizelex()) { if ( !handleTijahTerm(tjCtx, pftijah_tokenizetext) )
@@ -125,6 +158,40 @@ return 1; }
+int OPT2useFlexScanner(char* input, struct tijahContextStruct* tjctx) +{
- /* the fast function. This function is in the pftijah context with lots
* of small strings to tokenize many times faster as the flex and the
* strtok() methods which seem to have a rather larger overhead
*/
- register char* s = input;
- register char x;
+// #define EMIT x=*s; *s=0; stream_printf(GDKout,"#[%s]\n",base);if (!handleTijahTerm(tjctx,base)) return 0; *s=x +#define EMIT x=*s; *s=0; if (!handleTijahTerm(tjctx,base)) return 0; *s=x
- while ( 1 ) {
while ( isspace( *s ) ) s++;
if ( *s ) {
char* base = s;
if ( isalnum(*s) ) {
if ( isdigit(*s) ) {
while ( isdigit(*++s) ) ;
EMIT;
} else {
if (isupper(*s)) *s=tolower(*s);
while ( isalnum(*++s) ) if (isupper(*s)) *s=tolower(*s);
EMIT;
}
} else {
// INCOMPLETE, ENTITIES HERE
// stream_printf(GDKout,"#[SKIPPING:%c]\n",*s);
s++;
}
} else
return 1;
- }
+}
char* tijah_tokenize_string(char* buf, int len, char* outbuf) { int cnt = 0; YY_BUFFER_STATE myBuf = yy_scan_bytes(buf, len); @@ -137,9 +204,10 @@ return outbuf; }
-char* flexScanOneTerm(char* buf, int len) { +char* flexScanOneTerm(char* buf) { char *res; char resBUFF[256];
int len = strlen(buf);
YY_BUFFER_STATE myBuf = yy_scan_bytes(buf, len); if ( pftijah_tokenizelex() ) {
Index: nexi.c
RCS file: /cvsroot/monetdb/pathfinder/modules/pftijah/nexi.c,v retrieving revision 1.49 retrieving revision 1.50 diff -u -d -r1.49 -r1.50 --- nexi.c 23 Feb 2007 15:11:05 -0000 1.49 +++ nexi.c 27 Feb 2007 15:43:37 -0000 1.50 @@ -455,6 +455,7 @@ /* * Now find out if the collection is fragmented or not. */
- /* INCOMPLETE, ERROR HERE WITH REFCOUNTS IN HEAD */ BAT* fb = pftu_lookup_bat(pftu_batname1("tj_%s_fragments",(char*)parserCtx->collection,0)); if ( ! fb ) { stream_printf(GDKerr,"Error: cannot find fragments bat for collection "%s".\n",parserCtx->collection);
@@ -471,6 +472,8 @@ parserCtx->ffPfx = ""; parserCtx->flastPfx = ", str(1)"; }
- BBPunfix(BBPcacheid(fb));
- fb = NULL; // Some special cases for NLLR, since NLLR only works with COARSE2 at the moment if ( txt_retr_model->model == MODEL_NLLR ) { // Switch to COARSE2 algebra for NLLR
Index: pftijah_util.mx
RCS file: /cvsroot/monetdb/pathfinder/modules/pftijah/pftijah_util.mx,v retrieving revision 1.2 retrieving revision 1.3 diff -u -d -r1.2 -r1.3 --- pftijah_util.mx 9 Jan 2007 17:15:23 -0000 1.2 +++ pftijah_util.mx 27 Feb 2007 15:43:37 -0000 1.3 @@ -73,6 +73,7 @@ if ( b == bat_nil ) { return NULL; } else {
}BBPfix(b); return BBPdescriptor(b);
}
Take Surveys. Earn Cash. Influence the Future of IT Join SourceForge.net's Techsay panel and you'll get the chance to share your opinions on IT & business topics through brief surveys-and earn cash http://www.techsay.com/default.php?page=join.php&p=sourceforge&CID=D... _______________________________________________ Monetdb-pf-checkins mailing list Monetdb-pf-checkins@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/monetdb-pf-checkins
On 2/27/2007, "Sjoerd Mullender" sjoerd@acm.org wrote:
On 2007-02-27 16:43, Jan Flokstra wrote:
Update of /cvsroot/monetdb/pathfinder/modules/pftijah In directory sc8-pr-cvs7.sourceforge.net:/tmp/cvs-serv27686
Modified Files: nexi.c pftijah_tokenize.l pftijah_util.mx serialize_pftijah.mx Log Message:
- repair BBP refcount bug for BAT
Is this a fix which also applies to the stable branch?
I'm not sure yet. The bug only shows in the HEAD branch and does not occur in the release branch. The problem was that I did:
bat b = BBPindex(......); if ( b != bat_nil ) return BBPdescriptor(b)
The refcount assert crash occurs in BBPdescriptor(). I used this construction before and never had any problem. The bug made the "Current" branch useless, so I decided to (un)fix (:) it quickly with a BBPfix() / BBPunfix() pair. I will try to figure out what to do next in the near future. Maybe I will even try to consult the CWI people :-)
Reimplement the direct BAT access methods in pftijah serialization for more speed (and clarity).
Start optimizing the pftijah tokenizer. The flex functions are called once per handle_characters() call. This leads to 2 mallocs per call. I tried to do without the mallocs, but this caused a lot of strange results :-) I am now planning to craft the lexer by hand. The first small experiment shows there is a lot to gain there (25% speedup in indexing time).
Index: serialize_pftijah.mx
RCS file: /cvsroot/monetdb/pathfinder/modules/pftijah/serialize_pftijah.mx,v retrieving revision 1.41 retrieving revision 1.42 diff -u -d -r1.41 -r1.42 --- serialize_pftijah.mx 23 Feb 2007 15:11:07 -0000 1.41 +++ serialize_pftijah.mx 27 Feb 2007 15:43:37 -0000 1.42 @@ -31,8 +31,8 @@
extern int handleTijahTerm(struct tijahContextStruct *ctx, char* term);
-extern int useFlexScanner(char* buf, int len, struct tijahContextStruct* tjCtx); /* FLEX */ -extern char* flexScanOneTerm(char* buf, int len); +extern int useFlexScanner(char* buf, struct tijahContextStruct* tjCtx); /* FLEX */ +extern char* flexScanOneTerm(char* buf);
extern char* normalizeTerm(struct tijahContextStruct *ctx, char* term );
@@ -70,15 +70,10 @@ typedef struct dbat_struct { const char* name; BAT* bat;
- int oid_mark;
- int max_i;
- int max_sz;
- bit dflt; /* fill with default value during extend */
- int dflt_int; /* the default int value */
- chr dflt_chr; /* the default chr value */
- oid dflt_oid; /* the default oid value */
- /* */
union { /* cast to perform direct indexex insert in [void,any] BATs */
- oid raw_max;
- oid seqbase;
- oid seq_max;
union { /* cast to perform direct indexe insert in [void,any] BATs */ void* voidCAST; /* the basecast */ chr* chrCAST; /* cast for [void,chr] BAT */ int* intCAST; /* cast for [void,int] BAT */
@@ -89,7 +84,6 @@ int dbat_init(const char* name, dbat* dbat, BAT* b) { dbat->name = name; dbat->bat = b;
- dbat->dflt = FALSE; if ( dbat->bat->htype != TYPE_void ) { stream_printf(GDKerr,"ERROR: dbat_init(%s) non void BAT\n",dbat->name); return 0;
@@ -98,31 +92,25 @@ stream_printf(GDKerr,"ERROR: dbat_init(%s) unknown ttype(%d)\n",dbat->name,dbat->bat->ttype); return 0; }
dbat->oid_mark = b->hseqbase;
- dbat->max_i = dbat->max_sz = BATcount(dbat->bat);
dbat->seqbase = (oid)b->hseqbase;
- dbat->raw_max = (oid)BATcount(dbat->bat);
- dbat->seq_max = dbat->raw_max + dbat->seqbase; dbat->cast.voidCAST = (void*)BUNfirst(dbat->bat); /* */ return 1;
}
-int dbat_finalize(dbat* dbat) {
BAT* b = dbat->bat;
+int dbat_finalize(dbat* dbat, int topidx) { void* top;
BAT* b = dbat->bat;
int bottomTop = dbat->max_i;
- if ( 0 ) stream_printf(GDKout,"dbat_finalize(size=%d)\n",dbat->max_i);
- topidx -= (int)dbat->seqbase;
int bottomTop = topidx; switch( b->ttype ) { case TYPE_int : top = &dbat->cast.intCAST[bottomTop]; break;
case TYPE_chr: {
b->batBuns->free = dbat->max_i;
BATsetcount(b, dbat->max_i);
b->tsorted = 0;
b->batDirty = TRUE; /* VERY important this one */
return 1;
} case TYPE_oid: top = &dbat->cast.oidCAST[bottomTop]; break;
@@ -137,7 +125,7 @@ /* */ dbat->name = NULL; dbat->bat = NULL;
- dbat->max_i = dbat->max_sz = 0;
- dbat->raw_max = dbat->seqbase = 0; /* */ return 1;
} @@ -145,9 +133,14 @@ #define MINCHUNK 8192 #define MAXCHUNK 67108864
-int dbat_extend(dbat* dbat, int i_mark) {
- /* CHECK THIS if ( i_mark ) i = i_mark - dbat->oid_mark; */
- size_t newsize = MAX(MIN(MAX(MINCHUNK,dbat->max_sz*2),(size_t)(dbat->max_sz+MAXCHUNK)),(size_t)i_mark);
+int dbat_extend(dbat* dbat, oid min_i /*raw-index*/, size_t forced_size) {
size_t newsize;
if ( forced_size ) {
newsize = forced_size;
} else {
newsize = MAX(MIN(MAX(MINCHUNK,dbat->raw_max*2),(size_t)(dbat->raw_max+MAXCHUNK)),(size_t)min_i);
}
/* first check if the number of BUN's < INT_MAX. If this was the case
- and the previous time INT_MAX was returned this means the BAT cannot
@@ -156,94 +149,34 @@ if ( newsize > INT_MAX ) { newsize = INT_MAX;
- if ( dbat->max_sz == INT_MAX ) {
- if ( dbat->raw_max == INT_MAX ) { GDKerror("dbat_extend: BATextend["%s"](size>INT_MAX) fails\n","incomplete"); return -1; } }
- if ( 0 ) { stream_printf(GDKout,"dbat_extend[%s](%d -> %d)\n",dbat->name,dbat->max_sz,newsize); }
- dbat->max_sz= newsize;
+#if 0
- stream_printf(GDKout,"dbat_extend[%s](%d -> %d)\n",dbat->name,dbat->raw_max,newsize);
+#endif
- dbat->raw_max= newsize;
- dbat->seq_max = dbat->raw_max + dbat->seqbase; if ( !(dbat->bat = BATextend(dbat->bat,newsize)) ) { GDKerror("dbat_extend: BATextend["%s"](to %d) fails\n","incomplete",newsize); return -1; } dbat->cast.voidCAST = (void*)BUNfirst(dbat->bat);
- /*
* now check if there's a default value handler used
*
*/
- if ( dbat->dflt ) {
switch( dbat->bat->ttype ) {
case TYPE_int : {
int v = dbat->dflt_int;
int *to = &dbat->cast.intCAST[dbat->max_sz];
for(register int *p = &dbat->cast.intCAST[dbat->max_i]; p<to;)
*p++ = v;
break;
}
case TYPE_chr: {
chr v = dbat->dflt_chr;
chr *to = &dbat->cast.chrCAST[dbat->max_sz];
for(register chr *p = &dbat->cast.chrCAST[dbat->max_i]; p<to;)
*p++ = v;
break;
}
case TYPE_oid: {
oid v = dbat->dflt_oid;
oid *to = &dbat->cast.oidCAST[dbat->max_sz];
for(register oid *p = &dbat->cast.oidCAST[dbat->max_i]; p<to;)
*p++ = v;
break;
}
default:
GDKerror("dbat_extend: bad ttype\n");
return -1;
}
- }
- /* */ return 1;
}
int dbat_sizeHint(dbat* dbat, int sizeHint_mark) {
int sizeHint = sizeHint_mark - dbat->oid_mark;
- int estimate = dbat->max_i + sizeHint;
- return dbat_extend(dbat, estimate);
-}
-INLINE static int dbat_set_oid(dbat* dbat, int pos_mark, oid v) {
- register int pos;
int sizeHint = sizeHint_mark - dbat->seqbase;
- int estimate = dbat->raw_max + sizeHint;
- if ( (pos=pos_mark - dbat->oid_mark) < dbat->max_i ) {
dbat->cast.oidCAST[pos] = v;
return 1;
- } else {
if ( pos >= dbat->max_sz ) {
if ( dbat_extend(dbat,pos) < 0 )
return -1;
}
dbat->max_i = pos + 1;
dbat->cast.oidCAST[pos] = v;
return 1;
- }
- return dbat_extend(dbat, estimate, 0);
}
-INLINE static int dbat_set_int(dbat* dbat, int pos_mark, int v) {
- register int pos;
+#define dbat_set_oid(DBAT,I,V) (DBAT)->cast.oidCAST[I-(DBAT)->seqbase] = V
- if ( (pos=pos_mark - dbat->oid_mark) < dbat->max_i ) {
dbat->cast.intCAST[pos] = v;
return 1;
- } else {
if ( pos >= dbat->max_sz ) {
if ( dbat_extend(dbat,pos) < 0 )
return -1;
}
dbat->max_i = pos + 1;
dbat->cast.intCAST[pos] = v;
return 1;
- }
-} +#define dbat_set_int(DBAT,I,V) (DBAT)->cast.intCAST[I-(DBAT)->seqbase] = V
/************************************************
@@ -315,9 +248,6 @@
/************************************************
- First the temporary shredder for Tijah by JF
*/
INLINE static oid @@ -328,15 +258,15 @@ BUN bun;
HASHfnd_str(bun, tjctx->hm_globalTag, (str)t);
- if ( bun )
- /* if ( (bun = BUNfnd(tjctx->hm_globalTag,t)) ) OLD */
- if ( bun ) { return *(oid*)BUNtail(tjctx->hm_globalTag,bun);
- else {
if ( !BUNins(tjctx->b_globalTag, &tjctx->n_globalTag, (str)t, FALSE) ) {
- } else {
if ( BUNins(tjctx->b_globalTag, &tjctx->n_globalTag, (str)t, FALSE) ) {
return tjctx->n_globalTag++;
} else { GDKerror("INSERT OF \"%s\" in globalTag fails.\n"); return oid_nil;
} else
return tjctx->n_globalTag++;
}}
#endif } @@ -349,10 +279,7 @@ BUN bun;
HASHfnd_str(bun, tjctx->hm_globalTerm, (str)t);
- if ( bun ) {
return *(oid*)BUNtail(tjctx->hm_globalTerm,bun);
- } else
return oid_nil;
- return ( bun ? *(oid*)BUNtail(tjctx->hm_globalTerm,bun) : oid_nil );
}
INLINE static oid @@ -366,22 +293,35 @@ if ( bun ) return *(oid*)BUNtail(tjctx->hm_globalTerm,bun); else {
if ( !BUNins(tjctx->b_globalTerm, &tjctx->n_globalTerm, (str)t, FALSE)){
if ( BUNins(tjctx->b_globalTerm, &tjctx->n_globalTerm, (str)t, FALSE)){
return tjctx->n_globalTerm++;
} else { GDKerror("INSERT OF \"%s\" in globalTerm fails.\n"); return oid_nil;
} else
return tjctx->n_globalTerm++;
}}
#endif }
-#define tj_add2plane(TJCTX,O) \
- ((dbat_set_oid(&(TJCTX)->dbat_collPre, (TJCTX)->tijahPre, O) < 0) \
? oid_nil : ((oid)(TJCTX)->tijahPre++))
+INLINE oid tj_extend_plane(struct tijahContextStruct *tjctx) {
- oid base = tjctx->tijahPre - tjctx->dbat_collPre.seqbase;
-#define insertPreSize(TJCTX,POS,SIZE) \
- dbat_set_int(&TJCTX->dbat_collSize,(int)POS,SIZE)
- if ( base >= tjctx->dbat_collPre.raw_max ) {
- if ( dbat_extend(&tjctx->dbat_collPre,base, 0) < 0 )
return oid_nil;
- /* IMPORTANT: the size of the two bats is synchronized by the use
* of the forced size (last) parameter of dbat_extend
*/
- if ( dbat_extend(&tjctx->dbat_collSize,base,tjctx->dbat_collPre.raw_max) < 0 )
return oid_nil;
- }
- return tjctx->tijahPre++;
+}
+#define tj_newPre(TJCTX) \
- (((TJCTX)->tijahPre < (TJCTX)->dbat_collPre.seq_max) \
- ? \
- ((oid)(TJCTX)->tijahPre++) : tj_extend_plane(TJCTX))
int handleTijahTerm(struct tijahContextStruct *tjctx, char* term) { @@ -397,13 +337,13 @@ } } if ( (termOid = tj_termOid(tjctx, term)) == oid_nil )
return 0;
} if ( termOid ) { /* term is not a stopword */return -1;
if ( (tjPre = tj_add2plane(tjctx, termOid)) == oid_nil )
if ( (tjPre = tj_newPre(tjctx) ) == oid_nil ) return 0;
if ( insertPreSize(tjctx,tjPre,0) < 0 )
return -1;
dbat_set_oid(&tjctx->dbat_collPre, tjPre, termOid);
dbat_set_int(&tjctx->dbat_collSize,(int)tjPre,0);
#ifdef TJ_TRACE if ( TJ_TRACE ) stream_printf(GDKout,"C[%s]:TERM: "%s", termoid=%d, tjPre=%d\n",tjctx->name,term,termOid,tjPre); #endif @@ -416,40 +356,13 @@ return 1; }
-/************
- The part where the Strings from Pathfinder are shredded into words
- by Tijah. The USE_FLEX macro determines if the strings is shredded
- by Hennings fancy flex scanner or Jan's simple strtok() scanner.
- */
-const char* obsoleteNexiChars = " \t\n\r,:;&*%$#!@=";
-int -useStrtokScanner(tjCtx* tjctx, char* s) -{
- char *t;
- int sz = 0;
-#ifdef TJ_TRACE
- if (TJ_TRACE) stream_printf(GDKout,"C[%s]:CHARACTERS:\n",tjctx->name);
-#endif
- if ( (t = strtok(s,obsoleteNexiChars)) ) do {
- /* not the empty string here */
if ( handleTijahTerm(tjctx,t) < 0 )
return -1;
sz++;
- } while ( (t=strtok(NULL,obsoleteNexiChars)) );
- return 1;
-}
/************************************************
- Now the real output handlers
*/
-#ifdef notused +#if 0 static int handle_sizeHint(XqueryCtx* ctx, int hinted_size) { tjCtx* tjctx = (tjCtx*)ctx->driverWs; @@ -502,14 +415,12 @@ return (str)str_nil; }
-#define GUESSFORCE FALSE
/*
- Replace the value of a collection parameter int the collection parameter
- bat
*/ static int replaceCollParam(tjCtx* tjctx, str param, str val) {
- return ( BUNreplace(tjctx->b_collParam,param,val,GUESSFORCE) != NULL );
- return ( BUNreplace(tjctx->b_collParam,param,val,FALSE) != NULL );
}
static BAT* @@ -894,10 +805,10 @@ /* if ( DOEMIT(tjctx) ) { */ if ( (termOid = tj_tagOid(tjctx, name)) == oid_nil ) return 0;
if ( (tjPre = tj_add2plane(tjctx, termOid)) == oid_nil )
if ( (tjPre = tj_newPre(tjctx) ) == oid_nil ) return 0;
dbat_set_oid(&tjctx->dbat_collPre, tjPre, termOid); if ( tj_pushTag(tjctx,tjPre) < 0 ) return 0;
if ( 0 ) stream_printf(GDKout,"C[%s]:startElement: \"%s\", termoid=%d, Tijah pre#=%d, Pathfinder pre#=%d\n",tjctx->name,name,termOid,tjPre,pre);
#ifdef TJ_TRACE if ( TJ_TRACE ) stream_printf(GDKout,"C[%s]:startElement: "%s", termoid=%d, Tijah pre#=%d, Pathfinder pre#=%d\n",tjctx->name,name,termOid,tjPre,pre); #endif @@ -913,8 +824,7 @@ --tjctx->doc_height; oid start = tj_popTag(tjctx); /* oid of the first node of the element */ int size = tjctx->tijahPre - start - 1; /* the Tijah element size */
- if ( insertPreSize(tjctx,start,size) < 0 )
return 0;
- dbat_set_int(&tjctx->dbat_collSize,(int)start,size);
#ifdef TJ_TRACE if ( TJ_TRACE ) stream_printf(GDKout,"C[%s]:empty_endElement: "%s"\n", tjctx->name,""); #endif @@ -934,8 +844,7 @@ /* if ( DOEMIT(tjctx) ) { */ oid start = tj_popTag(tjctx); /* oid of the first node of the element */ int size = tjctx->tijahPre - start - 1; /* the Tijah element size */
- if ( insertPreSize(tjctx,start,size) < 0 )
return 0;
- dbat_set_int(&tjctx->dbat_collSize,(int)start,size);
#ifdef TJ_TRACE if ( TJ_TRACE ) stream_printf(GDKout,"C[%s]:endElement: "%s"\n", tjctx->name,name); #endif @@ -944,8 +853,6 @@ return 1; }
-#define USE_FLEX 1
/**
- Output generation handler. Handles the equivalent of the SAX characters() event.
*/ @@ -954,28 +861,23 @@ EMPTY_CHECK; tjCtx* tjctx = (tjCtx*)ctx->driverWs;
- register char* p = (char*)ch;
- while( *p && isspace(*p) ) p++;
- if ( !*p )
return 1;
#ifdef TJ_TRACE
- if ( TJ_TRACE ) stream_printf(GDKout,"C[%s]:handle_characters(%s) start\n",tjctx->name, (char*)ch);
- if ( TJ_TRACE ) stream_printf(GDKout,"C[%s]:handle_characters(%s) start\n",tjctx->name, p);
#endif
if ( DOEMIT(tjctx) ) {
-#ifdef USE_FLEX
return useFlexScanner((char*)ch,strlen((char*)ch),tjctx);
-#else
return useStrtokScanner(tjctx,(char*)ch);
-#endif
} return 1;return useFlexScanner(p,tjctx);
}
char* normalizeTerm(struct tijahContextStruct *tjctx, char* term ) { char *res; -#ifdef USE_FLEX
res = flexScanOneTerm((char*)term,strlen((char*)term));
-#else
- res = strtok(term,obsoleteNexiChars);
-#endif
- /* INCOMPLETE, should make sure tijahContext is always available here */
res = flexScanOneTerm((char*)term); if ( res && tjctx && tjctx->stemCtx->stem) { if ( !(res = (char*)tjctx->stemCtx->stem(tjctx->stemCtx,(char*)res)) ) { /* must be a stopword */
@@ -986,14 +888,6 @@ }
int CMDtj_normalizeTerm(char** res, str term, str stemmer) { -//Leave tokenization disabled for now -// char* tokenized; -//#ifdef USE_FLEX -// tokenized = flexScanOneTerm(term,strlen(term)); -//#else -// tokenized = strtok(term,obsoleteNexiChars); -//#endif
tjStemCtx* stemCtx = getStemmingContext( stemmer );
if ( stemCtx->stem ) {
@@ -1123,13 +1017,9 @@ #ifdef TJ_TRACE if ( TJ_TRACE ) stream_printf(GDKout,"C[%s]:FINISH INDEXING\n",tjctx->name); #endif
/* feature not used anymore ????? */
- if ( 0 /* ROEL CHANGE VIRTUAL ROOT SIZE HERE */ )
insertPreSize(tjctx,0,tjctx->tijahPre - 1);
- if ( dbat_finalize(&tjctx->dbat_collPre) < 0 )
- if ( dbat_finalize(&tjctx->dbat_collPre, tjctx->tijahPre) < 0 ) return GDK_FAIL;
- if ( dbat_finalize(&tjctx->dbat_collSize) < 0 )
- if ( dbat_finalize(&tjctx->dbat_collSize, tjctx->tijahPre) < 0 ) return GDK_FAIL;
#ifdef TJ_TRACE if ( TJ_TRACE ) stream_printf(GDKout,"C[%s]:FINALIZED DIRECT BATS\n",tjctx->name);
Index: pftijah_tokenize.l
RCS file: /cvsroot/monetdb/pathfinder/modules/pftijah/pftijah_tokenize.l,v retrieving revision 1.12 retrieving revision 1.13 diff -u -d -r1.12 -r1.13 --- pftijah_tokenize.l 9 Jan 2007 15:44:39 -0000 1.12 +++ pftijah_tokenize.l 27 Feb 2007 15:43:37 -0000 1.13 @@ -115,7 +115,40 @@
%%
-int useFlexScanner(char* buf, int len, struct tijahContextStruct* tjCtx) { +int OPT0useFlexScanner(char* buf, struct tijahContextStruct* tjCtx) {
- /* UPDATE: this delivers very strange testset results and should not be
- used I think.
- */
- /* This is an optimized version of the flex scanner which does not copy the
- input buffer. The only strange thing about this interface is that it
- requires 2 YY_END_OF_BUFFER_CHAR (eg. 0) at the end of the buffer. The
- size of the buffer is inclusive the 2 0's.
- The last zero is toggled with its original value to prevent corruption
- of memory management tables. This was for me the only way to prevent
- copying here.
- */
- int len = strlen(buf);
- char remember = buf[len+1];
- buf[len+1] = YY_END_OF_BUFFER_CHAR;
- YY_BUFFER_STATE myBuf = yy_scan_buffer(buf, len+2);
- if ( !myBuf ) {
stream_printf(GDKout,"# useFlexScanner: unable to get setup non-copy buffer.");
return 0;
- }
- while ( pftijah_tokenizelex() ) {
/* stream_printf(GDKout,"# scan(%s).\n",pftijah_tokenizetext); */
if ( !handleTijahTerm(tjCtx, pftijah_tokenizetext) )
return 0;
- }
- yy_delete_buffer(myBuf);
- buf[len+1] = remember;
- return 1;
+}
+int useFlexScanner(char* buf, struct tijahContextStruct* tjCtx) {
- // the original
- int len = strlen(buf); YY_BUFFER_STATE myBuf = yy_scan_bytes(buf, len); while (pftijah_tokenizelex()) { if ( !handleTijahTerm(tjCtx, pftijah_tokenizetext) )
@@ -125,6 +158,40 @@ return 1; }
+int OPT2useFlexScanner(char* input, struct tijahContextStruct* tjctx) +{
- /* The fast function. In the pftijah context, where lots of small strings
* are tokenized, this function is many times faster than the flex and the
* strtok() methods, which seem to have a rather large overhead.
*/
- register char* s = input;
- register char x;
+// #define EMIT x=*s; *s=0; stream_printf(GDKout,"#[%s]\n",base);if (!handleTijahTerm(tjctx,base)) return 0; *s=x +#define EMIT x=*s; *s=0; if (!handleTijahTerm(tjctx,base)) return 0; *s=x
- while ( 1 ) {
while ( isspace( *s ) ) s++;
if ( *s ) {
char* base = s;
if ( isalnum(*s) ) {
if ( isdigit(*s) ) {
while ( isdigit(*++s) ) ;
EMIT;
} else {
if (isupper(*s)) *s=tolower(*s);
while ( isalnum(*++s) ) if (isupper(*s)) *s=tolower(*s);
EMIT;
}
} else {
// INCOMPLETE, ENTITIES HERE
// stream_printf(GDKout,"#[SKIPPING:%c]\n",*s);
s++;
}
} else
return 1;
- }
+}
char* tijah_tokenize_string(char* buf, int len, char* outbuf) { int cnt = 0; YY_BUFFER_STATE myBuf = yy_scan_bytes(buf, len); @@ -137,9 +204,10 @@ return outbuf; }
-char* flexScanOneTerm(char* buf, int len) { +char* flexScanOneTerm(char* buf) { char *res; char resBUFF[256];
int len = strlen(buf);
YY_BUFFER_STATE myBuf = yy_scan_bytes(buf, len); if ( pftijah_tokenizelex() ) {
Index: nexi.c
RCS file: /cvsroot/monetdb/pathfinder/modules/pftijah/nexi.c,v retrieving revision 1.49 retrieving revision 1.50 diff -u -d -r1.49 -r1.50 --- nexi.c 23 Feb 2007 15:11:05 -0000 1.49 +++ nexi.c 27 Feb 2007 15:43:37 -0000 1.50 @@ -455,6 +455,7 @@ /* * Now find out if the collection is fragmented or not. */
- /* INCOMPLETE, ERROR HERE WITH REFCOUNTS IN HEAD */ BAT* fb = pftu_lookup_bat(pftu_batname1("tj_%s_fragments",(char*)parserCtx->collection,0)); if ( ! fb ) { stream_printf(GDKerr,"Error: cannot find fragments bat for collection "%s".\n",parserCtx->collection);
@@ -471,6 +472,8 @@ parserCtx->ffPfx = ""; parserCtx->flastPfx = ", str(1)"; }
- BBPunfix(BBPcacheid(fb));
- fb = NULL; // Some special cases for NLLR, since NLLR only works with COARSE2 at the moment if ( txt_retr_model->model == MODEL_NLLR ) { // Switch to COARSE2 algebra for NLLR
Index: pftijah_util.mx
RCS file: /cvsroot/monetdb/pathfinder/modules/pftijah/pftijah_util.mx,v retrieving revision 1.2 retrieving revision 1.3 diff -u -d -r1.2 -r1.3 --- pftijah_util.mx 9 Jan 2007 17:15:23 -0000 1.2 +++ pftijah_util.mx 27 Feb 2007 15:43:37 -0000 1.3 @@ -73,6 +73,7 @@ if ( b == bat_nil ) { return NULL; } else {
}BBPfix(b); return BBPdescriptor(b);
}
Take Surveys. Earn Cash. Influence the Future of IT Join SourceForge.net's Techsay panel and you'll get the chance to share your opinions on IT & business topics through brief surveys-and earn cash http://www.techsay.com/default.php?page=join.php&p=sourceforge&CID=D... _______________________________________________ Monetdb-pf-checkins mailing list Monetdb-pf-checkins@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/monetdb-pf-checkins
-- Sjoerd Mullender
participants (2)
-
flokstra
-
Sjoerd Mullender