[Monetdb-developers] [Monetdb-checkins] MonetDB5/src/modules/mal/rdf rdf_shredder.mx, Feb2010, 1.13, 1.13.2.1

Sjoerd Mullender sjoerd at acm.org
Mon Feb 22 22:52:57 CET 2010


Are you sure *all* of these changes are for disabling the tokenizer?  Including
the property changes?

None of this will now be propagated, so if any of this should land in
the HEAD, you need to do it manually (but then please do so identically).

On 2010-02-22 16:43, Lefteris Sidirourgos wrote:
> Update of /cvsroot/monetdb/MonetDB5/src/modules/mal/rdf
> In directory sfp-cvsdas-1.v30.ch3.sourceforge.com:/tmp/cvs-serv32552/src/modules/mal/rdf
> 
> Modified Files:
>       Tag: Feb2010
> 	rdf_shredder.mx 
> Log Message:
> disable the tokenizer.mx from m5
> 
> 
> Index: rdf_shredder.mx
> ===================================================================
> RCS file: /cvsroot/monetdb/MonetDB5/src/modules/mal/rdf/rdf_shredder.mx,v
> retrieving revision 1.13
> retrieving revision 1.13.2.1
> diff -u -d -r1.13 -r1.13.2.1
> --- rdf_shredder.mx	8 Jan 2010 12:08:58 -0000	1.13
> +++ rdf_shredder.mx	22 Feb 2010 15:43:41 -0000	1.13.2.1
> @@ -27,7 +27,9 @@
>  #include "url.h"
>  #include "rdf.h"
>  #include "raptor.h"
> +#if 0
>  #include "../tokenizer.h"
> +#endif
>  
>  typedef struct graphBATdef {
>  	graphBATType batType;    /* BAT type             */
> @@ -36,6 +38,8 @@
>  	int tailType;            /* type of right column */
>  } graphBATdef;
>  
> +static BUN batsz = 10000000;
> +
>  /* this list should be kept alligned with the graphBATType enum */
>  #if STORE == TRIPLE_STORE
>   static graphBATdef graphdef[N_GRAPH_BAT] = {
> @@ -132,7 +136,8 @@
>   @:rdf_BUNappend_unq(@1, @2)@
>  #endif
>  
> -@= rdf_BUNappend_unq
> +
> +@= rdf_BUNappend_unq_1
>  bun = BUNfnd(BATmirror(@1),(ptr)@2);
>  if (bun == BUN_NONE) {
>  	if (BATcount(@1) > 4 * @1->T->hash->mask) {
> @@ -148,6 +153,20 @@
>  	bun = (@1)->hseqbase + bun;
>  }
>  
> +@= rdf_BUNappend_unq
> +bun = BUNfnd(BATmirror(@1),(ptr)@2);
> +if (bun == BUN_NONE) {
> +	if (BATcount(@1) > 4 * @1->T->hash->mask) {
> +		HASHdestroy(@1);
> +		BAThash(BATmirror(@1), 2*BATcount(@1));
> +	}
> +	bun = (BUN) @1->batCount;
> +	@1 = BUNappend(@1, (ptr)@2, TRUE);
> +	if (@1 == NULL) {
> +		@:raptor_exception(pdata, "could not append in at 1")@
> +	}
> +}
> +
>  @= rdf_tknzr_insert
>  {
>  	str t = @1;
> @@ -226,12 +245,9 @@
>  
>  	/* disable all properties */
>  	b->tsorted = FALSE;
> -	b->T->nosorted = 0;
>  	b->tdense = FALSE;
> -	b->T->nodense = 0;
>  	b->tkey = FALSE;
> -	b->T->nokey[0] = 0;
> -	b->T->nokey[1] = 1;
> +	b->hdense = TRUE;
>  
>  	return b;
>  }
> @@ -264,7 +280,7 @@
>  		pdata->graph[i] = create_BAT (
>  				graphdef[i].headType,
>  				graphdef[i].tailType,
> -				BATTINY);                       /* DOTO: estimate size */
> +				batsz);                       /* DOTO: estimate size */
>  		if (pdata->graph[i] == NULL) {
>  			return NULL;
>  		}
> @@ -274,7 +290,7 @@
>  	pdata->graph[MAP_LEX] = create_BAT (
>  			graphdef[MAP_LEX].headType,
>  			graphdef[MAP_LEX].tailType,
> -			BATTINY);                           /* DOTO: estimate size */
> +			batsz);                           /* DOTO: estimate size */
>  	if (pdata->graph[MAP_LEX] == NULL) {
>  		return NULL;
>  	}
> @@ -346,10 +362,10 @@
>  static str
>  post_processing (parserData *pdata)
>  {
> -#if STORE == TRIPLE_STORE
>  	BUN cnt;
>  	BAT *map_oid = NULL, *S = NULL, *P = NULL, *O = NULL, *ctref= NULL;
>  	BAT **graph = pdata->graph;
> +#ifdef _TKNZR_H
>  	BATiter bi, mi;
>  	BUN p, d, r;
>  	oid *bt;
> @@ -379,7 +395,31 @@
>  	P = graph[P_sort];
>  	O = graph[O_sort];
>  	cnt = BATcount(S);
> +#else
> +	/* order MAP_LEX */
> +	BATorder(BATmirror(graph[MAP_LEX]));
> +	map_oid = BATmark(graph[MAP_LEX], 0);   /* BATmark will create a copy */
> +	BATorder(map_oid);
> +	BATsetaccess(map_oid, BAT_READ);        /* force BAtmark not to copy bat */
> +	map_oid = BATmirror(BATmark(BATmirror(map_oid), 0));
> +	BATsetaccess(graph[MAP_LEX], BAT_READ); /* force BATmark not to copy bat */
> +	graph[MAP_LEX] = BATmirror(BATmark(BATmirror(graph[MAP_LEX]), 0));
> +
> +	/* convert old oids of S_sort, P_sort, O_sort to new ones */
> +	cnt = BATcount(graph[S_sort]);
> +	S = BATleftfetchjoin(graph[S_sort], map_oid, cnt);
> +	if (S == NULL) goto bailout;
> +	BBPreclaim(graph[S_sort]);
> +	P = BATleftfetchjoin(graph[P_sort], map_oid, cnt);
> +	if (P == NULL) goto bailout;
> +	BBPreclaim(graph[P_sort]);
> +	O = BATleftfetchjoin(graph[O_sort], map_oid, cnt);
> +	if (O == NULL) goto bailout;
> +	BBPreclaim(graph[O_sort]);
> +	BBPreclaim(map_oid);
> +#endif
>  
> +#if STORE == TRIPLE_STORE
>  	/* order SPO/SOP */
>  	graph[S_sort] = BATmirror(BATsort(BATmirror(S))); /* sort on S */
>  	@:order(graph[S_sort],P,O,PO)@
> @@ -408,6 +448,14 @@
>  
>  	return MAL_SUCCEED;
>  
> +#elif STORE == MLA_STORE
> +	graph[S_sort] = S;
> +	graph[P_sort] = P;
> +	graph[O_sort] = O;
> +
> +	return MAL_SUCCEED;
> +#endif
> +
>  bailout:
>  	if (map_oid != NULL) BBPreclaim(map_oid);
>  	if (ctref   != NULL) BBPreclaim(ctref);
> @@ -415,11 +463,6 @@
>  	if (P       != NULL) BBPreclaim(P);
>  	if (O       != NULL) BBPreclaim(O);
>  	return NULL;
> -
> -#elif STORE == MLA_STORE
> -	(void) pdata;
> -	return MAL_SUCCEED;
> -#endif
>  }
>  
>  @= clean_raptor
> @@ -455,15 +498,21 @@
>  	(void) graphname;
>  
>  	/* init tokenizer */
> +#ifdef _TKNZR_H
>  	if (TKNZRopen (NULL, schema) != MAL_SUCCEED) {
>  		throw(RDF, "rdf.rdfShred",
>  				"could not open the tokenizer\n");
>  	}
> +#else
> +	(void) schema;
> +#endif
>  
>  	/* Init pdata  */
>  	pdata = parserData_create(*location);
>  	if (pdata == NULL) {
> +#ifdef _TKNZR_H
>  		TKNZRclose(&iret);
> +#endif
>  		@:clean@
>  		throw(RDF, "rdf.rdfShred",
>  				"could not allocate enough memory for pdata\n");
> @@ -473,7 +522,9 @@
>  	raptor_init();
>  	pdata->rparser = rparser = raptor_new_parser("guess");
>  	if (rparser == NULL) {
> +#ifdef _TKNZR_H
>  		TKNZRclose(&iret);
> +#endif
>  		raptor_finish();
>  		@:clean@
>  		throw(RDF, "rdf.rdfShred", "could not create raptor parser object\n");
> @@ -484,7 +535,9 @@
>  	/* Parse URI or local file. */
>  	ret = URLisaURL(&isURI, location);
>  	if (ret != MAL_SUCCEED) {
> +#ifdef _TKNZR_H
>  		TKNZRclose(&iret);
> +#endif
>  		@:clean@
>  		return ret;
>  	} else if (isURI) {
> @@ -496,6 +549,9 @@
>  		iret = raptor_parse_file(rparser, uri, NULL);
>  	}
>  	@:clean_raptor@
> +#ifdef _TKNZR_H
> +	TKNZRclose(&iret);
> +#endif
>  
>  	graph = pdata->graph;
>  	assert (pdata->tcount == BATcount(graph[S_sort]) &&
> @@ -504,23 +560,18 @@
>  
>  	/* error check */
>  	if (iret) {
> -		TKNZRclose(&iret);
>  		@:clean@
>  		throw(RDF, "rdf.rdfShred", "parsing failed\n");
>  	}
>  	if (pdata->exception) {
> -		TKNZRclose(&iret);
>  		throw(RDF, "rdf.rdfShred", "%s\n", pdata->exceptionMsg);
>  	} else if (pdata->fatal) {
> -		TKNZRclose(&iret);
>  		throw(RDF, "rdf.rdfShred", "last fatal error was:\n%s\n",
>  				pdata->fatalMsg);
>  	} else if (pdata->error) {
> -		TKNZRclose(&iret);
>  		throw(RDF, "rdf.rdfShred", "last error was:\n%s\n",
>  				pdata->errorMsg);
>  	} else if (pdata->warning) {
> -		TKNZRclose(&iret);
>  		throw(RDF, "rdf.rdfShred", "last warning was:\n%s\n",
>  				pdata->warningMsg);
>  	}
> @@ -528,7 +579,6 @@
>  	/* post processing step */
>  	ret = post_processing(pdata);
>  	if (ret != MAL_SUCCEED) {
> -		TKNZRclose(&iret);
>  		@:clean@
>  		throw(RDF, "rdf.rdfShred", "could not post-proccess data");
>  	}
> @@ -536,7 +586,6 @@
>  	/* prepare return bat of bats */
>  	retbat = BATnew(TYPE_void, TYPE_bat, N_GRAPH_BAT);
>  	if (retbat == NULL) {
> -		TKNZRclose(&iret);
>  		@:clean@
>  		throw(RDF, "rdf.rdfShred",
>  				"could not allocate enough memory for return bat");
> @@ -548,7 +597,6 @@
>  	}
>  
>  	GDKfree(pdata);
> -	TKNZRclose(&iret);
>  	BBPkeepref(*retval = retbat->batCacheid);
>  
>  	return MAL_SUCCEED;
> 
> 
> ------------------------------------------------------------------------------
> Download Intel® Parallel Studio Eval
> Try the new software tools for yourself. Speed compiling, find bugs
> proactively, and fine-tune applications for parallel performance.
> See why Intel Parallel Studio got high marks during beta.
> http://p.sf.net/sfu/intel-sw-dev
> _______________________________________________
> Monetdb-checkins mailing list
> Monetdb-checkins at lists.sourceforge.net
> https://lists.sourceforge.net/lists/listinfo/monetdb-checkins


-- 
Sjoerd Mullender




More information about the developers-list mailing list