MonetDB: Jul2017 - Properly respect DEFAULT values in the Python...

Stefan Manegold Stefan.Manegold at cwi.nl
Mon Nov 13 20:08:04 CET 2017


Mark,

thanks for taking care of this!!

Unfortunately, it does not seem to work correctly, yet.

For strings, as in your pyloader01.sql test (see below),
the used/inserted default value appears to include the
single-quotes "'" form the SQL syntax --- I assume you
approved those by accident --- and for integers "strange
values" are inserted; see test
sql/test/BugTracker-2017/Tests/python_loader_clobbers_default_with_null.Bug-6464.sql

Best,
Stefan

----- On Nov 13, 2017, at 5:47 PM, Mark Raasveldt commits+m.raasveldt=cwi.nl at monetdb.org wrote:

> Changeset: 4303f7489624 for MonetDB
> URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=4303f7489624
> Modified Files:
>	sql/backends/monet5/Tests/pyloader01.sql
>	sql/backends/monet5/Tests/pyloader01.stable.out
>	sql/backends/monet5/UDF/pyapi/emit.c
>	sql/backends/monet5/UDF/pyapi/pyloader.c
>	sql/include/sql_catalog.h
>	sql/server/rel_updates.c
> Branch: Jul2017
> Log Message:
> 
> Properly respect DEFAULT values in the Python loader.
> 
> 
> diffs (145 lines):
> 
> diff --git a/sql/backends/monet5/Tests/pyloader01.sql
> b/sql/backends/monet5/Tests/pyloader01.sql
> --- a/sql/backends/monet5/Tests/pyloader01.sql
> +++ b/sql/backends/monet5/Tests/pyloader01.sql
> @@ -1,7 +1,7 @@
> 
> START TRANSACTION;
> 
> -CREATE TABLE mytable(a DOUBLE, d int, s STRING);
> +CREATE TABLE mytable(a DOUBLE, d int, s STRING DEFAULT 'hello');
> 
> CREATE LOADER myfunc() LANGUAGE PYTHON {
> 	_emit.emit({'a':42,'d':1})
> @@ -23,10 +23,10 @@ SELECT name,func,mod,language,type,side_
> 
> 
> -- there is a reason for this, functions with 0, 1, 2 and 3+ arguments are
> handled differently.
> +COPY LOADER INTO mytable FROM myfunc();
> COPY LOADER INTO mytable FROM myfunc3(46, 'asdf', 3.2);
> COPY LOADER INTO mytable FROM myfunc2(45, 'asdf');
> COPY LOADER INTO mytable FROM myfunc1(44);
> -COPY LOADER INTO mytable FROM myfunc();
> 
> SELECT * FROM mytable;
> 
> diff --git a/sql/backends/monet5/Tests/pyloader01.stable.out
> b/sql/backends/monet5/Tests/pyloader01.stable.out
> --- a/sql/backends/monet5/Tests/pyloader01.stable.out
> +++ b/sql/backends/monet5/Tests/pyloader01.stable.out
> @@ -56,11 +56,11 @@ Ready.
> % sys.mytable,	sys.mytable,	sys.mytable # table_name
> % a,	d,	s # name
> % double,	int,	clob # type
> -% 24,	1,	5 # length
> +% 24,	1,	7 # length
> +[ 42,	1,	"'hello'"	]
> [ 46,	4,	"hello"	]
> -[ 45,	3,	NULL	]
> -[ 44,	2,	NULL	]
> -[ 42,	1,	NULL	]
> +[ 45,	3,	"'hello'"	]
> +[ 44,	2,	"'hello'"	]
> #DROP TABLE mytable;
> #DROP ALL LOADER myfunc;
> #CREATE LOADER myfunc() LANGUAGE PYTHON {
> diff --git a/sql/backends/monet5/UDF/pyapi/emit.c
> b/sql/backends/monet5/UDF/pyapi/emit.c
> --- a/sql/backends/monet5/UDF/pyapi/emit.c
> +++ b/sql/backends/monet5/UDF/pyapi/emit.c
> @@ -199,6 +199,7 @@ PyObject *PyEmit_Emit(PyEmitObject *self
> 
> 				self->cols[self->ncols].b = COLnew(0, bat_type, 0, TRANSIENT);
> 				self->cols[self->ncols].name = GDKstrdup(val);
> +				self->cols[self->ncols].def = NULL;
> 				if (self->nvals > 0) {
> 					// insert NULL values up until the current entry
> 					for (ai = 0; ai < self->nvals; ai++) {
> @@ -209,9 +210,9 @@ PyObject *PyEmit_Emit(PyEmitObject *self
> 							goto wrapup;
> 						}
> 					}
> -					self->cols[i].b->tnil = 1;
> -					self->cols[i].b->tnonil = 0;
> -					BATsetcount(self->cols[i].b, self->nvals);
> +					self->cols[self->ncols].b->tnil = 1;
> +					self->cols[self->ncols].b->tnonil = 0;
> +					BATsetcount(self->cols[self->ncols].b, self->nvals);
> 				}
> 				self->ncols++;
> 			}
> @@ -337,14 +338,21 @@ PyObject *PyEmit_Emit(PyEmitObject *self
> 				self->cols[i].b->tnonil = 1 - self->cols[i].b->tnil;
> 			}
> 		} else {
> +			void* nill_value = ATOMnil(self->cols[i].b->ttype);
> +			void* default_value = self->cols[i].def ?
> +								self->cols[i].def :
> +								nill_value;
> 			for (ai = 0; ai < (size_t)el_count; ai++) {
> -				if (BUNappend(self->cols[i].b, ATOMnil(self->cols[i].b->ttype),
> +				if (BUNappend(self->cols[i].b,
> +							  default_value,
> 							  0) != GDK_SUCCEED) {
> 					goto wrapup;
> 				}
> 			}
> -			self->cols[i].b->tnil = 1;
> -			self->cols[i].b->tnonil = 0;
> +			if (BATatoms[self->cols[i].b->ttype].atomCmp(default_value, nill_value) ==
> 0) {
> +				self->cols[i].b->tnil = 1;
> +				self->cols[i].b->tnonil = 0;
> +			}
> 		}
> 		BATsetcount(self->cols[i].b, self->nvals + el_count);
> 	}
> diff --git a/sql/backends/monet5/UDF/pyapi/pyloader.c
> b/sql/backends/monet5/UDF/pyapi/pyloader.c
> --- a/sql/backends/monet5/UDF/pyapi/pyloader.c
> +++ b/sql/backends/monet5/UDF/pyapi/pyloader.c
> @@ -146,6 +146,9 @@ PYFUNCNAME(PyAPIevalLoader)(Client cntxt
> 			assert(i < pci->retc);
> 			cols[i].name = GDKstrdup(*((char **)n->data));
> 			n = n->next;
> +			assert(n);
> +			cols[i].def = n->data;
> +			n = n->next;
> 			cols[i].b =
> 				COLnew(0, getBatType(getArgType(mb, pci, i)), 0, TRANSIENT);
> 			cols[i].b->tnil = 0;
> diff --git a/sql/include/sql_catalog.h b/sql/include/sql_catalog.h
> --- a/sql/include/sql_catalog.h
> +++ b/sql/include/sql_catalog.h
> @@ -599,6 +599,7 @@ extern node *find_sql_func_node(sql_sche
> typedef struct {
> 	BAT *b;
> 	char* name;
> +	void* def;
> } sql_emit_col;
> 
> #endif /* SQL_CATALOG_H */
> diff --git a/sql/server/rel_updates.c b/sql/server/rel_updates.c
> --- a/sql/server/rel_updates.c
> +++ b/sql/server/rel_updates.c
> @@ -1173,7 +1173,7 @@ table_column_types(sql_allocator *sa, sq
> }
> 
> static list *
> -table_column_names(sql_allocator *sa, sql_table *t)
> +table_column_names_and_defaults(sql_allocator *sa, sql_table *t)
> {
> 	node *n;
> 	list *types = sa_list(sa);
> @@ -1181,6 +1181,7 @@ table_column_names(sql_allocator *sa, sq
> 	if (t->columns.set) for (n = t->columns.set->h; n; n = n->next) {
> 		sql_column *c = n->data;
> 		append(types, &c->base.name);
> +		append(types, c->def);
> 	}
> 	return types;
> }
> @@ -1577,7 +1578,7 @@ copyfromloader(mvc *sql, dlist *qname, s
> 		return NULL;
> 	}
> 	((sql_subfunc*) import->f)->res = table_column_types(sql->sa, t);
> -	((sql_subfunc*) import->f)->colnames = table_column_names(sql->sa, t);
> +	((sql_subfunc*) import->f)->colnames =
> table_column_names_and_defaults(sql->sa, t);
> 
> 	for (n = t->columns.set->h; n; n = n->next) {
> 		sql_column *c = n->data;
> _______________________________________________
> checkin-list mailing list
> checkin-list at monetdb.org
> https://www.monetdb.org/mailman/listinfo/checkin-list

-- 
| Stefan.Manegold at CWI.nl | DB Architectures   (DA) |
| www.CWI.nl/~manegold/  | Science Park 123 (L321) |
| +31 (0)20 592-4212     | 1098 XG Amsterdam  (NL) |


More information about the developers-list mailing list