Re: [Monetdb-developers] MonetDB: default - Implemented another trivial case for grouping...
;-)
Dank je wel! --- Niet alleen voor deze, maar met naam ook voor de hele nieuwe group-by code en al die andere nieuwe schone code!!!
Stefan
On Tue, Aug 21, 2012 at 12:43:42PM +0200, Sjoerd Mullender wrote:
Changeset: 6f001c03cd8d for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=6f001c03cd8d Modified Files: gdk/gdk_group.c Branch: default Log Message:
Implemented another trivial case for grouping: all equal values.
diffs (122 lines):
diff --git a/gdk/gdk_group.c b/gdk/gdk_group.c --- a/gdk/gdk_group.c +++ b/gdk/gdk_group.c @@ -44,11 +44,14 @@
- is always created. In other words, the groups argument may not be
- NULL, but the extents and histo arguments may be NULL.
- There are five different implementations of the grouping code.
- There are six different implementations of the grouping code.
- If it can be trivially determined that all groups are singletons,
- we can produce the outputs trivially.
- If all values in b are known to be equal (both sorted and reverse
- sorted), we produce a single group or copy the input group.
- If the input bats b and g are sorted, or if the subsorted flag is
- set (only used by BATsubsort), we only need to compare consecutive
- values.
@@ -102,6 +105,7 @@ BATgroup_internal(BAT **groups, BAT **ex
if (b->tkey || BATcount(b) <= 1 || (g && (g->tkey || BATtdense(g)))) { /* grouping is trivial: 1 element per group */
if (BATcount(b) == 1 && b->htype == TYPE_oid) ngrp = * (oid *) Hloc(b, BUNfirst(b)); elseALGODEBUG fprintf(stderr, "#BATgroup: trivial case: 1 element per group\n");
@@ -132,6 +136,62 @@ BATgroup_internal(BAT **groups, BAT **ex } return GDK_SUCCEED; }
- if (b->tsorted && b->trevsorted) {
/* all values are equal */
if (g == NULL) {
/* there's only a single group: 0 */
ALGODEBUG fprintf(stderr, "#BATgroup: trivial case: single output group\n");
ngrp = 0;
gn = BATconstant(TYPE_oid, &ngrp, BATcount(b));
if (gn == NULL)
goto error;
BATseqbase(gn, b->hseqbase);
*groups = gn;
if (extents) {
ngrp = gn->hseqbase;
en = BATconstant(TYPE_void, &ngrp, 1);
if (en == NULL)
goto error;
BATseqbase(BATmirror(en), ngrp);
*extents = en;
}
if (histo) {
wrd cnt = (wrd) BATcount(b);
hn = BATconstant(TYPE_wrd, &cnt, 1);
if (hn == NULL)
goto error;
*histo = hn;
}
return GDK_SUCCEED;
}
if ((extents == NULL) == (e == NULL) &&
(histo == NULL) == (h == NULL)) {
/* inherit given grouping; note that if
* extents/histo is to be returned, we need
* e/h available in order to copy them,
* otherwise we will need to calculate them
* which we will do using the "normal" case */
ALGODEBUG fprintf(stderr, "#BATgroup: trivial case: copy input groups\n");
gn = BATcopy(g, g->htype, g->ttype, 0);
if (gn == NULL)
goto error;
*groups = gn;
if (extents) {
en = BATcopy(e, e->htype, e->ttype, 0);
if (en == NULL)
goto error;
*extents = en;
}
if (histo) {
hn = BATcopy(h, h->htype, h->ttype, 0);
if (hn == NULL)
goto error;
*histo = hn;
}
return GDK_SUCCEED;
}
- } assert(g == NULL || !BATtdense(g)); /* i.e. g->ttype == TYPE_oid */ bi = bat_iterator(b); cmp = BATatoms[b->ttype].atomCmp;
@@ -167,6 +227,7 @@ BATgroup_internal(BAT **groups, BAT **ex (g == NULL || g->tsorted || g->trevsorted)) || subsorted) { /* we only need to compare each entry with the previous */
if (grps) prev = *grps++; pv = BUNtail(bi, BUNfirst(b));ALGODEBUG fprintf(stderr, "#BATgroup: compare consecutive values\n");
@@ -220,6 +281,7 @@ BATgroup_internal(BAT **groups, BAT **ex /* for each value, we need to scan all previous equal * values (a consecutive, possibly empty, range) to * see if we can find one in the same old group */
pv = BUNtail(bi, BUNfirst(b)); ngrps[0] = ngrp; if (extents)ALGODEBUG fprintf(stderr, "#BATgroup: subscan old groups\n");
@@ -284,6 +346,7 @@ BATgroup_internal(BAT **groups, BAT **ex } } else if (b->T->hash) { /* we already have a hash table on b */
hs = b->T->hash; for (r = BUNfirst(b), p = r, q = r + BATcount(b); p < q; p++) { v = BUNtail(bi, p);ALGODEBUG fprintf(stderr, "#BATgroup: use existing hash table\n");
@@ -343,6 +406,7 @@ BATgroup_internal(BAT **groups, BAT **ex * build an incomplete hash table on the fly--also see * BATassertHeadProps and BATderiveHeadProps for * similar code */
nme = BBP_physical(b->batCacheid); nmelen = strlen(nme); if ((hp = GDKzalloc(sizeof(Heap))) == NULL ||ALGODEBUG fprintf(stderr, "#BATgroup: create partial hash table\n");
Checkin-list mailing list Checkin-list@monetdb.org http://mail.monetdb.org/mailman/listinfo/checkin-list
participants (1)
-
Stefan Manegold