View | Details | Raw Unified | Return to bug 6533
Collapse All | Expand All

(-)a/monetdb5/modules/mal/pcre.c (-52 / +272 lines)
Lines 26-32 Link Here
26
26
27
#ifdef HAVE_LIBPCRE
27
#ifdef HAVE_LIBPCRE
28
#include <pcre.h>
28
#include <pcre.h>
29
#ifndef PCRE_STUDY_JIT_COMPILE
29
#ifdef PCRE_STUDY_JIT_COMPILE
30
#define HAVE_LIBPCRE_JIT
31
#else
30
/* old library version on e.g. EPEL 6 */
32
/* old library version on e.g. EPEL 6 */
31
#define pcre_free_study(x)		pcre_free(x)
33
#define pcre_free_study(x)		pcre_free(x)
32
#endif
34
#endif
Lines 65-76 Link Here
65
mal_export str BATPCREilike2(bat *ret, const bat *b, const str *pat);
67
mal_export str BATPCREilike2(bat *ret, const bat *b, const str *pat);
66
mal_export str BATPCREnotilike(bat *ret, const bat *b, const str *pat, const str *esc);
68
mal_export str BATPCREnotilike(bat *ret, const bat *b, const str *pat, const str *esc);
67
mal_export str BATPCREnotilike2(bat *ret, const bat *b, const str *pat);
69
mal_export str BATPCREnotilike2(bat *ret, const bat *b, const str *pat);
70
mal_export str PCREsimilarto3(bit *ret, const str *s, const str *pat, const str *esc);
71
mal_export str PCREsimilarto2(bit *ret, const str *s, const str *pat);
72
mal_export str PCREnotsimilarto3(bit *ret, const str *s, const str *pat, const str *esc);
73
mal_export str PCREnotsimilarto2(bit *ret, const str *s, const str *pat);
74
mal_export str BATPCREsimilarto(bat *ret, const bat *b, const str *pat, const str *esc);
75
mal_export str BATPCREsimilarto2(bat *ret, const bat *b, const str *pat);
76
mal_export str BATPCREnotsimilarto(bat *ret, const bat *b, const str *pat, const str *esc);
77
mal_export str BATPCREnotsimilarto2(bat *ret, const bat *b, const str *pat);
68
78
69
mal_export str PCRElikeselect2(bat *ret, const bat *bid, const bat *sid, const str *pat, const str *esc, const bit *caseignore, const bit *anti);
79
mal_export str PCRElikeselect2(bat *ret, const bat *bid, const bat *sid, const str *pat, const str *esc, const bit *caseignore, const bit *anti);
70
mal_export str PCRElikeselect1(bat *ret, const bat *bid, const bat *cid, const str *pat, const str *esc, const bit *anti);
80
mal_export str PCRElikeselect1(bat *ret, const bat *bid, const bat *cid, const str *pat, const str *esc, const bit *anti);
71
mal_export str PCRElikeselect3(bat *ret, const bat *bid, const bat *sid, const str *pat, const str *esc, const bit *anti);
81
mal_export str PCRElikeselect3(bat *ret, const bat *bid, const bat *sid, const str *pat, const str *esc, const bit *anti);
72
mal_export str PCRElikeselect4(bat *ret, const bat *bid, const bat *cid, const str *pat, const bit *anti);
82
mal_export str PCRElikeselect4(bat *ret, const bat *bid, const bat *cid, const str *pat, const bit *anti);
73
mal_export str PCRElikeselect5(bat *ret, const bat *bid, const bat *sid, const str *pat, const bit *anti);
83
mal_export str PCRElikeselect5(bat *ret, const bat *bid, const bat *sid, const str *pat, const bit *anti);
84
mal_export str PCREsimilartoselect2(bat *ret, const bat *bid, const bat *sid, const str *pat, const str *esc, const bit *caseignore, const bit *anti);
85
mal_export str PCREsimilartoselect3(bat *ret, const bat *bid, const bat *sid, const str *pat, const str *esc, const bit *anti);
86
mal_export str PCREsimilartoselect5(bat *ret, const bat *bid, const bat *sid, const str *pat, const bit *anti);
74
87
75
mal_export str LIKEjoin(bat *r1, bat *r2, const bat *lid, const bat *rid, const str *esc, const bat *slid, const bat *srid, const bit *nil_matches, const lng *estimate);
88
mal_export str LIKEjoin(bat *r1, bat *r2, const bat *lid, const bat *rid, const str *esc, const bat *slid, const bat *srid, const bit *nil_matches, const lng *estimate);
76
mal_export str LIKEjoin1(bat *r1, bat *r2, const bat *lid, const bat *rid, const bat *slid, const bat *srid, const bit *nil_matches, const lng *estimate);
89
mal_export str LIKEjoin1(bat *r1, bat *r2, const bat *lid, const bat *rid, const bat *slid, const bat *srid, const bit *nil_matches, const lng *estimate);
Lines 112-118 Link Here
112
#endif
125
#endif
113
126
114
static int
127
static int
115
re_simple(const char *pat)
128
re_simple(const char *pat, int similarto)
116
{
129
{
117
	int nr = 0;
130
	int nr = 0;
118
131
Lines 121-127 Link Here
121
	if (*pat == '%')
134
	if (*pat == '%')
122
		pat++;
135
		pat++;
123
	while (*pat) {
136
	while (*pat) {
124
		if (*pat == '_')
137
		if (*pat == '_' || (similarto && strchr("|*+?{([", *pat)))
125
			return 0;
138
			return 0;
126
		if (*pat++ == '%')
139
		if (*pat++ == '%')
127
			nr++;
140
			nr++;
Lines 132-140 Link Here
132
}
145
}
133
146
134
static int
147
static int
135
is_strcmpable(const char *pat, const str esc)
148
is_strcmpable(const char *pat, const str esc, int similarto)
136
{
149
{
137
	if (pat[strcspn(pat, "%_")])
150
	const char *specials = similarto ? "|*+?{([%_" : "%_";
151
	if (pat[strcspn(pat, specials)])
138
		return 0;
152
		return 0;
139
	return strlen(esc) == 0 || strstr(pat, esc) == NULL;
153
	return strlen(esc) == 0 || strstr(pat, esc) == NULL;
140
}
154
}
Lines 294-307 Link Here
294
pcre_likeselect(BAT **bnp, BAT *b, BAT *s, const char *pat, int caseignore, int anti)
308
pcre_likeselect(BAT **bnp, BAT *b, BAT *s, const char *pat, int caseignore, int anti)
295
{
309
{
296
#ifdef HAVE_LIBPCRE
310
#ifdef HAVE_LIBPCRE
297
	int options = PCRE_UTF8 | PCRE_MULTILINE | PCRE_DOTALL;
311
	int options = PCRE_UTF8 | PCRE_MULTILINE | PCRE_DOTALL | PCRE_NO_AUTO_CAPTURE;
298
	pcre *re;
312
	pcre *re;
299
	pcre_extra *pe;
313
	pcre_extra *pe;
300
	const char *error;
314
	const char *error;
301
	int errpos;
315
	int errpos;
302
	int ovector[10];
316
	int ovector[10];
317
#ifdef HAVE_LIBPCRE_JIT
318
	pcre_jit_stack *jitstack;
319
#define FREE_PCRE_JIT() pcre_jit_stack_free(jitstack)
303
#else
320
#else
304
	int options = REG_NEWLINE | REG_NOSUB;
321
#define FREE_PCRE_JIT()
322
#endif
323
#else
324
	int options = REG_NEWLINE | REG_NOSUB | REG_EXTENDED;
305
	pcre re;
325
	pcre re;
306
	int errcode;
326
	int errcode;
307
#endif
327
#endif
Lines 325-336 Link Here
325
	if ((re = pcre_compile(pat, options, &error, &errpos, NULL)) == NULL)
345
	if ((re = pcre_compile(pat, options, &error, &errpos, NULL)) == NULL)
326
		throw(MAL, "pcre.likeselect",
346
		throw(MAL, "pcre.likeselect",
327
			  OPERATION_FAILED ": compilation of pattern \"%s\" failed\n", pat);
347
			  OPERATION_FAILED ": compilation of pattern \"%s\" failed\n", pat);
328
	pe = pcre_study(re, 0, &error);
348
	options = 0;
349
#ifdef HAVE_LIBPCRE_JIT
350
	options |= PCRE_STUDY_JIT_COMPILE;
351
#endif
352
	pe = pcre_study(re, options, &error);
329
	if (error != NULL) {
353
	if (error != NULL) {
330
		pcre_free(re);
354
		pcre_free(re);
331
		throw(MAL, "pcre.likeselect",
355
		throw(MAL, "pcre.likeselect",
332
			  OPERATION_FAILED ": studying pattern \"%s\" failed\n", pat);
356
			  OPERATION_FAILED ": studying pattern \"%s\" failed (%d)\n", pat, options);
333
	}
357
	}
358
#ifdef HAVE_LIBPCRE_JIT
359
	if ((jitstack = pcre_jit_stack_alloc(32768, 512*1024)) == NULL) {
360
		pcre_free_study(pe);
361
		pcre_free(re);
362
		throw(MAL, "pcre.likeselect",
363
			  OPERATION_FAILED ": JIT stack allocation \"%s\" failed\n", pat);
364
	}
365
#endif
334
#else
366
#else
335
	if ((errcode = regcomp(&re, pat, options)) != 0) {
367
	if ((errcode = regcomp(&re, pat, options)) != 0) {
336
		throw(MAL, "pcre.likeselect",
368
		throw(MAL, "pcre.likeselect",
Lines 340-345 Link Here
340
	bn = COLnew(0, TYPE_oid, s ? BATcount(s) : BATcount(b), TRANSIENT);
372
	bn = COLnew(0, TYPE_oid, s ? BATcount(s) : BATcount(b), TRANSIENT);
341
	if (bn == NULL) {
373
	if (bn == NULL) {
342
#ifdef HAVE_LIBPCRE
374
#ifdef HAVE_LIBPCRE
375
		FREE_PCRE_JIT();
343
		pcre_free_study(pe);
376
		pcre_free_study(pe);
344
		pcre_free(re);
377
		pcre_free(re);
345
#else
378
#else
Lines 363-369 Link Here
363
		p = SORTfndfirst(s, &b->hseqbase);
396
		p = SORTfndfirst(s, &b->hseqbase);
364
		candlist = (const oid *) Tloc(s, p);
397
		candlist = (const oid *) Tloc(s, p);
365
#ifdef HAVE_LIBPCRE
398
#ifdef HAVE_LIBPCRE
366
#define BODY     (pcre_exec(re, pe, v, (int) strlen(v), 0, 0, ovector, 10) >= 0)
399
#ifdef HAVE_LIBPCRE_JIT
400
#define BODY     (pcre_jit_exec(re, pe, v, (int) strlen(v), 0, PCRE_NO_UTF8_CHECK, ovector, 10, jitstack) >= 0)
401
#else
402
#define BODY     (pcre_exec(re, pe, v, (int) strlen(v), 0, PCRE_NO_UTF8_CHECK, ovector, 10) >= 0)
403
#endif
367
#else
404
#else
368
#define BODY     (regexec(&re, v, (size_t) 0, NULL, 0) != REG_NOMATCH)
405
#define BODY     (regexec(&re, v, (size_t) 0, NULL, 0) != REG_NOMATCH)
369
#endif
406
#endif
Lines 390-395 Link Here
390
			scanloop(v && *v != '\200' && BODY);
427
			scanloop(v && *v != '\200' && BODY);
391
	}
428
	}
392
#ifdef HAVE_LIBPCRE
429
#ifdef HAVE_LIBPCRE
430
	FREE_PCRE_JIT();
393
	pcre_free_study(pe);
431
	pcre_free_study(pe);
394
	pcre_free(re);
432
	pcre_free(re);
395
#else
433
#else
Lines 408-413 Link Here
408
  bunins_failed:
446
  bunins_failed:
409
	BBPreclaim(bn);
447
	BBPreclaim(bn);
410
#ifdef HAVE_LIBPCRE
448
#ifdef HAVE_LIBPCRE
449
	FREE_PCRE_JIT();
411
	pcre_free_study(pe);
450
	pcre_free_study(pe);
412
	pcre_free(re);
451
	pcre_free(re);
413
#else
452
#else
Lines 437-443 Link Here
437
	off = b->hseqbase;
476
	off = b->hseqbase;
438
477
439
	if (!use_strcmp) {
478
	if (!use_strcmp) {
440
		nr = re_simple(pat);
479
		nr = re_simple(pat, 0);
441
		re = re_create(pat, nr);
480
		re = re_create(pat, nr);
442
		if (!re)
481
		if (!re)
443
			throw(MAL, "pcre.likeselect", MAL_MALLOC_FAIL);
482
			throw(MAL, "pcre.likeselect", MAL_MALLOC_FAIL);
Lines 936-949 Link Here
936
	return MAL_SUCCEED;
975
	return MAL_SUCCEED;
937
}
976
}
938
977
939
#ifdef HAVE_LIBPCRE
978
/* special characters in PCRE and POSIX extended regular expressions that need to
940
/* special characters in PCRE that need to be escaped */
979
 * be escaped */
941
static const char *pcre_specials = ".+?*()[]{}|^$\\";
980
static const char *pcre_specials = ".+?*()[]{}|^$\\";
942
#else
943
/* special characters in POSIX basic regular expressions that need to
944
 * be escaped */
945
static const char *pcre_specials = ".*[]^$\\";
946
#endif
947
981
948
/* change SQL LIKE pattern into PCRE pattern */
982
/* change SQL LIKE pattern into PCRE pattern */
949
static str
983
static str
Lines 1024-1029 Link Here
1024
	return MAL_SUCCEED;
1058
	return MAL_SUCCEED;
1025
}
1059
}
1026
1060
1061
/* change SQL SIMILAR TO pattern into PCRE pattern */
1062
static str
1063
similar2pcre(str *r, const char *pat, const char *esc_str)
1064
{
1065
	int escaped = 0;
1066
	int hasWildcard = 0;
1067
	char *ppat;
1068
	int esc = esc_str[0]; /* should change to utf8_convert() */
1069
	int specials;
1070
	int c;
1071
	char *openparen = NULL;
1072
1073
	if (pat == NULL )
1074
		throw(MAL, "pcre.similar2pcre", OPERATION_FAILED);
1075
	ppat = GDKmalloc(strlen(pat)*2+5 /* 3 = "^('the translated regexp')$0" */);
1076
	if (ppat == NULL)
1077
		throw(MAL, "pcre.similar2pcre", MAL_MALLOC_FAIL);
1078
1079
	*r = ppat;
1080
	/* The escape character can be a char which is special in a PCRE
1081
	 * expression.  If the user used the "+" char as escape and has "++"
1082
	 * in its pattern, then replacing this with "+" is not correct and
1083
	 * should be "\+" instead. */
1084
	specials = (*esc_str && strchr(pcre_specials, esc) != NULL);
1085
1086
	*ppat++ = '^';
1087
	*ppat++ = '(';
1088
	while ((c = *pat++) != 0) {
1089
		if (c == esc) {
1090
			if (escaped) {
1091
				if (specials) { /* change ++ into \+ */
1092
					*ppat++ = esc;
1093
				} else { /* do not escape simple escape symbols */
1094
					ppat[-1] = esc; /* overwrite backslash */
1095
				}
1096
				escaped = 0;
1097
			} else {
1098
				*ppat++ = '\\';
1099
				escaped = 1;
1100
			}
1101
			hasWildcard = 1;
1102
		} else if (c == '%' && !escaped) {
1103
			*ppat++ = '.';
1104
			*ppat++ = '*';
1105
			hasWildcard = 1;
1106
		} else if (c == '_' && !escaped) {
1107
			*ppat++ = '.';
1108
			hasWildcard = 1;
1109
		} else if (c == '.' || c == '^' || c == '$' || c == '\\') {
1110
			/* characters which are special in PCRE but normal in SIMILAR TO
1111
			(including backslash when it's not the escape character) */
1112
			if (!escaped)
1113
				*ppat++ = '\\';
1114
			*ppat++ = c;
1115
			escaped = 0;
1116
		} else if (strchr("|*+?{}[]()", c)) {
1117
			/* characters special in both SQL and PCRE */
1118
			if (c == '?' && openparen == ppat - 1) {
1119
				/* magic escape sequence in PCRE, invalid in SIMILAR TO */
1120
				GDKfree(*r);
1121
				*r = NULL;
1122
				throw(MAL, "pcre.similar2pcre", OPERATION_FAILED);
1123
			} else {
1124
				if (c == '(' && !escaped)
1125
					openparen = ppat;
1126
				*ppat++ = c;
1127
			}
1128
			hasWildcard = 1;
1129
			escaped = 0;
1130
		} else {
1131
			if (escaped) {
1132
				ppat[-1] = c; /* overwrite backslash of invalid escape */
1133
			} else {
1134
				*ppat++ = c;
1135
			}
1136
			escaped = 0;
1137
		}
1138
	}
1139
	/* no wildcard, or escape character at end of string */
1140
	if (!hasWildcard || escaped) {
1141
		GDKfree(*r);
1142
		*r = NULL;
1143
		if (escaped)
1144
			throw(MAL, "pcre.similar2pcre", OPERATION_FAILED);
1145
		*r = GDKstrdup(str_nil);
1146
		if (*r == NULL)
1147
			throw(MAL, "pcre.similar2pcre", MAL_MALLOC_FAIL);
1148
	} else {
1149
		*ppat++ = ')';
1150
		*ppat++ = '$';
1151
		*ppat = 0;
1152
	}
1153
	return MAL_SUCCEED;
1154
}
1155
1027
/* change SQL PATINDEX pattern into PCRE pattern */
1156
/* change SQL PATINDEX pattern into PCRE pattern */
1028
static str
1157
static str
1029
pat2pcre(str *r, const char *pat)
1158
pat2pcre(str *r, const char *pat)
Lines 1160-1169 Link Here
1160
}
1289
}
1161
1290
1162
static str
1291
static str
1163
PCRElike4(bit *ret, const str *s, const str *pat, const str *esc, const bit *isens)
1292
PCRElike4(bit *ret, const str *s, const str *pat, const str *esc, const bit *isens, int similarto)
1164
{
1293
{
1165
	char *ppat = NULL;
1294
	char *ppat = NULL;
1166
	str r = sql2pcre(&ppat, *pat, *esc);
1295
	str r = similarto ? similar2pcre(&ppat, *pat, *esc) : sql2pcre(&ppat, *pat, *esc);
1167
1296
1168
	if (!r) {
1297
	if (!r) {
1169
		assert(ppat);
1298
		assert(ppat);
Lines 1194-1200 Link Here
1194
{
1323
{
1195
	bit no = FALSE;
1324
	bit no = FALSE;
1196
1325
1197
	return PCRElike4(ret, s, pat, esc, &no);
1326
	return PCRElike4(ret, s, pat, esc, &no, FALSE);
1198
}
1327
}
1199
1328
1200
str
1329
str
Lines 1232-1238 Link Here
1232
{
1361
{
1233
	bit yes = TRUE;
1362
	bit yes = TRUE;
1234
1363
1235
	return PCRElike4(ret, s, pat, esc, &yes);
1364
	return PCRElike4(ret, s, pat, esc, &yes, FALSE);
1236
}
1365
}
1237
1366
1238
str
1367
str
Lines 1265-1275 Link Here
1265
	return MAL_SUCCEED;
1394
	return MAL_SUCCEED;
1266
}
1395
}
1267
1396
1397
str
1398
PCREsimilarto3(bit *ret, const str *s, const str *pat, const str *esc)
1399
{
1400
	bit no = FALSE;
1401
1402
	return PCRElike4(ret, s, pat, esc, &no, TRUE);
1403
}
1404
1405
str
1406
PCREsimilarto2(bit *ret, const str *s, const str *pat)
1407
{
1408
	char *esc = "";
1409
1410
	return PCREsimilarto3(ret, s, pat, &esc);
1411
}
1412
1413
str
1414
PCREnotsimilarto3(bit *ret, const str *s, const str *pat, const str *esc)
1415
{
1416
	str tmp;
1417
	bit r;
1418
1419
	rethrow("str.not_like", tmp, PCREsimilarto3(&r, s, pat, esc));
1420
	*ret = !r;
1421
	return MAL_SUCCEED;
1422
}
1423
1424
str
1425
PCREnotsimilarto2(bit *ret, const str *s, const str *pat)
1426
{
1427
	str tmp;
1428
	bit r;
1429
1430
	rethrow("str.not_like", tmp, PCREsimilarto2(&r, s, pat));
1431
	*ret = !r;
1432
	return MAL_SUCCEED;
1433
}
1434
1268
static str
1435
static str
1269
BATPCRElike3(bat *ret, const bat *bid, const str *pat, const str *esc, const bit *isens, const bit *not)
1436
BATPCRElike3(bat *ret, const bat *bid, const str *pat, const str *esc, const bit *isens, const bit *not, int similarto)
1270
{
1437
{
1271
	char *ppat = NULL;
1438
	char *ppat = NULL;
1272
	str res = sql2pcre(&ppat, *pat, *esc);
1439
	str res = similarto ? similar2pcre(&ppat, *pat, *esc) : sql2pcre(&ppat, *pat, *esc);
1273
1440
1274
	if (res == MAL_SUCCEED) {
1441
	if (res == MAL_SUCCEED) {
1275
		BAT *strs = BATdescriptor(*bid);
1442
		BAT *strs = BATdescriptor(*bid);
Lines 1313-1319 Link Here
1313
			pcre *re;
1480
			pcre *re;
1314
#else
1481
#else
1315
			pcre re;
1482
			pcre re;
1316
			int options = REG_NEWLINE | REG_NOSUB;
1483
			int options = REG_NEWLINE | REG_NOSUB | REG_EXTENDED;
1317
			int errcode;
1484
			int errcode;
1318
#endif
1485
#endif
1319
1486
Lines 1397-1403 Link Here
1397
{
1564
{
1398
	bit no = FALSE;
1565
	bit no = FALSE;
1399
1566
1400
	return BATPCRElike3(ret, bid, pat, esc, &no, &no);
1567
	return BATPCRElike3(ret, bid, pat, esc, &no, &no, FALSE);
1401
}
1568
}
1402
1569
1403
str
1570
str
Lines 1414-1420 Link Here
1414
	bit no = FALSE;
1581
	bit no = FALSE;
1415
	bit yes = TRUE;
1582
	bit yes = TRUE;
1416
1583
1417
	return BATPCRElike3(ret, bid, pat, esc, &no, &yes);
1584
	return BATPCRElike3(ret, bid, pat, esc, &no, &yes, FALSE);
1418
}
1585
}
1419
1586
1420
str
1587
str
Lines 1431-1437 Link Here
1431
	bit yes = TRUE;
1598
	bit yes = TRUE;
1432
	bit no = FALSE;
1599
	bit no = FALSE;
1433
1600
1434
	return BATPCRElike3(ret, bid, pat, esc, &yes, &no);
1601
	return BATPCRElike3(ret, bid, pat, esc, &yes, &no, FALSE);
1435
}
1602
}
1436
1603
1437
str
1604
str
Lines 1447-1453 Link Here
1447
{
1614
{
1448
	bit yes = TRUE;
1615
	bit yes = TRUE;
1449
1616
1450
	return BATPCRElike3(ret, bid, pat, esc, &yes, &yes);
1617
	return BATPCRElike3(ret, bid, pat, esc, &yes, &yes, FALSE);
1451
}
1618
}
1452
1619
1453
str
1620
str
Lines 1459-1465 Link Here
1459
}
1626
}
1460
1627
1461
str
1628
str
1462
PCRElikeselect2(bat *ret, const bat *bid, const bat *sid, const str *pat, const str *esc, const bit *caseignore, const bit *anti)
1629
BATPCREsimilarto(bat *ret, const bat *bid, const str *pat, const str *esc)
1630
{
1631
	bit no = FALSE;
1632
1633
	return BATPCRElike3(ret, bid, pat, esc, &no, &no, TRUE);
1634
}
1635
1636
str
1637
BATPCREsimilarto2(bat *ret, const bat *bid, const str *pat)
1638
{
1639
	char *esc = "\\";
1640
1641
	return BATPCREsimilarto(ret, bid, pat, &esc);
1642
}
1643
1644
str
1645
BATPCREnotsimilarto(bat *ret, const bat *bid, const str *pat, const str *esc)
1646
{
1647
	bit no = FALSE;
1648
	bit yes = TRUE;
1649
1650
	return BATPCRElike3(ret, bid, pat, esc, &no, &yes, TRUE);
1651
}
1652
1653
str
1654
BATPCREnotsimilarto2(bat *ret, const bat *bid, const str *pat)
1655
{
1656
	char *esc = "\\";
1657
1658
	return BATPCREnotsimilarto(ret, bid, pat, &esc);
1659
}
1660
1661
static str
1662
PCREregexselect(bat *ret, const bat *bid, const bat *sid, const str pat, str esc, int caseignore, int anti, int similarto)
1463
{
1663
{
1464
	BAT *b, *s = NULL, *bn = NULL;
1664
	BAT *b, *s = NULL, *bn = NULL;
1465
	str res;
1665
	str res;
Lines 1476-1489 Link Here
1476
	}
1676
	}
1477
1677
1478
	/* no escape, try if a simple list of keywords works */
1678
	/* no escape, try if a simple list of keywords works */
1479
	if (is_strcmpable(*pat, *esc)) {
1679
	if (is_strcmpable(pat, esc, similarto)) {
1480
		use_re = 1;
1680
		use_re = 1;
1481
		use_strcmp = 1;
1681
		use_strcmp = 1;
1482
	} else if ((strcmp(*esc, str_nil) == 0 || strlen(*esc) == 0) &&
1682
	} else if ((strcmp(esc, str_nil) == 0 || strlen(esc) == 0) &&
1483
			   re_simple(*pat) > 0) {
1683
			   re_simple(pat, similarto) > 0) {
1484
		use_re = 1;
1684
		use_re = 1;
1485
	} else {
1685
	} else {
1486
		res = sql2pcre(&ppat, *pat, strcmp(*esc, str_nil) != 0 ? *esc : "\\");
1686
		if (strcmp(esc, str_nil) == 0)
1687
			esc = "\\";
1688
		res = similarto ? similar2pcre(&ppat, pat, esc) : sql2pcre(&ppat, pat, esc);
1487
		if (res != MAL_SUCCEED) {
1689
		if (res != MAL_SUCCEED) {
1488
			BBPunfix(b->batCacheid);
1690
			BBPunfix(b->batCacheid);
1489
			if (s)
1691
			if (s)
Lines 1493-1500 Link Here
1493
		if (strcmp(ppat, str_nil) == 0) {
1695
		if (strcmp(ppat, str_nil) == 0) {
1494
			GDKfree(ppat);
1696
			GDKfree(ppat);
1495
			ppat = NULL;
1697
			ppat = NULL;
1496
			if (*caseignore) {
1698
			if (caseignore) {
1497
				ppat = GDKmalloc(strlen(*pat) + 3);
1699
				ppat = GDKmalloc(strlen(pat) + 3);
1498
				if (ppat == NULL) {
1700
				if (ppat == NULL) {
1499
					BBPunfix(b->batCacheid);
1701
					BBPunfix(b->batCacheid);
1500
					if (s)
1702
					if (s)
Lines 1502-1524 Link Here
1502
					throw(MAL, "algebra.likeselect", MAL_MALLOC_FAIL);
1704
					throw(MAL, "algebra.likeselect", MAL_MALLOC_FAIL);
1503
				}
1705
				}
1504
				ppat[0] = '^';
1706
				ppat[0] = '^';
1505
				strcpy(ppat + 1, *pat);
1707
				strcpy(ppat + 1, pat);
1506
				strcat(ppat, "$");
1708
				strcat(ppat, "$");
1507
			}
1709
			}
1508
		}
1710
		}
1509
	}
1711
	}
1510
1712
1511
	if (use_re) {
1713
	if (use_re) {
1512
		res = re_likeselect(&bn, b, s, *pat, *caseignore, *anti, use_strcmp);
1714
		res = re_likeselect(&bn, b, s, pat, caseignore, anti, use_strcmp);
1513
	} else if (ppat == NULL) {
1715
	} else if (ppat == NULL) {
1514
		/* no pattern and no special characters: can use normal select */
1716
		/* no pattern and no special characters: can use normal select */
1515
		bn = BATselect(b, s, *pat, NULL, 1, 1, *anti);
1717
		bn = BATselect(b, s, pat, NULL, 1, 1, anti);
1516
		if (bn == NULL)
1718
		if (bn == NULL)
1517
			res = createException(MAL, "algebra.likeselect", GDK_EXCEPTION);
1719
			res = createException(MAL, "algebra.likeselect", GDK_EXCEPTION);
1518
		else
1720
		else
1519
			res = MAL_SUCCEED;
1721
			res = MAL_SUCCEED;
1520
	} else {
1722
	} else {
1521
		res = pcre_likeselect(&bn, b, s, ppat, *caseignore, *anti);
1723
		res = pcre_likeselect(&bn, b, s, ppat, caseignore, anti);
1522
	}
1724
	}
1523
	BBPunfix(b->batCacheid);
1725
	BBPunfix(b->batCacheid);
1524
	if (s)
1726
	if (s)
Lines 1533-1565 Link Here
1533
}
1735
}
1534
1736
1535
str
1737
str
1738
PCRElikeselect2(bat *ret, const bat *bid, const bat *sid, const str *pat, const str *esc, const bit *caseignore, const bit *anti)
1739
{
1740
	return PCREregexselect(ret, bid, sid, *pat, *esc, *caseignore, *anti, FALSE);
1741
}
1742
1743
str
1536
PCRElikeselect1(bat *ret, const bat *bid, const bat *cid, const str *pat, const str *esc, const bit *anti)
1744
PCRElikeselect1(bat *ret, const bat *bid, const bat *cid, const str *pat, const str *esc, const bit *anti)
1537
{
1745
{
1538
	const bit f = TRUE;
1746
	return PCREregexselect(ret, bid, cid, *pat, *esc, TRUE, *anti, FALSE);
1539
	return PCRElikeselect2(ret, bid, cid, pat, esc, &f, anti);
1540
}
1747
}
1541
1748
1542
str
1749
str
1543
PCRElikeselect3(bat *ret, const bat *bid, const bat *sid, const str *pat, const str *esc, const bit *anti)
1750
PCRElikeselect3(bat *ret, const bat *bid, const bat *sid, const str *pat, const str *esc, const bit *anti)
1544
{
1751
{
1545
	const bit f = FALSE;
1752
	return PCREregexselect(ret, bid, sid, *pat, *esc, FALSE, *anti, FALSE);
1546
	return PCRElikeselect2(ret, bid, sid, pat, esc, &f, anti);
1547
}
1753
}
1548
1754
1549
str
1755
str
1550
PCRElikeselect4(bat *ret, const bat *bid, const bat *cid, const str *pat, const bit *anti)
1756
PCRElikeselect4(bat *ret, const bat *bid, const bat *cid, const str *pat, const bit *anti)
1551
{
1757
{
1552
	const bit f = TRUE;
1758
	return PCREregexselect(ret, bid, cid, *pat, "", TRUE, *anti, FALSE);
1553
	const str esc ="";
1554
	return PCRElikeselect2(ret, bid, cid, pat, &esc, &f, anti);
1555
}
1759
}
1556
1760
1557
str
1761
str
1558
PCRElikeselect5(bat *ret, const bat *bid, const bat *sid, const str *pat, const bit *anti)
1762
PCRElikeselect5(bat *ret, const bat *bid, const bat *sid, const str *pat, const bit *anti)
1559
{
1763
{
1560
	const bit f = FALSE;
1764
	return PCREregexselect(ret, bid, sid, *pat, "", FALSE, *anti, FALSE);
1561
	const str esc ="";
1765
}
1562
	return PCRElikeselect2(ret, bid, sid, pat, &esc, &f, anti);
1766
1767
str
1768
PCREsimilartoselect2(bat *ret, const bat *bid, const bat *sid, const str *pat, const str *esc, const bit *caseignore, const bit *anti)
1769
{
1770
	return PCREregexselect(ret, bid, sid, *pat, *esc, *caseignore, *anti, TRUE);
1771
}
1772
1773
str
1774
PCREsimilartoselect3(bat *ret, const bat *bid, const bat *sid, const str *pat, const str *esc, const bit *anti)
1775
{
1776
	return PCREregexselect(ret, bid, sid, *pat, *esc, FALSE, *anti, TRUE);
1777
}
1778
1779
str
1780
PCREsimilartoselect5(bat *ret, const bat *bid, const bat *sid, const str *pat, const bit *anti)
1781
{
1782
	return PCREregexselect(ret, bid, sid, *pat, "", FALSE, *anti, TRUE);
1563
}
1783
}
1564
1784
1565
#include "gdk_cand.h"
1785
#include "gdk_cand.h"
Lines 1597-1603 Link Here
1597
#else
1817
#else
1598
	int pcrere = 0;
1818
	int pcrere = 0;
1599
	pcre regex;
1819
	pcre regex;
1600
	int options =  REG_NEWLINE | REG_NOSUB;
1820
	int options =  REG_NEWLINE | REG_NOSUB | REG_EXTENDED;
1601
	int errcode = -1;
1821
	int errcode = -1;
1602
#endif
1822
#endif
1603
1823
Lines 1665-1671 Link Here
1665
		}
1885
		}
1666
		if (strcmp(vr, str_nil) == 0)
1886
		if (strcmp(vr, str_nil) == 0)
1667
			continue;
1887
			continue;
1668
		if (*esc == 0 && (nr = re_simple(vr)) > 0) {
1888
		if (*esc == 0 && (nr = re_simple(vr, 0)) > 0) {
1669
			re = re_create(vr, nr);
1889
			re = re_create(vr, nr);
1670
			if (re == NULL) {
1890
			if (re == NULL) {
1671
				msg = createException(MAL, "pcre.join", MAL_MALLOC_FAIL);
1891
				msg = createException(MAL, "pcre.join", MAL_MALLOC_FAIL);
(-)a/monetdb5/modules/mal/pcre.mal (+17 lines)
Lines 60-65 Link Here
60
command algebra.ilike(s:str, pat:str):bit address PCREilike2;
60
command algebra.ilike(s:str, pat:str):bit address PCREilike2;
61
command algebra.not_ilike(s:str, pat:str, esc:str):bit address PCREnotilike3;
61
command algebra.not_ilike(s:str, pat:str, esc:str):bit address PCREnotilike3;
62
command algebra.not_ilike(s:str, pat:str):bit address PCREnotilike2;
62
command algebra.not_ilike(s:str, pat:str):bit address PCREnotilike2;
63
command algebra.similarto(s:str, pat:str, esc:str):bit address PCREsimilarto3;
64
command algebra.similarto(s:str, pat:str):bit address PCREsimilarto2;
65
command algebra.not_similarto(s:str, pat:str, esc:str):bit address PCREnotsimilarto3;
66
command algebra.not_similarto(s:str, pat:str):bit address PCREnotsimilarto2;
63
67
64
module batpcre;
68
module batpcre;
65
69
Lines 76-81 Link Here
76
command batalgebra.ilike(s:bat[:str], pat:str):bat[:bit] address BATPCREilike2;
80
command batalgebra.ilike(s:bat[:str], pat:str):bat[:bit] address BATPCREilike2;
77
command batalgebra.not_ilike(s:bat[:str], pat:str, esc:str):bat[:bit] address BATPCREnotilike;
81
command batalgebra.not_ilike(s:bat[:str], pat:str, esc:str):bat[:bit] address BATPCREnotilike;
78
command batalgebra.not_ilike(s:bat[:str], pat:str):bat[:bit] address BATPCREnotilike2;
82
command batalgebra.not_ilike(s:bat[:str], pat:str):bat[:bit] address BATPCREnotilike2;
83
command batalgebra.similarto(s:bat[:str], pat:str, esc:str):bat[:bit] address BATPCREsimilarto;
84
command batalgebra.similarto(s:bat[:str], pat:str):bat[:bit] address BATPCREsimilarto2;
85
command batalgebra.not_similarto(s:bat[:str], pat:str, esc:str):bat[:bit] address BATPCREnotsimilarto;
86
command batalgebra.not_similarto(s:bat[:str], pat:str):bat[:bit] address BATPCREnotsimilarto2;
79
87
80
command algebra.likeselect(b:bat[:str], s:bat[:oid], pat:str, esc:str, caseignore:bit, anti:bit) :bat[:oid]
88
command algebra.likeselect(b:bat[:str], s:bat[:oid], pat:str, esc:str, caseignore:bit, anti:bit) :bat[:oid]
81
address PCRElikeselect2
89
address PCRElikeselect2
Lines 99-104 Link Here
99
command algebra.ilikeselect(b:bat[:str], cand:bat[:oid], pat:str, anti:bit) :bat[:oid]
107
command algebra.ilikeselect(b:bat[:str], cand:bat[:oid], pat:str, anti:bit) :bat[:oid]
100
address PCRElikeselect4;
108
address PCRElikeselect4;
101
109
110
command algebra.similartoselect(b:bat[:str], s:bat[:oid], pat:str, esc:str, caseignore:bit, anti:bit) :bat[:oid]
111
address PCREsimilartoselect2;
112
113
command algebra.similartoselect(b:bat[:str], cand:bat[:oid], pat:str, esc:str, anti:bit) :bat[:oid]
114
address PCREsimilartoselect3;
115
116
command algebra.similartoselect(b:bat[:str], cand:bat[:oid], pat:str, anti:bit) :bat[:oid]
117
address PCREsimilartoselect5;
118
102
command algebra.likejoin(l:bat[:str],r:bat[:str],esc:str,sl:bat[:oid],sr:bat[:oid],nil_matches:bit,estimate:lng) (:bat[:oid],:bat[:oid])
119
command algebra.likejoin(l:bat[:str],r:bat[:str],esc:str,sl:bat[:oid],sr:bat[:oid],nil_matches:bit,estimate:lng) (:bat[:oid],:bat[:oid])
103
address LIKEjoin
120
address LIKEjoin
104
comment "Join the string bat L with the pattern bat R
121
comment "Join the string bat L with the pattern bat R
(-)a/monetdb5/optimizer/opt_prelude.c (+6 lines)
Lines 181-186 Link Here
181
str nextRef;
181
str nextRef;
182
str not_ilikeRef;
182
str not_ilikeRef;
183
str not_likeRef;
183
str not_likeRef;
184
str not_similartoRef;
184
str notRef;
185
str notRef;
185
str not_uniqueRef;
186
str not_uniqueRef;
186
str oidRef;
187
str oidRef;
Lines 242-247 Link Here
242
str setAccessRef;
243
str setAccessRef;
243
str setVariableRef;
244
str setVariableRef;
244
str setWriteModeRef;
245
str setWriteModeRef;
246
str similartoRef;
247
str similartoselectRef;
245
str singleRef;
248
str singleRef;
246
str sinkRef;
249
str sinkRef;
247
str sliceRef;
250
str sliceRef;
Lines 440-445 Link Here
440
	ilikeselectRef = putName("ilikeselect");
443
	ilikeselectRef = putName("ilikeselect");
441
	likethetaselectRef = putName("likethetaselect");
444
	likethetaselectRef = putName("likethetaselect");
442
	ilikethetaselectRef = putName("ilikethetaselect");
445
	ilikethetaselectRef = putName("ilikethetaselect");
446
	not_similartoRef = putName("not_similarto");
443
	not_likeRef = putName("not_like");
447
	not_likeRef = putName("not_like");
444
	not_ilikeRef = putName("not_ilike");
448
	not_ilikeRef = putName("not_ilike");
445
	lockRef = putName("lock");
449
	lockRef = putName("lock");
Lines 530-535 Link Here
530
	setAccessRef = putName("setAccess");
534
	setAccessRef = putName("setAccess");
531
	setVariableRef = putName("setVariable");
535
	setVariableRef = putName("setVariable");
532
	setWriteModeRef= putName("setWriteMode");
536
	setWriteModeRef= putName("setWriteMode");
537
	similartoRef = putName("similarto");
538
	similartoselectRef = putName("similartoselect");
533
	sinkRef = putName("sink");
539
	sinkRef = putName("sink");
534
	sliceRef = putName("slice");
540
	sliceRef = putName("slice");
535
	subsliceRef = putName("subslice");
541
	subsliceRef = putName("subslice");
(-)a/monetdb5/optimizer/opt_prelude.h (+3 lines)
Lines 181-186 Link Here
181
mal_export  str nextRef;
181
mal_export  str nextRef;
182
mal_export  str not_ilikeRef;
182
mal_export  str not_ilikeRef;
183
mal_export  str not_likeRef;
183
mal_export  str not_likeRef;
184
mal_export  str not_similartoRef;
184
mal_export  str notRef;
185
mal_export  str notRef;
185
mal_export  str not_uniqueRef;
186
mal_export  str not_uniqueRef;
186
mal_export  str oidRef;
187
mal_export  str oidRef;
Lines 242-247 Link Here
242
mal_export  str setAccessRef;
243
mal_export  str setAccessRef;
243
mal_export  str setVariableRef;
244
mal_export  str setVariableRef;
244
mal_export  str setWriteModeRef;
245
mal_export  str setWriteModeRef;
246
mal_export  str similartoRef;
247
mal_export  str similartoselectRef;
245
mal_export  str singleRef;
248
mal_export  str singleRef;
246
mal_export  str sinkRef;
249
mal_export  str sinkRef;
247
mal_export  str sliceRef;
250
mal_export  str sliceRef;
(-)a/monetdb5/optimizer/opt_pushselect.c (-3 / +3 lines)
Lines 175-181 Link Here
175
		if (isSlice(p))
175
		if (isSlice(p))
176
			nr_topn++;
176
			nr_topn++;
177
177
178
		if (isLikeOp(p))
178
		if (isLikeOrSimilarToOp(p))
179
			nr_likes++;
179
			nr_likes++;
180
180
181
		if (getModuleId(p) == sqlRef && getFunctionId(p) == deltaRef)
181
		if (getModuleId(p) == sqlRef && getFunctionId(p) == deltaRef)
Lines 312-319 Link Here
312
			int var = getArg(p, 1);
312
			int var = getArg(p, 1);
313
			InstrPtr q = mb->stmt[vars[var]]; /* BEWARE: the optimizer may not add or remove statements ! */
313
			InstrPtr q = mb->stmt[vars[var]]; /* BEWARE: the optimizer may not add or remove statements ! */
314
314
315
			if (isLikeOp(q)) { /* TODO check if getArg(p, 3) value == TRUE */
315
			if (isLikeOrSimilarToOp(q)) { /* TODO check if getArg(p, 3) value == TRUE */
316
				InstrPtr r = newInstruction(mb, algebraRef, likeselectRef);
316
				InstrPtr r = newInstruction(mb, algebraRef, isLikeOp(q) ? likeselectRef : similartoselectRef);
317
				int has_cand = (getArgType(mb, p, 2) == newBatType(TYPE_oid)); 
317
				int has_cand = (getArgType(mb, p, 2) == newBatType(TYPE_oid)); 
318
				int a, anti = (getFunctionId(q)[0] == 'n'), ignore_case = (getFunctionId(q)[anti?4:0] == 'i');
318
				int a, anti = (getFunctionId(q)[0] == 'n'), ignore_case = (getFunctionId(q)[anti?4:0] == 'i');
319
319
(-)a/monetdb5/optimizer/opt_support.c (+10 lines)
Lines 535-540 Link Here
535
		 getFunctionId(p) == not_ilikeRef));
535
		 getFunctionId(p) == not_ilikeRef));
536
}
536
}
537
537
538
/*
 * Return non-zero when instruction p is a SIMILAR TO operator, i.e. its
 * module id is batalgebraRef and its function id is either similartoRef
 * or not_similartoRef.  Ids are compared with ==, not by string content.
 */
int isSimilarToOp(InstrPtr p){
539
	return	(getModuleId(p) == batalgebraRef &&
540
		(getFunctionId(p) == similartoRef || 
541
		 getFunctionId(p) == not_similartoRef));
542
}
543
544
/*
 * Return non-zero when instruction p satisfies isLikeOp() or
 * isSimilarToOp(); a convenience predicate for callers that handle both
 * pattern-matching operator families the same way.
 */
int isLikeOrSimilarToOp(InstrPtr p){
545
	return isLikeOp(p) || isSimilarToOp(p);
546
}
547
538
int 
548
int 
539
isTopn(InstrPtr p)
549
isTopn(InstrPtr p)
540
{
550
{
(-)a/monetdb5/optimizer/opt_support.h (+2 lines)
Lines 43-48 Link Here
43
mal_export int isMatLeftJoinOp(InstrPtr q);
43
mal_export int isMatLeftJoinOp(InstrPtr q);
44
mal_export int isMapOp(InstrPtr q);
44
mal_export int isMapOp(InstrPtr q);
45
mal_export int isLikeOp(InstrPtr q);
45
mal_export int isLikeOp(InstrPtr q);
46
mal_export int isSimilarToOp(InstrPtr q);
47
mal_export int isLikeOrSimilarToOp(InstrPtr q);
46
mal_export int isTopn(InstrPtr q);
48
mal_export int isTopn(InstrPtr q);
47
mal_export int isSlice(InstrPtr q);
49
mal_export int isSlice(InstrPtr q);
48
mal_export int isSample(InstrPtr q);
50
mal_export int isSample(InstrPtr q);
(-)a/sql/backends/monet5/sql_upgrades.c (-1 / +5 lines)
Lines 1380-1386 Link Here
1380
1380
1381
	/* 09_like.sql */
1381
	/* 09_like.sql */
1382
	pos += snprintf(buf + pos, bufsize - pos,
1382
	pos += snprintf(buf + pos, bufsize - pos,
1383
			"update sys.functions set side_effect = false where name in ('like', 'ilike') and schema_id = (select id from sys.schemas where name = 'sys');\n");
1383
			"update sys.functions set side_effect = false where name in ('like', 'ilike') and schema_id = (select id from sys.schemas where name = 'sys');\n"
1384
			"create filter function similarto(val string, pat string, esc string) external name algebra.similarto;\n"
1385
			"create filter function similarto(val string, pat string) external name algebra.similarto;\n"
1386
			"grant execute on filter function similarto (string, string, string) to public;\n"
1387
			"grant execute on filter function similarto (string, string) to public;\n");
1384
1388
1385
	/* 25_debug.sql */
1389
	/* 25_debug.sql */
1386
	pos += snprintf(buf + pos, bufsize - pos,
1390
	pos += snprintf(buf + pos, bufsize - pos,
(-)a/sql/common/sql_types.c (+2 lines)
Lines 1828-1833 Link Here
1828
		sql_create_func3(sa, "not_like", "algebra", "not_like", *t, *t, *t, BIT, SCALE_NONE);
1828
		sql_create_func3(sa, "not_like", "algebra", "not_like", *t, *t, *t, BIT, SCALE_NONE);
1829
		sql_create_func(sa, "not_ilike", "algebra", "not_ilike", *t, *t, BIT, SCALE_NONE);
1829
		sql_create_func(sa, "not_ilike", "algebra", "not_ilike", *t, *t, BIT, SCALE_NONE);
1830
		sql_create_func3(sa, "not_ilike", "algebra", "not_ilike", *t, *t, *t, BIT, SCALE_NONE);
1830
		sql_create_func3(sa, "not_ilike", "algebra", "not_ilike", *t, *t, *t, BIT, SCALE_NONE);
1831
		sql_create_func(sa, "not_similarto", "algebra", "not_similarto", *t, *t, BIT, SCALE_NONE);
1832
		sql_create_func3(sa, "not_similarto", "algebra", "not_similarto", *t, *t, *t, BIT, SCALE_NONE);
1831
1833
1832
		sql_create_func(sa, "patindex", "pcre", "patindex", *t, *t, INT, SCALE_NONE);
1834
		sql_create_func(sa, "patindex", "pcre", "patindex", *t, *t, INT, SCALE_NONE);
1833
		sql_create_func(sa, "truncate", "str", "stringleft", *t, INT, *t, SCALE_NONE);
1835
		sql_create_func(sa, "truncate", "str", "stringleft", *t, INT, *t, SCALE_NONE);
(-)a/sql/scripts/09_like.sql (+4 lines)
Lines 8-15 Link Here
8
create filter function "ilike"(val string, pat string, esc string) external name algebra."ilike";
8
create filter function "ilike"(val string, pat string, esc string) external name algebra."ilike";
9
create filter function "like"(val string, pat string) external name algebra."like";
9
create filter function "like"(val string, pat string) external name algebra."like";
10
create filter function "ilike"(val string, pat string) external name algebra."ilike";
10
create filter function "ilike"(val string, pat string) external name algebra."ilike";
11
create filter function "similarto"(val string, pat string, esc string) external name algebra."similarto";
12
create filter function "similarto"(val string, pat string) external name algebra."similarto";
11
13
12
grant execute on filter function "like" (string, string, string) to public;
14
grant execute on filter function "like" (string, string, string) to public;
13
grant execute on filter function "ilike" (string, string, string) to public;
15
grant execute on filter function "ilike" (string, string, string) to public;
14
grant execute on filter function "like" (string, string) to public;
16
grant execute on filter function "like" (string, string) to public;
15
grant execute on filter function "ilike" (string, string) to public;
17
grant execute on filter function "ilike" (string, string) to public;
18
grant execute on filter function "similarto" (string, string, string) to public;
19
grant execute on filter function "similarto" (string, string) to public;
(-)a/sql/server/rel_select.c (-11 / +18 lines)
Lines 2264-2278 Link Here
2264
	}
2264
	}
2265
	case SQL_LIKE:
2265
	case SQL_LIKE:
2266
	case SQL_NOT_LIKE:
2266
	case SQL_NOT_LIKE:
2267
	case SQL_SIMILARTO:
2268
	case SQL_NOT_SIMILARTO:
2267
	{
2269
	{
2268
		symbol *lo = sc->data.lval->h->data.sym;
2270
		symbol *lo = sc->data.lval->h->data.sym;
2269
		symbol *ro = sc->data.lval->h->next->data.sym;
2271
		symbol *ro = sc->data.lval->h->next->data.sym;
2270
		int insensitive = sc->data.lval->h->next->next->data.i_val;
2272
		int insensitive = sc->data.lval->h->next->next->data.i_val;
2271
		int anti = (sc->token == SQL_NOT_LIKE) != (sc->data.lval->h->next->next->next->data.i_val != 0);
2273
		int anti = (sc->token == SQL_NOT_LIKE || sc->token == SQL_NOT_SIMILARTO) != (sc->data.lval->h->next->next->next->data.i_val != 0);
2274
		int regex = sc->token == SQL_SIMILARTO || sc->token == SQL_NOT_SIMILARTO;
2272
		sql_subtype *st = sql_bind_localtype("str");
2275
		sql_subtype *st = sql_bind_localtype("str");
2273
		sql_exp *le = rel_value_exp(sql, rel, lo, f, ek);
2276
		sql_exp *le = rel_value_exp(sql, rel, lo, f, ek);
2274
		sql_exp *re, *ee = NULL;
2277
		sql_exp *re, *ee = NULL;
2275
		char *like = insensitive ? (anti ? "not_ilike" : "ilike") : (anti ? "not_like" : "like");
2278
		char *like = regex ? (anti ? "not_similarto" : "similarto")
2279
		                   : (insensitive ? (anti ? "not_ilike" : "ilike") : (anti ? "not_like" : "like"));
2276
		sql_schema *sys = mvc_bind_schema(sql, "sys");
2280
		sql_schema *sys = mvc_bind_schema(sql, "sys");
2277
2281
2278
		if (!le)
2282
		if (!le)
Lines 2280-2286 Link Here
2280
2284
2281
		if (!exp_subtype(le)) 
2285
		if (!exp_subtype(le)) 
2282
			return sql_error(sql, 02, "SELECT: parameter not allowed on "
2286
			return sql_error(sql, 02, "SELECT: parameter not allowed on "
2283
					"left hand side of LIKE operator");
2287
					"left hand side of LIKE/SIMILAR TO operator");
2284
2288
2285
		lo = ro->data.lval->h->data.sym;
2289
		lo = ro->data.lval->h->data.sym;
2286
		/* like uses a single string pattern */
2290
		/* like uses a single string pattern */
Lines 2290-2298 Link Here
2290
			return NULL;
2294
			return NULL;
2291
		if (!exp_subtype(re)) {
2295
		if (!exp_subtype(re)) {
2292
			if (rel_set_type_param(sql, st, re, 0) == -1) 
2296
			if (rel_set_type_param(sql, st, re, 0) == -1) 
2293
				return sql_error(sql, 02, "LIKE: wrong type, should be string");
2297
				return sql_error(sql, 02, "LIKE/SIMILAR TO: wrong type, should be string");
2294
		} else if ((re = rel_check_type(sql, st, re, type_equal)) == NULL) {
2298
		} else if ((re = rel_check_type(sql, st, re, type_equal)) == NULL) {
2295
			return sql_error(sql, 02, "LIKE: wrong type, should be string");
2299
			return sql_error(sql, 02, "LIKE/SIMILAR TO: wrong type, should be string");
2296
		}
2300
		}
2297
		/* Do we need to escape ? */
2301
		/* Do we need to escape ? */
2298
		if (dlist_length(ro->data.lval) == 2) {
2302
		if (dlist_length(ro->data.lval) == 2) {
Lines 2875-2885 Link Here
2875
	}
2879
	}
2876
	case SQL_LIKE:
2880
	case SQL_LIKE:
2877
	case SQL_NOT_LIKE:
2881
	case SQL_NOT_LIKE:
2882
	case SQL_SIMILARTO:
2883
	case SQL_NOT_SIMILARTO:
2878
	{
2884
	{
2879
		symbol *lo = sc->data.lval->h->data.sym;
2885
		symbol *lo = sc->data.lval->h->data.sym;
2880
		symbol *ro = sc->data.lval->h->next->data.sym;
2886
		symbol *ro = sc->data.lval->h->next->data.sym;
2881
		int insensitive = sc->data.lval->h->next->next->data.i_val;
2887
		int insensitive = sc->data.lval->h->next->next->data.i_val;
2882
		int anti = (sc->token == SQL_NOT_LIKE) != (sc->data.lval->h->next->next->next->data.i_val != 0);
2888
		int anti = (sc->token == SQL_NOT_LIKE || sc->token == SQL_NOT_SIMILARTO) != (sc->data.lval->h->next->next->next->data.i_val != 0);
2889
		int regex = sc->token == SQL_SIMILARTO || sc->token == SQL_NOT_SIMILARTO;
2883
		sql_subtype *st = sql_bind_localtype("str");
2890
		sql_subtype *st = sql_bind_localtype("str");
2884
		sql_exp *le = rel_value_exp(sql, &rel, lo, f, ek);
2891
		sql_exp *le = rel_value_exp(sql, &rel, lo, f, ek);
2885
		sql_exp *re, *ee = NULL;
2892
		sql_exp *re, *ee = NULL;
Lines 2889-2895 Link Here
2889
2896
2890
		if (!exp_subtype(le)) 
2897
		if (!exp_subtype(le)) 
2891
			return sql_error(sql, 02, "SELECT: parameter not allowed on "
2898
			return sql_error(sql, 02, "SELECT: parameter not allowed on "
2892
					"left hand side of LIKE operator");
2899
					"left hand side of LIKE/SIMILAR TO operator");
2893
2900
2894
		/* Do we need to escape ? */
2901
		/* Do we need to escape ? */
2895
		if (dlist_length(ro->data.lval) == 2) {
2902
		if (dlist_length(ro->data.lval) == 2) {
Lines 2904-2916 Link Here
2904
			return NULL;
2911
			return NULL;
2905
		if (!exp_subtype(re)) {
2912
		if (!exp_subtype(re)) {
2906
			if (rel_set_type_param(sql, st, re, 0) == -1) 
2913
			if (rel_set_type_param(sql, st, re, 0) == -1) 
2907
				return sql_error(sql, 02, "LIKE: wrong type, should be string");
2914
				return sql_error(sql, 02, "LIKE/SIMILAR TO: wrong type, should be string");
2908
		} else if ((re = rel_check_type(sql, st, re, type_equal)) == NULL) {
2915
		} else if ((re = rel_check_type(sql, st, re, type_equal)) == NULL) {
2909
			return sql_error(sql, 02, "LIKE: wrong type, should be string");
2916
			return sql_error(sql, 02, "LIKE/SIMILAR TO: wrong type, should be string");
2910
		}
2917
		}
2911
		if ((le = rel_check_type(sql, st, le, type_equal)) == NULL) 
2918
		if ((le = rel_check_type(sql, st, le, type_equal)) == NULL) 
2912
			return sql_error(sql, 02, "LIKE: wrong type, should be string");
2919
			return sql_error(sql, 02, "LIKE/SIMILAR TO: wrong type, should be string");
2913
		return rel_filter_exp_(sql, rel, le, re, ee, (insensitive ? "ilike" : "like"), anti);
2920
		return rel_filter_exp_(sql, rel, le, re, ee, regex ? "similarto" : (insensitive ? "ilike" : "like"), anti);
2914
	}
2921
	}
2915
	case SQL_BETWEEN:
2922
	case SQL_BETWEEN:
2916
	case SQL_NOT_BETWEEN:
2923
	case SQL_NOT_BETWEEN:
(-)a/sql/server/sql_parser.h (+2 lines)
Lines 123-128 Link Here
123
	SQL_NOT_BETWEEN,
123
	SQL_NOT_BETWEEN,
124
	SQL_LIKE,
124
	SQL_LIKE,
125
	SQL_NOT_LIKE,
125
	SQL_NOT_LIKE,
126
	SQL_SIMILARTO,
127
	SQL_NOT_SIMILARTO,
126
	SQL_IN,
128
	SQL_IN,
127
	SQL_NOT_IN,
129
	SQL_NOT_IN,
128
	SQL_GRANT,
130
	SQL_GRANT,
(-)a/sql/server/sql_parser.y (-2 / +25 lines)
Lines 166-171 Link Here
166
	transaction_statement
166
	transaction_statement
167
	_transaction_stmt
167
	_transaction_stmt
168
	like_predicate
168
	like_predicate
169
	similarto_predicate
169
	opt_where_clause
170
	opt_where_clause
170
	opt_having_clause
171
	opt_having_clause
171
	opt_group_by_clause
172
	opt_group_by_clause
Lines 568-574 Link Here
568
569
569
%left <operation> NOT
570
%left <operation> NOT
570
%left <operation> '='
571
%left <operation> '='
571
%left <operation> ALL ANY NOT_BETWEEN BETWEEN NOT_IN sqlIN NOT_LIKE LIKE NOT_ILIKE ILIKE OR SOME
572
%left <operation> ALL ANY NOT_BETWEEN BETWEEN NOT_IN sqlIN NOT_LIKE LIKE NOT_ILIKE ILIKE NOT_SIMILAR_TO SIMILAR_TO OR SOME
572
%left <operation> AND
573
%left <operation> AND
573
%left <sval> COMPARISON /* <> < > <= >= */
574
%left <sval> COMPARISON /* <> < > <= >= */
574
%left <operation> '+' '-' '&' '|' '^' LEFT_SHIFT RIGHT_SHIFT LEFT_SHIFT_ASSIGN RIGHT_SHIFT_ASSIGN CONCATSTRING SUBSTRING POSITION SPLIT_PART
575
%left <operation> '+' '-' '&' '|' '^' LEFT_SHIFT RIGHT_SHIFT LEFT_SHIFT_ASSIGN RIGHT_SHIFT_ASSIGN CONCATSTRING SUBSTRING POSITION SPLIT_PART
Lines 609-615 Link Here
609
%token COPY RECORDS DELIMITERS STDIN STDOUT FWF
610
%token COPY RECORDS DELIMITERS STDIN STDOUT FWF
610
%token INDEX REPLACE
611
%token INDEX REPLACE
611
612
612
%token AS TRIGGER OF BEFORE AFTER ROW STATEMENT sqlNEW OLD EACH REFERENCING
613
%token AS TRIGGER OF BEFORE AFTER ROW STATEMENT sqlNEW OLD EACH REFERENCING SIMILAR
613
%token OVER PARTITION CURRENT EXCLUDE FOLLOWING PRECEDING OTHERS TIES RANGE UNBOUNDED
614
%token OVER PARTITION CURRENT EXCLUDE FOLLOWING PRECEDING OTHERS TIES RANGE UNBOUNDED
614
615
615
%token X_BODY 
616
%token X_BODY 
Lines 3370-3375 Link Here
3370
    comparison_predicate
3371
    comparison_predicate
3371
 |  between_predicate
3372
 |  between_predicate
3372
 |  like_predicate
3373
 |  like_predicate
3374
 |  similarto_predicate
3373
 |  test_for_null
3375
 |  test_for_null
3374
 |  in_predicate
3376
 |  in_predicate
3375
 |  all_or_any_predicate
3377
 |  all_or_any_predicate
Lines 3394-3399 Link Here
3394
			$$->token = SQL_LIKE;
3396
			$$->token = SQL_LIKE;
3395
		  else if ($$->token == SQL_LIKE)
3397
		  else if ($$->token == SQL_LIKE)
3396
			$$->token = SQL_NOT_LIKE;
3398
			$$->token = SQL_NOT_LIKE;
3399
		  else if ($$->token == SQL_NOT_SIMILARTO)
3400
			$$->token = SQL_SIMILARTO;
3401
		  else if ($$->token == SQL_SIMILARTO)
3402
			$$->token = SQL_NOT_SIMILARTO;
3397
		  else
3403
		  else
3398
			$$ = _symbol_create_symbol(SQL_NOT, $2); }
3404
			$$ = _symbol_create_symbol(SQL_NOT, $2); }
3399
 |   predicate	{ $$ = $1; }
3405
 |   predicate	{ $$ = $1; }
Lines 3470-3475 Link Here
3470
		  $$ = _symbol_create_list( SQL_LIKE, l ); }
3476
		  $$ = _symbol_create_list( SQL_LIKE, l ); }
3471
 ;
3477
 ;
3472
3478
3479
/*
 * [NOT] SIMILAR TO predicate.  Each alternative builds a four-element list:
 *   1. the left-hand pred_exp ($1),
 *   2. the like_exp on the right ($3),
 *   3. an int flag, always FALSE here,
 *   4. an int "anti" flag, TRUE only for the NOT_SIMILAR_TO alternative.
 * Both alternatives create a SQL_SIMILARTO symbol; the negation is carried
 * by the anti flag rather than by a distinct symbol token.
 */
similarto_predicate:
3480
    pred_exp NOT_SIMILAR_TO like_exp
3481
		{ dlist *l = L();
3482
		  append_symbol(l, $1);
3483
		  append_symbol(l, $3);
3484
		  append_int(l, FALSE);  /* third element FALSE: case sensitive */
3485
		  append_int(l, TRUE);  /* anti flag set: negated form */
3486
		  $$ = _symbol_create_list( SQL_SIMILARTO, l ); }
3487
 |  pred_exp SIMILAR_TO like_exp
3488
		{ dlist *l = L();
3489
		  append_symbol(l, $1);
3490
		  append_symbol(l, $3);
3491
		  append_int(l, FALSE);  /* third element FALSE: case sensitive */
3492
		  append_int(l, FALSE);  /* anti flag clear: plain SIMILAR TO */
3493
		  $$ = _symbol_create_list( SQL_SIMILARTO, l ); }
3494
 ;
3495
3473
like_exp:
3496
like_exp:
3474
    scalar_exp
3497
    scalar_exp
3475
	{ dlist *l = L();
3498
	{ dlist *l = L();
(-)a/sql/server/sql_scan.c (+16 lines)
Lines 209-214 Link Here
209
	failed += keywords_insert("LATERAL", LATERAL);
209
	failed += keywords_insert("LATERAL", LATERAL);
210
	failed += keywords_insert("LEFT", LEFT);
210
	failed += keywords_insert("LEFT", LEFT);
211
	failed += keywords_insert("LIKE", LIKE);
211
	failed += keywords_insert("LIKE", LIKE);
212
	failed += keywords_insert("SIMILAR", SIMILAR);
212
	failed += keywords_insert("LIMIT", LIMIT);
213
	failed += keywords_insert("LIMIT", LIMIT);
213
	failed += keywords_insert("SAMPLE", SAMPLE);
214
	failed += keywords_insert("SAMPLE", SAMPLE);
214
	failed += keywords_insert("LOCAL", LOCAL);
215
	failed += keywords_insert("LOCAL", LOCAL);
Lines 1273-1278 Link Here
1273
			token = NOT_LIKE;
1274
			token = NOT_LIKE;
1274
		} else if (next == ILIKE) {
1275
		} else if (next == ILIKE) {
1275
			token = NOT_ILIKE;
1276
			token = NOT_ILIKE;
1277
		} else if (next == SIMILAR) {
1278
			next = sqllex(yylval, parm);
1279
			if (next == TO) {
1280
				token = NOT_SIMILAR_TO;
1281
			} else {
1282
    			lc->yynext = next;
1283
			}
1284
		} else {
1285
			lc->yynext = next;
1286
		}
1287
	} else if (token == SIMILAR) {
1288
		int next = sqllex(yylval, parm);
1289
1290
		if (next == TO) {
1291
			token = SIMILAR_TO;
1276
		} else {
1292
		} else {
1277
			lc->yynext = next;
1293
			lc->yynext = next;
1278
		}
1294
		}

Return to bug 6533