You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
300 lines
9.0 KiB
300 lines
9.0 KiB
2 years ago
|
/*
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0.  If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * Copyright 1997 - July 2008 CWI, August 2008 - 2022 MonetDB B.V.
 */
|
||
|
|
||
|
#ifndef _GDK_SEARCH_H_
|
||
|
#define _GDK_SEARCH_H_
|
||
|
|
||
|
struct Hash {
|
||
|
int type; /* type of index entity */
|
||
|
uint8_t width; /* width of hash entries */
|
||
|
BUN mask1; /* .mask1 < .nbucket <= .mask2 */
|
||
|
BUN mask2; /* ... both are power-of-two minus one */
|
||
|
BUN nbucket; /* number of valid hash buckets */
|
||
|
BUN nunique; /* number of unique values */
|
||
|
BUN nheads; /* number of chain heads */
|
||
|
void *Bckt; /* hash buckets, points into .heapbckt */
|
||
|
void *Link; /* collision list, points into .heaplink */
|
||
|
Heap heaplink; /* heap where the hash links are stored */
|
||
|
Heap heapbckt; /* heap where the hash buckets are stored */
|
||
|
};
|
||
|
|
||
|
static inline BUN
|
||
|
HASHbucket(const Hash *h, BUN v)
|
||
|
{
|
||
|
return (v &= h->mask2) < h->nbucket ? v : v & h->mask1;
|
||
|
}
|
||
|
|
||
|
gdk_export gdk_return BAThash(BAT *b);
|
||
|
gdk_export void HASHdestroy(BAT *b);
|
||
|
gdk_export BUN HASHprobe(const Hash *h, const void *v);
|
||
|
gdk_export BUN HASHlist(Hash *h, BUN i);
|
||
|
|
||
|
/* Supported hash entry widths in bytes.  BUN8 only exists when a BUN
 * itself is 8 bytes wide. */
#define BUN2 2
#define BUN4 4
#if SIZEOF_BUN == 8
#define BUN8 8
#endif

/* For each width: the unsigned entry type and its all-ones "no entry"
 * marker. */
#ifdef BUN2
typedef uint16_t BUN2type;
#define BUN2_NONE ((BUN2type) UINT16_C(0xFFFF))
#endif

typedef uint32_t BUN4type;
#define BUN4_NONE ((BUN4type) UINT32_C(0xFFFFFFFF))

#if SIZEOF_BUN > 4
typedef uint64_t BUN8type;
#endif
#ifdef BUN8
#define BUN8_NONE ((BUN8type) UINT64_C(0xFFFFFFFFFFFFFFFF))
#endif
|
||
|
|
||
|
/* play around with h->Bckt[i] and h->Link[j] */
|
||
|
|
||
|
static inline void
|
||
|
HASHput(Hash *h, BUN i, BUN v)
|
||
|
{
|
||
|
/* if v == BUN_NONE, assigning the value to a BUN2type
|
||
|
* etc. automatically converts to BUN2_NONE etc. */
|
||
|
switch (h->width) {
|
||
|
#ifdef BUN2
|
||
|
case BUN2:
|
||
|
((BUN2type *) h->Bckt)[i] = (BUN2type) v;
|
||
|
break;
|
||
|
#endif
|
||
|
default: /* BUN4 */
|
||
|
((BUN4type *) h->Bckt)[i] = (BUN4type) v;
|
||
|
break;
|
||
|
#ifdef BUN8
|
||
|
case BUN8:
|
||
|
((BUN8type *) h->Bckt)[i] = (BUN8type) v;
|
||
|
break;
|
||
|
#endif
|
||
|
}
|
||
|
}
|
||
|
|
||
|
static inline void
|
||
|
HASHputlink(Hash *h, BUN i, BUN v)
|
||
|
{
|
||
|
/* if v == BUN_NONE, assigning the value to a BUN2type
|
||
|
* etc. automatically converts to BUN2_NONE etc. */
|
||
|
switch (h->width) {
|
||
|
#ifdef BUN2
|
||
|
case BUN2:
|
||
|
assert(v == BUN_NONE || v == BUN2_NONE || v < i);
|
||
|
((BUN2type *) h->Link)[i] = (BUN2type) v;
|
||
|
break;
|
||
|
#endif
|
||
|
default: /* BUN4 */
|
||
|
assert(v == BUN_NONE || v == BUN4_NONE || v < i);
|
||
|
((BUN4type *) h->Link)[i] = (BUN4type) v;
|
||
|
break;
|
||
|
#ifdef BUN8
|
||
|
case BUN8:
|
||
|
assert(v == BUN_NONE || v == BUN8_NONE || v < i);
|
||
|
((BUN8type *) h->Link)[i] = (BUN8type) v;
|
||
|
break;
|
||
|
#endif
|
||
|
}
|
||
|
}
|
||
|
|
||
|
static inline BUN __attribute__((__pure__))
|
||
|
HASHget(const Hash *h, BUN i)
|
||
|
{
|
||
|
switch (h->width) {
|
||
|
#ifdef BUN2
|
||
|
case BUN2:
|
||
|
i = (BUN) ((BUN2type *) h->Bckt)[i];
|
||
|
return i == BUN2_NONE ? BUN_NONE : i;
|
||
|
#endif
|
||
|
default: /* BUN4 */
|
||
|
i = (BUN) ((BUN4type *) h->Bckt)[i];
|
||
|
return i == BUN4_NONE ? BUN_NONE : i;
|
||
|
#ifdef BUN8
|
||
|
case BUN8:
|
||
|
i = (BUN) ((BUN8type *) h->Bckt)[i];
|
||
|
return i == BUN8_NONE ? BUN_NONE : i;
|
||
|
#endif
|
||
|
}
|
||
|
}
|
||
|
|
||
|
static inline BUN __attribute__((__pure__))
|
||
|
HASHgetlink(const Hash *h, BUN i)
|
||
|
{
|
||
|
switch (h->width) {
|
||
|
#ifdef BUN2
|
||
|
case BUN2:
|
||
|
i = (BUN) ((BUN2type *) h->Link)[i];
|
||
|
return i == BUN2_NONE ? BUN_NONE : i;
|
||
|
#endif
|
||
|
default: /* BUN4 */
|
||
|
i = (BUN) ((BUN4type *) h->Link)[i];
|
||
|
return i == BUN4_NONE ? BUN_NONE : i;
|
||
|
#ifdef BUN8
|
||
|
case BUN8:
|
||
|
i = (BUN) ((BUN8type *) h->Link)[i];
|
||
|
return i == BUN8_NONE ? BUN_NONE : i;
|
||
|
#endif
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/* Bit mixers for 1-, 2- and 4-byte values.  Note that the argument is
 * evaluated more than once by mix_int. */

/* the value as an unsigned byte; mix_bte(0x80) == 0x80 */
#define mix_bte(X)	((unsigned int) (unsigned char) (X))
/* the value as an unsigned short; mix_sht(0x8000) == 0x8000 */
#define mix_sht(X)	((unsigned int) (unsigned short) (X))
/* the value XORed with itself shifted right by 7, 13 and 21 bits;
 * mix_int(0x81060038) == 0x80000000 */
#define mix_int(X)	((unsigned int) (X) ^			\
			 ((unsigned int) (X) >> 7) ^		\
			 ((unsigned int) (X) >> 13) ^		\
			 ((unsigned int) (X) >> 21))
|
||
|
/* Bit mixer for 8-byte values: the value XORed with itself shifted
 * right by 7, 13, 21, 31, 38, 46 and 56 bits.  The argument is
 * evaluated more than once.
 * mix_lng(0x810600394347424F) == 0x8000000000000000 */
#define mix_lng(X)	((ulng) (X) ^				\
			 ((ulng) (X) >> 7) ^			\
			 ((ulng) (X) >> 13) ^			\
			 ((ulng) (X) >> 21) ^			\
			 ((ulng) (X) >> 31) ^			\
			 ((ulng) (X) >> 38) ^			\
			 ((ulng) (X) >> 46) ^			\
			 ((ulng) (X) >> 56))
|
||
|
#ifdef HAVE_HGE
/* Bit mixer for 16-byte values: the value XORed with itself shifted
 * right by the listed bit counts.  The argument is evaluated more
 * than once.
 * mix_hge(0x810600394347424F90AC1429D6BFCC57) ==
 * 0x80000000000000000000000000000000 */
#define mix_hge(X)	((uhge) (X) ^				\
			 ((uhge) (X) >> 7) ^			\
			 ((uhge) (X) >> 13) ^			\
			 ((uhge) (X) >> 21) ^			\
			 ((uhge) (X) >> 31) ^			\
			 ((uhge) (X) >> 38) ^			\
			 ((uhge) (X) >> 46) ^			\
			 ((uhge) (X) >> 56) ^			\
			 ((uhge) (X) >> 65) ^			\
			 ((uhge) (X) >> 70) ^			\
			 ((uhge) (X) >> 78) ^			\
			 ((uhge) (X) >> 85) ^			\
			 ((uhge) (X) >> 90) ^			\
			 ((uhge) (X) >> 98) ^			\
			 ((uhge) (X) >> 107) ^			\
			 ((uhge) (X) >> 116))
#endif
|
||
|
/* Bucket number in hash H for the value that V points to. */

/* generic version: hash V with ATOMhash for the type of the hash;
 * hash_loc and hash_var are aliases */
#define hash_any(H,V)	HASHbucket(H, ATOMhash((H)->type, (V)))
#define hash_loc(H,V)	hash_any(H,V)
#define hash_var(H,V)	hash_any(H,V)
/* bte and sht values are used as bucket number directly, without
 * going through HASHbucket; the asserts check that the hash has a
 * bucket for every possible value */
#define hash_bte(H,V)	(assert((H)->nbucket >= 256), (BUN) mix_bte(*(const unsigned char*) (V)))
#define hash_sht(H,V)	(assert((H)->nbucket >= 65536), (BUN) mix_sht(*(const unsigned short*) (V)))
/* wider values are mixed and then reduced with HASHbucket */
#define hash_int(H,V)	HASHbucket(H, (BUN) mix_int(*(const unsigned int *) (V)))
/* XXX return size_t-sized value for 8-byte oid? */
#define hash_lng(H,V)	HASHbucket(H, (BUN) mix_lng(*(const ulng *) (V)))
#ifdef HAVE_HGE
#define hash_hge(H,V)	HASHbucket(H, (BUN) mix_hge(*(const uhge *) (V)))
#endif
|
||
|
/* oid values are hashed like int when an oid has the size of an int,
 * and like lng otherwise */
#if SIZEOF_OID != SIZEOF_INT
#define hash_oid(H,V)	hash_lng(H,V)
#else
#define hash_oid(H,V)	hash_int(H,V)
#endif

/* floating point values are hashed on their bit pattern: flt through
 * hash_int, dbl through hash_lng */
#define hash_flt(H,V)	hash_int(H,V)
#define hash_dbl(H,V)	hash_lng(H,V)
|
||
|
|
||
|
static inline BUN __attribute__((__const__))
|
||
|
mix_uuid(const uuid *u)
|
||
|
{
|
||
|
ulng u1, u2;
|
||
|
|
||
|
u1 = (ulng) (uint8_t) u->u[0] << 56 |
|
||
|
(ulng) (uint8_t) u->u[1] << 48 |
|
||
|
(ulng) (uint8_t) u->u[2] << 40 |
|
||
|
(ulng) (uint8_t) u->u[3] << 32 |
|
||
|
(ulng) (uint8_t) u->u[4] << 24 |
|
||
|
(ulng) (uint8_t) u->u[5] << 16 |
|
||
|
(ulng) (uint8_t) u->u[6] << 8 |
|
||
|
(ulng) (uint8_t) u->u[7];
|
||
|
u2 = (ulng) (uint8_t) u->u[8] << 56 |
|
||
|
(ulng) (uint8_t) u->u[9] << 48 |
|
||
|
(ulng) (uint8_t) u->u[10] << 40 |
|
||
|
(ulng) (uint8_t) u->u[11] << 32 |
|
||
|
(ulng) (uint8_t) u->u[12] << 24 |
|
||
|
(ulng) (uint8_t) u->u[13] << 16 |
|
||
|
(ulng) (uint8_t) u->u[14] << 8 |
|
||
|
(ulng) (uint8_t) u->u[15];
|
||
|
/* we're not using mix_hge since this way we get the same result
|
||
|
* on systems with and without 128 bit integer support */
|
||
|
return (BUN) (mix_lng(u1) ^ mix_lng(u2));
|
||
|
}
|
||
|
#define hash_uuid(H,V) HASHbucket(H, mix_uuid((const uuid *) (V)))
|
||
|
|
||
|
/*
 * Hash-table supported loops over BUNs.
 *
 * The first parameter `bi' is a BAT iterator, the second (`h') should
 * point to the Hash structure, and `v' is a pointer to the atomic
 * value that is being looked up.  `hb' is a BUN variable; it is
 * assigned, in turn, the position of every entry in the collision
 * chain of v's bucket, and the statement following the macro is
 * executed for those positions whose value compares equal to v.
 *
 * HASHloop, HASHlooploc and HASHloopvar find the bucket with
 * HASHprobe and compare with ATOMcmp against BUNtail, BUNtloc and
 * BUNtvar respectively; HASHloop_str hashes with strHash and compares
 * with strEQ.
 *
 * Notes for users:
 * - the arguments are evaluated more than once, so they must not
 *   have side effects;
 * - each macro ends in an unbraced `if': use a braced body and do not
 *   let an `else' follow it directly.
 */
#define HASHloop(bi, h, hb, v)					\
	for ((hb) = HASHget(h, HASHprobe(h, v));		\
	     (hb) != BUN_NONE;					\
	     (hb) = HASHgetlink(h, hb))				\
		if (ATOMcmp((h)->type, (v), BUNtail(bi, hb)) == 0)
#define HASHloop_str(bi, h, hb, v)				\
	for ((hb) = HASHget(h, HASHbucket(h, strHash(v)));	\
	     (hb) != BUN_NONE;					\
	     (hb) = HASHgetlink(h, hb))				\
		if (strEQ((v), BUNtvar(bi, hb)))

#define HASHlooploc(bi, h, hb, v)				\
	for ((hb) = HASHget(h, HASHprobe(h, v));		\
	     (hb) != BUN_NONE;					\
	     (hb) = HASHgetlink(h, hb))				\
		if (ATOMcmp((h)->type, (v), BUNtloc(bi, hb)) == 0)
#define HASHloopvar(bi, h, hb, v)				\
	for ((hb) = HASHget(h, HASHprobe(h, v));		\
	     (hb) != BUN_NONE;					\
	     (hb) = HASHgetlink(h, hb))				\
		if (ATOMcmp((h)->type, (v), BUNtvar(bi, hb)) == 0)
|
||
|
|
||
|
/* Hash loop for fixed-width type TYPE: the bucket is found with
 * hash_TYPE and entries are matched by comparing the TYPE values
 * with ==. */
#define HASHloop_TYPE(bi, h, hb, v, TYPE)				\
	for (hb = HASHget(h, hash_##TYPE(h, v));			\
	     hb != BUN_NONE;						\
	     hb = HASHgetlink(h,hb))					\
		if (* (const TYPE *) BUNtloc(bi, hb) == * (const TYPE *) (v))
|
||
|
|
||
|
/* Hash loop for floating point type TYPE.  Nil values need special
 * care: when v is nil, exactly the nil entries match; otherwise the
 * values are compared with ==. */
#define HASHloop_fTYPE(bi, h, hb, v, TYPE)				\
	for (hb = HASHget(h, hash_##TYPE(h, v));			\
	     hb != BUN_NONE;						\
	     hb = HASHgetlink(h,hb))					\
		if (is_##TYPE##_nil(* (const TYPE *) (v))		\
		    ? is_##TYPE##_nil(* (const TYPE *) BUNtloc(bi, hb)) \
		    : * (const TYPE *) BUNtloc(bi, hb) == * (const TYPE *) (v))
|
||
|
|
||
|
/* Type-specific hash loops: bte, sht, int, lng (and hge, if
 * available) go through HASHloop_TYPE; flt and dbl go through
 * HASHloop_fTYPE. */
#define HASHloop_bte(bi, h, hb, v)	HASHloop_TYPE(bi, h, hb, v, bte)
#define HASHloop_sht(bi, h, hb, v)	HASHloop_TYPE(bi, h, hb, v, sht)
#define HASHloop_int(bi, h, hb, v)	HASHloop_TYPE(bi, h, hb, v, int)
#define HASHloop_lng(bi, h, hb, v)	HASHloop_TYPE(bi, h, hb, v, lng)
#define HASHloop_flt(bi, h, hb, v)	HASHloop_fTYPE(bi, h, hb, v, flt)
#define HASHloop_dbl(bi, h, hb, v)	HASHloop_fTYPE(bi, h, hb, v, dbl)
#ifdef HAVE_HGE
#define HASHloop_hge(bi, h, hb, v)	HASHloop_TYPE(bi, h, hb, v, hge)
#endif
|
||
|
/* Hash loop for uuid values: the bucket is found with hash_uuid.
 * With 128 bit integer support the uuids are compared through their
 * `h' member, otherwise their 16 bytes are compared with memcmp.
 * The hash parameter is called `hsh' so that it cannot clash with
 * the `h' member name. */
#ifndef HAVE_HGE
#define HASHloop_uuid(bi, hsh, hb, v)					\
	for (hb = HASHget(hsh, hash_uuid(hsh, v));			\
	     hb != BUN_NONE;						\
	     hb = HASHgetlink(hsh,hb))					\
		if (memcmp((const uuid *) (v), (const uuid *) BUNtloc(bi, hb), 16) == 0)
#else
#define HASHloop_uuid(bi, hsh, hb, v)					\
	for (hb = HASHget(hsh, hash_uuid(hsh, v));			\
	     hb != BUN_NONE;						\
	     hb = HASHgetlink(hsh,hb))					\
		if (((const uuid *) (v))->h == ((const uuid *) BUNtloc(bi, hb))->h)
#endif
|
||
|
|
||
|
#endif /* _GDK_SEARCH_H_ */
|