You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
AQuery/monetdb/msvc/gdk_atoms.h

464 lines
16 KiB

/*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* Copyright 1997 - July 2008 CWI, August 2008 - 2022 MonetDB B.V.
*/
#ifndef _GDK_ATOMS_H_
#define _GDK_ATOMS_H_
/* atomFromStr returns the number of bytes of the input string that
* were processed. atomToStr returns the length of the string
* produced. Both functions return -1 on (any kind of) failure. If
* *dst is not NULL, *len specifies the available space. If there is
* not enough space, or if *dst is NULL, *dst will be freed (if not
* NULL) and a new buffer will be allocated and returned in *dst.
* *len will be set to reflect the actual size allocated. If
* allocation fails, *dst will be NULL on return and *len is
* undefined. In any case, if the function returns, *buf is either
* NULL or a valid pointer and then *len is the size of the area *buf
* points to.
*
* atomCmp returns a value less than zero/equal to zero/greater than
* zer if the first argument points to a values which is deemed
* smaller/equal to/larger than the value pointed to by the second
* argument.
*
* atomHash calculates a hash function for the value pointed to by the
* argument.
*/
#define IDLENGTH 64 /* maximum BAT id length */
typedef struct {
/* simple attributes */
char name[IDLENGTH];
uint8_t storage; /* stored as another type? */
bool linear; /* atom can be ordered linearly */
uint16_t size; /* fixed size of atom */
/* automatically generated fields */
const void *atomNull; /* global nil value */
/* generic (fixed + varsized atom) ADT functions */
ssize_t (*atomFromStr) (const char *src, size_t *len, void **dst, bool external);
ssize_t (*atomToStr) (char **dst, size_t *len, const void *src, bool external);
void *(*atomRead) (void *dst, size_t *dstlen, stream *s, size_t cnt);
gdk_return (*atomWrite) (const void *src, stream *s, size_t cnt);
int (*atomCmp) (const void *v1, const void *v2);
BUN (*atomHash) (const void *v);
/* optional functions */
gdk_return (*atomFix) (const void *atom);
gdk_return (*atomUnfix) (const void *atom);
/* varsized atom-only ADT functions */
var_t (*atomPut) (BAT *, var_t *off, const void *src);
void (*atomDel) (Heap *, var_t *atom);
size_t (*atomLen) (const void *atom);
gdk_return (*atomHeap) (Heap *, size_t);
} atomDesc;
#define MAXATOMS 128
gdk_export atomDesc BATatoms[MAXATOMS];
gdk_export int GDKatomcnt;
gdk_export int ATOMallocate(const char *nme);
gdk_export int ATOMindex(const char *nme);
gdk_export str ATOMname(int id);
gdk_export size_t ATOMlen(int id, const void *v);
gdk_export void *ATOMnil(int id)
__attribute__((__malloc__));
gdk_export int ATOMprint(int id, const void *val, stream *fd);
gdk_export char *ATOMformat(int id, const void *val);
gdk_export void *ATOMdup(int id, const void *val);
/*
* @- maximum atomic string lengths
*/
#define bitStrlen 8
#define bteStrlen 8
#define shtStrlen 12
#define intStrlen 24
#if SIZEOF_OID == SIZEOF_INT
#define oidStrlen 24
#else
#define oidStrlen 48
#endif
#if SIZEOF_PTR == SIZEOF_INT
#define ptrStrlen 24
#else
#define ptrStrlen 48
#endif
#define lngStrlen 48
#ifdef HAVE_HGE
#define hgeStrlen 96
#endif
#define fltStrlen 48
#define dblStrlen 96
/*
* The system comes with the traditional atomic types: int (4 bytes),
* bool(1 byte) and str (variable). In addition, we support the notion
* of an OID type, which ensures uniqueness of its members. This
* leads to the following type descriptor table.
*/
#ifdef HAVE_HGE
gdk_export ssize_t hgeFromStr(const char *src, size_t *len, hge **dst, bool external);
gdk_export ssize_t hgeToStr(str *dst, size_t *len, const hge *src, bool external);
#endif
gdk_export ssize_t lngFromStr(const char *src, size_t *len, lng **dst, bool external);
gdk_export ssize_t lngToStr(str *dst, size_t *len, const lng *src, bool external);
gdk_export ssize_t intFromStr(const char *src, size_t *len, int **dst, bool external);
gdk_export ssize_t intToStr(str *dst, size_t *len, const int *src, bool external);
gdk_export ssize_t batFromStr(const char *src, size_t *len, bat **dst, bool external);
gdk_export ssize_t batToStr(str *dst, size_t *len, const bat *src, bool external);
gdk_export ssize_t ptrFromStr(const char *src, size_t *len, ptr **dst, bool external);
gdk_export ssize_t ptrToStr(str *dst, size_t *len, const ptr *src, bool external);
gdk_export ssize_t bitFromStr(const char *src, size_t *len, bit **dst, bool external);
gdk_export ssize_t bitToStr(str *dst, size_t *len, const bit *src, bool external);
gdk_export ssize_t OIDfromStr(const char *src, size_t *len, oid **dst, bool external);
gdk_export ssize_t OIDtoStr(str *dst, size_t *len, const oid *src, bool external);
gdk_export ssize_t shtFromStr(const char *src, size_t *len, sht **dst, bool external);
gdk_export ssize_t shtToStr(str *dst, size_t *len, const sht *src, bool external);
gdk_export ssize_t bteFromStr(const char *src, size_t *len, bte **dst, bool external);
gdk_export ssize_t bteToStr(str *dst, size_t *len, const bte *src, bool external);
gdk_export ssize_t fltFromStr(const char *src, size_t *len, flt **dst, bool external);
gdk_export ssize_t fltToStr(str *dst, size_t *len, const flt *src, bool external);
gdk_export ssize_t dblFromStr(const char *src, size_t *len, dbl **dst, bool external);
gdk_export ssize_t dblToStr(str *dst, size_t *len, const dbl *src, bool external);
gdk_export ssize_t GDKstrFromStr(unsigned char *restrict dst, const unsigned char *restrict src, ssize_t len);
gdk_export ssize_t strFromStr(const char *restrict src, size_t *restrict len, str *restrict dst, bool external);
gdk_export size_t escapedStrlen(const char *restrict src, const char *sep1, const char *sep2, int quote);
gdk_export size_t escapedStr(char *restrict dst, const char *restrict src, size_t dstlen, const char *sep1, const char *sep2, int quote);
/*
* @- nil values
* All types have a single value designated as a NIL value. It
* designates a missing value and it is ignored (forbidden) in several
* primitives. The current policy is to use the smallest value in any
* ordered domain. The routine atomnil returns a pointer to the nil
* value representation.
*/
#define GDK_bit_max ((bit) 1)
#define GDK_bit_min ((bit) 0)
#define GDK_bte_max ((bte) INT8_MAX)
#define GDK_bte_min ((bte) INT8_MIN+1)
#define GDK_sht_max ((sht) INT16_MAX)
#define GDK_sht_min ((sht) INT16_MIN+1)
#define GDK_int_max ((int) INT32_MAX)
#define GDK_int_min ((int) INT32_MIN+1)
#define GDK_lng_max ((lng) INT64_MAX)
#define GDK_lng_min ((lng) INT64_MIN+1)
#ifdef HAVE_HGE
#define GDK_hge_max ((((hge) 1) << 126) - 1 + (((hge) 1) << 126))
#define GDK_hge_min (-GDK_hge_max)
#endif
#define GDK_flt_max ((flt) FLT_MAX)
#define GDK_flt_min ((flt) -FLT_MAX)
#define GDK_dbl_max ((dbl) DBL_MAX)
#define GDK_dbl_min ((dbl) -DBL_MAX)
#define GDK_oid_max (((oid) 1 << ((8 * SIZEOF_OID) - 1)) - 1)
#define GDK_oid_min ((oid) 0)
/* representation of the nil */
gdk_export const bte bte_nil;
gdk_export const sht sht_nil;
gdk_export const int int_nil;
#ifdef NAN_CANNOT_BE_USED_AS_INITIALIZER
/* Definition of NAN is seriously broken on Intel compiler (at least
* in some versions), so we work around it. */
union _flt_nil_t {
uint32_t l;
flt f;
};
gdk_export const union _flt_nil_t _flt_nil_;
#define flt_nil (_flt_nil_.f)
union _dbl_nil_t {
uint64_t l;
dbl d;
};
gdk_export const union _dbl_nil_t _dbl_nil_;
#define dbl_nil (_dbl_nil_.d)
#else
gdk_export const flt flt_nil;
gdk_export const dbl dbl_nil;
#endif
gdk_export const lng lng_nil;
#ifdef HAVE_HGE
gdk_export const hge hge_nil;
#endif
gdk_export const oid oid_nil;
gdk_export const char str_nil[2];
gdk_export const ptr ptr_nil;
gdk_export const uuid uuid_nil;
/* derived NIL values - OIDDEPEND */
#define bit_nil ((bit) bte_nil)
#define bat_nil ((bat) int_nil)
#define void_nil oid_nil
#define is_bit_nil(v) ((v) == GDK_bte_min-1)
#define is_bte_nil(v) ((v) == GDK_bte_min-1)
#define is_sht_nil(v) ((v) == GDK_sht_min-1)
#define is_int_nil(v) ((v) == GDK_int_min-1)
#define is_lng_nil(v) ((v) == GDK_lng_min-1)
#ifdef HAVE_HGE
#define is_hge_nil(v) ((v) == GDK_hge_min-1)
#endif
#define is_oid_nil(v) ((v) == ((oid) 1 << ((8 * SIZEOF_OID) - 1)))
#define is_flt_nil(v) isnan(v)
#define is_dbl_nil(v) isnan(v)
#define is_bat_nil(v) (((v) & 0x7FFFFFFF) == 0) /* v == bat_nil || v == 0 */
#include <math.h>
#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) && _MSC_VER < 1800
#include <float.h>
#define isnan(x) _isnan(x)
#define isinf(x) (_fpclass(x) & (_FPCLASS_NINF | _FPCLASS_PINF))
#define isfinite(x) _finite(x)
#endif
#ifdef HAVE_HGE
#define is_uuid_nil(x) ((x).h == 0)
#else
#ifdef HAVE_UUID
#define is_uuid_nil(x) uuid_is_null((x).u)
#else
#define is_uuid_nil(x) (memcmp((x).u, uuid_nil.u, UUID_SIZE) == 0)
#endif
#endif
/*
* @- Derived types
* In all algorithms across GDK, you will find switches on the types
* (bte, sht, int, flt, dbl, lng, hge, str). They respectively
* represent an octet, a 16-bit int, a 32-bit int, a 32-bit float, a
* 64-bit double, a 64-bit int, a 128-bit int, and a pointer-sized location
* of a char-buffer (ended by a zero char).
*
* In contrast, the types (bit, ptr, bat, oid) are derived types. They
* do not occur in the switches. The ATOMstorage macro maps them
* respectively onto a @code{ bte}, @code{ int} (pointers are 32-bit),
* @code{ int}, and @code{ int}. OIDs are 32-bit.
*
* This approach makes it tractable to switch to 64-bits OIDs, or to a
* fully 64-bits OS easily. One only has to map the @code{ oid} and
* @code{ ptr} types to @code{ lng} instead of @code{ int}.
*
* Derived types mimic their fathers in many ways. They inherit the
* @code{ size}, @code{ linear}, and @code{ null}
* properties of their father. The same goes for the
* ADT functions HASH, CMP, PUT, NULL, DEL, LEN, and HEAP. So, a
* derived type differs in only two ways from its father:
* @table @code
* @item [string representation]
* the only two ADT operations specific for a derived type are FROMSTR
* and TOSTR.
* @item [identity]
* (a @code{ bit} is really of a different type than @code{ bte}). The
* set of operations on derived type values or BATs of such types may
* differ from the sets of operations on the father type.
* @end table
*/
/* use "do ... while(0)" so that lhs can safely be used in if statements */
#define ATOMstorage(t) BATatoms[t].storage
#define ATOMsize(t) BATatoms[t].size
#define ATOMfromstr(t,s,l,src,ext) BATatoms[t].atomFromStr(src,l,s,ext)
#define ATOMnilptr(t) BATatoms[t].atomNull
#define ATOMcompare(t) BATatoms[t].atomCmp
#define ATOMcmp(t,l,r) ((*ATOMcompare(t))(l, r))
#define ATOMhash(t,src) BATatoms[t].atomHash(src)
#define ATOMdel(t,hp,src) do if (BATatoms[t].atomDel) BATatoms[t].atomDel(hp,src); while (0)
#define ATOMvarsized(t) (BATatoms[t].atomPut != NULL)
#define ATOMlinear(t) BATatoms[t].linear
#define ATOMtype(t) ((t) == TYPE_void ? TYPE_oid : (t))
#define ATOMfix(t,v) (BATatoms[t].atomFix ? BATatoms[t].atomFix(v) : GDK_SUCCEED)
#define ATOMunfix(t,v) (BATatoms[t].atomUnfix ? BATatoms[t].atomUnfix(v) : GDK_SUCCEED)
/* The base type is the storage type if the comparison function, the
* hash function, and the nil value are the same as those of the
* storage type; otherwise it is the type itself. */
#define ATOMbasetype(t) ((t) != ATOMstorage(t) && \
ATOMnilptr(t) == ATOMnilptr(ATOMstorage(t)) && \
ATOMcompare(t) == ATOMcompare(ATOMstorage(t)) && \
BATatoms[t].atomHash == BATatoms[ATOMstorage(t)].atomHash ? \
ATOMstorage(t) : (t))
/*
* In case that atoms are added to a bat, their logical reference
* count should be incremented (and decremented if deleted). Notice
* that BATs with atomic types that have logical references (e.g. BATs
* of BATs but also BATs of ODMG odSet) can never be persistent, as
* this would make the commit tremendously complicated.
*/
static inline gdk_return __attribute__((__warn_unused_result__))
ATOMputVAR(BAT *b, var_t *dst, const void *src)
{
assert(BATatoms[b->ttype].atomPut != NULL);
if ((*BATatoms[b->ttype].atomPut)(b, dst, src) == (var_t) -1)
return GDK_FAIL;
return GDK_SUCCEED;
}
static inline gdk_return __attribute__((__warn_unused_result__))
ATOMputFIX(int type, void *dst, const void *src)
{
gdk_return rc;
assert(BATatoms[type].atomPut == NULL);
rc = ATOMfix(type, src);
if (rc != GDK_SUCCEED)
return rc;
switch (ATOMsize(type)) {
case 0: /* void */
break;
case 1:
* (bte *) dst = * (bte *) src;
break;
case 2:
* (sht *) dst = * (sht *) src;
break;
case 4:
* (int *) dst = * (int *) src;
break;
case 8:
* (lng *) dst = * (lng *) src;
break;
case 16:
#ifdef HAVE_HGE
* (hge *) dst = * (hge *) src;
#else
* (uuid *) dst = * (uuid *) src;
#endif
break;
default:
memcpy(dst, src, ATOMsize(type));
break;
}
return GDK_SUCCEED;
}
static inline gdk_return __attribute__((__warn_unused_result__))
ATOMreplaceVAR(BAT *b, var_t *dst, const void *src)
{
var_t loc = *dst;
int type = b->ttype;
assert(BATatoms[type].atomPut != NULL);
if ((*BATatoms[type].atomPut)(b, &loc, src) == (var_t) -1)
return GDK_FAIL;
if (ATOMunfix(type, dst) != GDK_SUCCEED)
return GDK_FAIL;
ATOMdel(type, b->tvheap, dst);
*dst = loc;
return ATOMfix(type, src);
}
/* string heaps:
* - strings are 8 byte aligned
* - start with a 1024 bucket hash table
* - heaps < 64KiB are fully duplicate eliminated with this hash tables
* - heaps >= 64KiB are opportunistically (imperfect) duplicate
* eliminated as only the last 128KiB chunk is considered and there
* is no linked list
* - buckets and next pointers are unsigned short "indices"
* - indices should be multiplied by 8 and takes from ELIMBASE to get
* an offset
* Note that a 64KiB chunk of the heap contains at most 8K 8-byte
* aligned strings. The 1K bucket list means that in worst load, the
* list length is 8 (OK).
*/
#define GDK_STRHASHTABLE (1<<10) /* 1024 */
#define GDK_STRHASHMASK (GDK_STRHASHTABLE-1)
#define GDK_STRHASHSIZE (GDK_STRHASHTABLE * sizeof(stridx_t))
#define GDK_ELIMPOWER 16 /* 64KiB is the threshold */
#define GDK_ELIMDOUBLES(h) ((h)->free < GDK_ELIMLIMIT)
#define GDK_ELIMLIMIT (1<<GDK_ELIMPOWER) /* equivalently: ELIMBASE == 0 */
#define GDK_ELIMBASE(x) (((x) >> GDK_ELIMPOWER) << GDK_ELIMPOWER)
#define GDK_VAROFFSET ((var_t) GDK_STRHASHSIZE)
/*
* @- String Comparison, NILs and UTF-8
*
* Using the char* type for strings is handy as this is the type of
* any constant strings in a C/C++ program. Therefore, MonetDB uses
* this definition for str. However, different compilers and
* platforms use either signed or unsigned characters for the char
* type. It is required that string ordering in MonetDB is consistent
* over platforms though.
*
* As for the choice how strings should be ordered, our support for
* UTF-8 actually imposes that it should follow 'unsigned char'
* doctrine (like in the AIX native compiler). In this semantics,
* though we have to take corrective action to ensure that str(nil) is
* the smallest value of the domain.
*/
static inline bool __attribute__((__pure__))
strEQ(const char *l, const char *r)
{
return strcmp(l, r) == 0;
}
static inline bool __attribute__((__pure__))
strNil(const char *s)
{
return s == NULL || (s[0] == '\200' && s[1] == '\0');
}
static inline size_t __attribute__((__pure__))
strLen(const char *s)
{
return strNil(s) ? 2 : strlen(s) + 1;
}
static inline int __attribute__((__pure__))
strCmp(const char *l, const char *r)
{
return strNil(r)
? !strNil(l)
: strNil(l) ? -1 : strcmp(l, r);
}
static inline size_t
VarHeapVal(const void *b, BUN p, int w)
{
switch (w) {
case 1:
return (size_t) ((const uint8_t *) b)[p] + GDK_VAROFFSET;
case 2:
return (size_t) ((const uint16_t *) b)[p] + GDK_VAROFFSET;
#if SIZEOF_VAR_T == 8
case 4:
return (size_t) ((const uint32_t *) b)[p];
#endif
default:
return (size_t) ((const var_t *) b)[p];
}
}
static inline BUN __attribute__((__pure__))
strHash(const char *key)
{
BUN y = 0;
for (BUN i = 0; key[i]; i++) {
y += key[i];
y += (y << 10);
y ^= (y >> 6);
}
y += (y << 3);
y ^= (y >> 11);
y += (y << 15);
return y;
}
#endif /* _GDK_ATOMS_H_ */