Zend/zend_hash.h
Zend/zend_hash.c
万能的php数组,可以充当多种数据结构list,map等,而且可以包含多种类型的数据
有序的HashTable
拉链法解决hash冲突
Time33
struct
typedef struct _zend_array HashTable;
struct _zend_array {
zend_refcounted_h gc;
union {
struct {
ZEND_ENDIAN_LOHI_4(
zend_uchar flags,
zend_uchar nApplyCount,
zend_uchar nIteratorsCount,
zend_uchar consistency)
} v;
uint32_t flags;
} u;
uint32_t nTableMask;
// 存储
Bucket *arData;
// 已用Bucket数量
uint32_t nNumUsed;
// 有效元素数量 nNumUsed >= nNumOfElements
uint32_t nNumOfElements;
// hashTable总大小
uint32_t nTableSize;
uint32_t nInternalPointer;
// 下一个可用的数值索引
zend_long nNextFreeElement;
// 析构函数
dtor_func_t pDestructor;
};
typedef struct _Bucket {
// 值
zval val;
// 数字索引
zend_ulong h; /* hash value (or numeric index) */
// 字符串索引
zend_string *key; /* string key or NULL for numerics */
} Bucket;
/*
* HashTable Data Layout
* =====================
*
* +=============================+
* | HT_HASH(ht, ht->nTableMask) |
* | ... |
* | HT_HASH(ht, -1) |
* +-----------------------------+
* ht->arData ---> | Bucket[0] |
* | ... |
* | Bucket[ht->nTableSize-1] |
* +=============================+
*/
$arr["a"] = 1;
$arr["b"] = 2;
$arr["c"] = 3;
$arr["d"] = 4;
unset($arr["c"]);

初始化
ZEND_API void ZEND_FASTCALL _zend_hash_init(HashTable *ht, uint32_t nSize, dtor_func_t pDestructor, zend_bool persistent ZEND_FILE_LINE_DC)
{
GC_REFCOUNT(ht) = 1;
GC_TYPE_INFO(ht) = IS_ARRAY;
ht->u.flags = (persistent ? HASH_FLAG_PERSISTENT : 0) | HASH_FLAG_APPLY_PROTECTION | HASH_FLAG_STATIC_KEYS;
ht->nTableMask = HT_MIN_MASK;
HT_SET_DATA_ADDR(ht, &uninitialized_bucket);
ht->nNumUsed = 0;
ht->nNumOfElements = 0;
ht->nInternalPointer = HT_INVALID_IDX;
ht->nNextFreeElement = 0;
ht->pDestructor = pDestructor;
ht->nTableSize = zend_hash_check_size(nSize);
}
static void zend_always_inline zend_hash_real_init_ex(HashTable *ht, int packed)
{
HT_ASSERT(GC_REFCOUNT(ht) == 1);
ZEND_ASSERT(!((ht)->u.flags & HASH_FLAG_INITIALIZED));
if (packed) {
HT_SET_DATA_ADDR(ht, pemalloc(HT_SIZE(ht), (ht)->u.flags & HASH_FLAG_PERSISTENT));
(ht)->u.flags |= HASH_FLAG_INITIALIZED | HASH_FLAG_PACKED;
HT_HASH_RESET_PACKED(ht);
} else {
(ht)->nTableMask = -(ht)->nTableSize;
HT_SET_DATA_ADDR(ht, pemalloc(HT_SIZE(ht), (ht)->u.flags & HASH_FLAG_PERSISTENT));
(ht)->u.flags |= HASH_FLAG_INITIALIZED;
if (EXPECTED(ht->nTableMask == -8)) {
Bucket *arData = ht->arData;
HT_HASH_EX(arData, -8) = -1;
HT_HASH_EX(arData, -7) = -1;
HT_HASH_EX(arData, -6) = -1;
HT_HASH_EX(arData, -5) = -1;
HT_HASH_EX(arData, -4) = -1;
HT_HASH_EX(arData, -3) = -1;
HT_HASH_EX(arData, -2) = -1;
HT_HASH_EX(arData, -1) = -1;
} else {
HT_HASH_RESET(ht);
}
}
}
查找
static zend_always_inline Bucket *zend_hash_find_bucket(const HashTable *ht, zend_string *key)
{
zend_ulong h;
uint32_t nIndex;
uint32_t idx;
Bucket *p, *arData;
// 计算str的hash
h = zend_string_hash_val(key);
arData = ht->arData;
// 散列表中的位置
nIndex = h | ht->nTableMask;
// arData中Bucket的位置
idx = HT_HASH_EX(arData, nIndex);
while (EXPECTED(idx != HT_INVALID_IDX)) {
p = HT_HASH_TO_BUCKET_EX(arData, idx);
if (EXPECTED(p->key == key)) { /* check for the same interned string */
return p;
} else if (EXPECTED(p->h == h) &&
EXPECTED(p->key) &&
EXPECTED(ZSTR_LEN(p->key) == ZSTR_LEN(key)) &&
// 比较val
EXPECTED(memcmp(ZSTR_VAL(p->key), ZSTR_VAL(key), ZSTR_LEN(key)) == 0)) {
return p;
}
// 下一个Bucket
idx = Z_NEXT(p->val);
}
return NULL;
}
插入
static zend_always_inline zval *_zend_hash_add_or_update_i(HashTable *ht, zend_string *key, zval *pData, uint32_t flag ZEND_FILE_LINE_DC)
{
zend_ulong h;
uint32_t nIndex;
uint32_t idx;
Bucket *p;
IS_CONSISTENT(ht);
HT_ASSERT(GC_REFCOUNT(ht) == 1);
if (UNEXPECTED(!(ht->u.flags & HASH_FLAG_INITIALIZED))) {
CHECK_INIT(ht, 0);
goto add_to_hash;
} else if (ht->u.flags & HASH_FLAG_PACKED) {
zend_hash_packed_to_hash(ht);
} else if ((flag & HASH_ADD_NEW) == 0) {
p = zend_hash_find_bucket(ht, key);
if (p) {
zval *data;
if (flag & HASH_ADD) {
if (!(flag & HASH_UPDATE_INDIRECT)) {
return NULL;
}
ZEND_ASSERT(&p->val != pData);
data = &p->val;
if (Z_TYPE_P(data) == IS_INDIRECT) {
data = Z_INDIRECT_P(data);
if (Z_TYPE_P(data) != IS_UNDEF) {
return NULL;
}
} else {
return NULL;
}
} else {
ZEND_ASSERT(&p->val != pData);
data = &p->val;
if ((flag & HASH_UPDATE_INDIRECT) && Z_TYPE_P(data) == IS_INDIRECT) {
data = Z_INDIRECT_P(data);
}
}
HANDLE_BLOCK_INTERRUPTIONS();
if (ht->pDestructor) {
ht->pDestructor(data);
}
ZVAL_COPY_VALUE(data, pData);
HANDLE_UNBLOCK_INTERRUPTIONS();
return data;
}
}
ZEND_HASH_IF_FULL_DO_RESIZE(ht); /* If the Hash table is full, resize it */
add_to_hash:
HANDLE_BLOCK_INTERRUPTIONS();
//
idx = ht->nNumUsed++;
ht->nNumOfElements++;
if (ht->nInternalPointer == HT_INVALID_IDX) {
ht->nInternalPointer = idx;
}
zend_hash_iterators_update(ht, HT_INVALID_IDX, idx);
p = ht->arData + idx;
p->key = key;
if (!ZSTR_IS_INTERNED(key)) {
// 非内部zend_string refcount+1
zend_string_addref(key);
ht->u.flags &= ~HASH_FLAG_STATIC_KEYS;
// zend_string->h = Time33哈希
zend_string_hash_val(key);
}
// Bucket->h = zend_string->h
p->h = h = ZSTR_H(key);
// value
ZVAL_COPY_VALUE(&p->val, pData);
// 散列表index
nIndex = h | ht->nTableMask;
// 拉链
Z_NEXT(p->val) = HT_HASH(ht, nIndex);
// 写到ht->arData
HT_HASH(ht, nIndex) = HT_IDX_TO_HASH(idx);
HANDLE_UNBLOCK_INTERRUPTIONS();
return &p->val;
}
rehash
ZEND_API int ZEND_FASTCALL zend_hash_rehash(HashTable *ht)
{
Bucket *p;
uint32_t nIndex, i;
IS_CONSISTENT(ht);
if (UNEXPECTED(ht->nNumOfElements == 0)) {
if (ht->u.flags & HASH_FLAG_INITIALIZED) {
ht->nNumUsed = 0;
HT_HASH_RESET(ht);
}
return SUCCESS;
}
// 重置
HT_HASH_RESET(ht);
i = 0;
p = ht->arData;
if (ht->nNumUsed == ht->nNumOfElements) {
do {
nIndex = p->h | ht->nTableMask;
Z_NEXT(p->val) = HT_HASH(ht, nIndex);
HT_HASH(ht, nIndex) = HT_IDX_TO_HASH(i);
p++;
} while (++i < ht->nNumUsed);
} else {
do {
if (UNEXPECTED(Z_TYPE(p->val) == IS_UNDEF)) {
uint32_t j = i;
Bucket *q = p;
if (EXPECTED(ht->u.v.nIteratorsCount == 0)) {
while (++i < ht->nNumUsed) {
p++;
if (EXPECTED(Z_TYPE_INFO(p->val) != IS_UNDEF)) {
ZVAL_COPY_VALUE(&q->val, &p->val);
q->h = p->h;
nIndex = q->h | ht->nTableMask;
q->key = p->key;
Z_NEXT(q->val) = HT_HASH(ht, nIndex);
HT_HASH(ht, nIndex) = HT_IDX_TO_HASH(j);
if (UNEXPECTED(ht->nInternalPointer == i)) {
ht->nInternalPointer = j;
}
q++;
j++;
}
}
} else {
uint32_t iter_pos = zend_hash_iterators_lower_pos(ht, 0);
while (++i < ht->nNumUsed) {
p++;
if (EXPECTED(Z_TYPE_INFO(p->val) != IS_UNDEF)) {
ZVAL_COPY_VALUE(&q->val, &p->val);
q->h = p->h;
nIndex = q->h | ht->nTableMask;
q->key = p->key;
Z_NEXT(q->val) = HT_HASH(ht, nIndex);
HT_HASH(ht, nIndex) = HT_IDX_TO_HASH(j);
if (UNEXPECTED(ht->nInternalPointer == i)) {
ht->nInternalPointer = j;
}
if (UNEXPECTED(i == iter_pos)) {
zend_hash_iterators_update(ht, i, j);
iter_pos = zend_hash_iterators_lower_pos(ht, iter_pos + 1);
}
q++;
j++;
}
}
}
ht->nNumUsed = j;
break;
}
nIndex = p->h | ht->nTableMask;
Z_NEXT(p->val) = HT_HASH(ht, nIndex);
HT_HASH(ht, nIndex) = HT_IDX_TO_HASH(i);
p++;
} while (++i < ht->nNumUsed);
}
return SUCCESS;
}
resize
static void ZEND_FASTCALL zend_hash_do_resize(HashTable *ht)
{
IS_CONSISTENT(ht);
HT_ASSERT(GC_REFCOUNT(ht) == 1);
// 不扩容,覆盖IS_UNDEF的Bucket
if (ht->nNumUsed > ht->nNumOfElements + (ht->nNumOfElements >> 5)) { /* additional term is there to amortize the cost of compaction */
HANDLE_BLOCK_INTERRUPTIONS();
zend_hash_rehash(ht);
HANDLE_UNBLOCK_INTERRUPTIONS();
// 扩容为原有的2倍
} else if (ht->nTableSize < HT_MAX_SIZE) { /* Let's double the table size */
void *new_data, *old_data = HT_GET_DATA_ADDR(ht);
// 2倍
uint32_t nSize = ht->nTableSize + ht->nTableSize;
Bucket *old_buckets = ht->arData;
HANDLE_BLOCK_INTERRUPTIONS();
new_data = pemalloc(HT_SIZE_EX(nSize, -nSize), ht->u.flags & HASH_FLAG_PERSISTENT);
ht->nTableSize = nSize;
ht->nTableMask = -ht->nTableSize;
HT_SET_DATA_ADDR(ht, new_data);
memcpy(ht->arData, old_buckets, sizeof(Bucket) * ht->nNumUsed);
pefree(old_data, ht->u.flags & HASH_FLAG_PERSISTENT);
zend_hash_rehash(ht);
HANDLE_UNBLOCK_INTERRUPTIONS();
} else {
zend_error_noreturn(E_ERROR, "Possible integer overflow in memory allocation (%zu * %zu + %zu)", ht->nTableSize * 2, sizeof(Bucket) + sizeof(uint32_t), sizeof(Bucket));
}
}