PHP

PHP7源码系列-数组的实现

2019年4月11日

Zend/zend_hash.h

Zend/zend_hash.c

万能的php数组,可以充当多种数据结构list,map等,而且可以包含多种类型的数据
有序的HashTable
拉链法解决hash冲突
Time33

struct

typedef struct _zend_array HashTable;

struct _zend_array {
    zend_refcounted_h gc;
    union {
        struct {
            ZEND_ENDIAN_LOHI_4(
                zend_uchar    flags,
                zend_uchar    nApplyCount,
                zend_uchar    nIteratorsCount,
                zend_uchar    consistency)
        } v;
        uint32_t flags;
    } u;
    uint32_t          nTableMask;
  // 存储
    Bucket           *arData;
  // 已用Bucket数量
    uint32_t          nNumUsed;
  // 有效元素数量 nNumUsed >= nNumOfElements
    uint32_t          nNumOfElements;
  // hashTable总大小
    uint32_t          nTableSize;

    uint32_t          nInternalPointer;
  // 下一个可用的数值索引
    zend_long         nNextFreeElement;
  // 析构函数
    dtor_func_t       pDestructor;
};


typedef struct _Bucket {
  // 值
    zval              val;
  // 数字索引
    zend_ulong        h;                /* hash value (or numeric index)   */
  // 字符串索引
    zend_string      *key;              /* string key or NULL for numerics */
} Bucket;
/*
 * HashTable Data Layout
 * =====================
 *
 *                 +=============================+
 *                 | HT_HASH(ht, ht->nTableMask) |
 *                 | ...                         |
 *                 | HT_HASH(ht, -1)             |
 *                 +-----------------------------+
 * ht->arData ---> | Bucket[0]                   |
 *                 | ...                         |
 *                 | Bucket[ht->nTableSize-1]    |
 *                 +=============================+
 */
$arr["a"] = 1;
$arr["b"] = 2;
$arr["c"] = 3;
$arr["d"] = 4;

unset($arr["c"]);
image.png

初始化

ZEND_API void ZEND_FASTCALL _zend_hash_init(HashTable *ht, uint32_t nSize, dtor_func_t pDestructor, zend_bool persistent ZEND_FILE_LINE_DC)
{
    GC_REFCOUNT(ht) = 1;
    GC_TYPE_INFO(ht) = IS_ARRAY;
    ht->u.flags = (persistent ? HASH_FLAG_PERSISTENT : 0) | HASH_FLAG_APPLY_PROTECTION | HASH_FLAG_STATIC_KEYS;
    ht->nTableMask = HT_MIN_MASK;
    HT_SET_DATA_ADDR(ht, &uninitialized_bucket);
    ht->nNumUsed = 0;
    ht->nNumOfElements = 0;
    ht->nInternalPointer = HT_INVALID_IDX;
    ht->nNextFreeElement = 0;
    ht->pDestructor = pDestructor;
    ht->nTableSize = zend_hash_check_size(nSize);
}

static void zend_always_inline zend_hash_real_init_ex(HashTable *ht, int packed)
{
    HT_ASSERT(GC_REFCOUNT(ht) == 1);
    ZEND_ASSERT(!((ht)->u.flags & HASH_FLAG_INITIALIZED));
    if (packed) {
        HT_SET_DATA_ADDR(ht, pemalloc(HT_SIZE(ht), (ht)->u.flags & HASH_FLAG_PERSISTENT));
        (ht)->u.flags |= HASH_FLAG_INITIALIZED | HASH_FLAG_PACKED;
        HT_HASH_RESET_PACKED(ht);
    } else {
        (ht)->nTableMask = -(ht)->nTableSize;
        HT_SET_DATA_ADDR(ht, pemalloc(HT_SIZE(ht), (ht)->u.flags & HASH_FLAG_PERSISTENT));
        (ht)->u.flags |= HASH_FLAG_INITIALIZED;
        if (EXPECTED(ht->nTableMask == -8)) {
            Bucket *arData = ht->arData;

            HT_HASH_EX(arData, -8) = -1;
            HT_HASH_EX(arData, -7) = -1;
            HT_HASH_EX(arData, -6) = -1;
            HT_HASH_EX(arData, -5) = -1;
            HT_HASH_EX(arData, -4) = -1;
            HT_HASH_EX(arData, -3) = -1;
            HT_HASH_EX(arData, -2) = -1;
            HT_HASH_EX(arData, -1) = -1;
        } else {
            HT_HASH_RESET(ht);
        }
    }
}

查找

static zend_always_inline Bucket *zend_hash_find_bucket(const HashTable *ht, zend_string *key)
{
    zend_ulong h;
    uint32_t nIndex;
    uint32_t idx;
    Bucket *p, *arData;
    // 计算str的hash
    h = zend_string_hash_val(key);
    arData = ht->arData;
  // 散列表中的位置
    nIndex = h | ht->nTableMask;
  // arData中Bucket的位置
    idx = HT_HASH_EX(arData, nIndex);

    while (EXPECTED(idx != HT_INVALID_IDX)) {
        p = HT_HASH_TO_BUCKET_EX(arData, idx);
        if (EXPECTED(p->key == key)) { /* check for the same interned string */
            return p;
        } else if (EXPECTED(p->h == h) &&
             EXPECTED(p->key) &&
             EXPECTED(ZSTR_LEN(p->key) == ZSTR_LEN(key)) &&
         // 比较val      
             EXPECTED(memcmp(ZSTR_VAL(p->key), ZSTR_VAL(key), ZSTR_LEN(key)) == 0)) {
            return p;
        }
    // 下一个Bucket
        idx = Z_NEXT(p->val);
    }
    return NULL;
}

插入

static zend_always_inline zval *_zend_hash_add_or_update_i(HashTable *ht, zend_string *key, zval *pData, uint32_t flag ZEND_FILE_LINE_DC)
{
    zend_ulong h;
    uint32_t nIndex;
    uint32_t idx;
    Bucket *p;

    IS_CONSISTENT(ht);
    HT_ASSERT(GC_REFCOUNT(ht) == 1);

    if (UNEXPECTED(!(ht->u.flags & HASH_FLAG_INITIALIZED))) {
        CHECK_INIT(ht, 0);
        goto add_to_hash;
    } else if (ht->u.flags & HASH_FLAG_PACKED) {
        zend_hash_packed_to_hash(ht);
    } else if ((flag & HASH_ADD_NEW) == 0) {
        p = zend_hash_find_bucket(ht, key);

        if (p) {
            zval *data;

            if (flag & HASH_ADD) {
                if (!(flag & HASH_UPDATE_INDIRECT)) {
                    return NULL;
                }
                ZEND_ASSERT(&p->val != pData);
                data = &p->val;
                if (Z_TYPE_P(data) == IS_INDIRECT) {
                    data = Z_INDIRECT_P(data);
                    if (Z_TYPE_P(data) != IS_UNDEF) {
                        return NULL;
                    }
                } else {
                    return NULL;
                }
            } else {
                ZEND_ASSERT(&p->val != pData);
                data = &p->val;
                if ((flag & HASH_UPDATE_INDIRECT) && Z_TYPE_P(data) == IS_INDIRECT) {
                    data = Z_INDIRECT_P(data);
                }
            }
            HANDLE_BLOCK_INTERRUPTIONS();
            if (ht->pDestructor) {
                ht->pDestructor(data);
            }
            ZVAL_COPY_VALUE(data, pData);
            HANDLE_UNBLOCK_INTERRUPTIONS();
            return data;
        }
    }

    ZEND_HASH_IF_FULL_DO_RESIZE(ht);        /* If the Hash table is full, resize it */

add_to_hash:
    HANDLE_BLOCK_INTERRUPTIONS();
  // 
    idx = ht->nNumUsed++;
    ht->nNumOfElements++;
    if (ht->nInternalPointer == HT_INVALID_IDX) {
        ht->nInternalPointer = idx;
    }
    zend_hash_iterators_update(ht, HT_INVALID_IDX, idx);
    p = ht->arData + idx;
    p->key = key;
    if (!ZSTR_IS_INTERNED(key)) {
    // 非内部zend_string refcount+1
        zend_string_addref(key);
        ht->u.flags &= ~HASH_FLAG_STATIC_KEYS;
    // zend_string->h = Time33哈希
        zend_string_hash_val(key);
    }
  // Bucket->h = zend_string->h
    p->h = h = ZSTR_H(key);
  // value
    ZVAL_COPY_VALUE(&p->val, pData);
  // 散列表index
    nIndex = h | ht->nTableMask;
  // 拉链
    Z_NEXT(p->val) = HT_HASH(ht, nIndex);
  // 写到ht->arData
    HT_HASH(ht, nIndex) = HT_IDX_TO_HASH(idx);
    HANDLE_UNBLOCK_INTERRUPTIONS();

    return &p->val;
}

rehash

ZEND_API int ZEND_FASTCALL zend_hash_rehash(HashTable *ht)
{
    Bucket *p;
    uint32_t nIndex, i;

    IS_CONSISTENT(ht);

    if (UNEXPECTED(ht->nNumOfElements == 0)) {
        if (ht->u.flags & HASH_FLAG_INITIALIZED) {
            ht->nNumUsed = 0;
            HT_HASH_RESET(ht);
        }
        return SUCCESS;
    }
    // 重置
    HT_HASH_RESET(ht);
    i = 0;
    p = ht->arData;
    if (ht->nNumUsed == ht->nNumOfElements) {
        do {
            nIndex = p->h | ht->nTableMask;
            Z_NEXT(p->val) = HT_HASH(ht, nIndex);
            HT_HASH(ht, nIndex) = HT_IDX_TO_HASH(i);
            p++;
        } while (++i < ht->nNumUsed);
    } else {
        do {
            if (UNEXPECTED(Z_TYPE(p->val) == IS_UNDEF)) {
                uint32_t j = i;
                Bucket *q = p;

                if (EXPECTED(ht->u.v.nIteratorsCount == 0)) {
                    while (++i < ht->nNumUsed) {
                        p++;
                        if (EXPECTED(Z_TYPE_INFO(p->val) != IS_UNDEF)) {
                            ZVAL_COPY_VALUE(&q->val, &p->val);
                            q->h = p->h;
                            nIndex = q->h | ht->nTableMask;
                            q->key = p->key;
                            Z_NEXT(q->val) = HT_HASH(ht, nIndex);
                            HT_HASH(ht, nIndex) = HT_IDX_TO_HASH(j);
                            if (UNEXPECTED(ht->nInternalPointer == i)) {
                                ht->nInternalPointer = j;
                            }
                            q++;
                            j++;
                        }
                    }
                } else {
                    uint32_t iter_pos = zend_hash_iterators_lower_pos(ht, 0);

                    while (++i < ht->nNumUsed) {
                        p++;
                        if (EXPECTED(Z_TYPE_INFO(p->val) != IS_UNDEF)) {
                            ZVAL_COPY_VALUE(&q->val, &p->val);
                            q->h = p->h;
                            nIndex = q->h | ht->nTableMask;
                            q->key = p->key;
                            Z_NEXT(q->val) = HT_HASH(ht, nIndex);
                            HT_HASH(ht, nIndex) = HT_IDX_TO_HASH(j);
                            if (UNEXPECTED(ht->nInternalPointer == i)) {
                                ht->nInternalPointer = j;
                            }
                            if (UNEXPECTED(i == iter_pos)) {
                                zend_hash_iterators_update(ht, i, j);
                                iter_pos = zend_hash_iterators_lower_pos(ht, iter_pos + 1);
                            }
                            q++;
                            j++;
                        }
                    }
                }
                ht->nNumUsed = j;
                break;
            }
            nIndex = p->h | ht->nTableMask;
            Z_NEXT(p->val) = HT_HASH(ht, nIndex);
            HT_HASH(ht, nIndex) = HT_IDX_TO_HASH(i);
            p++;
        } while (++i < ht->nNumUsed);
    }
    return SUCCESS;
}

resize

static void ZEND_FASTCALL zend_hash_do_resize(HashTable *ht)
{

    IS_CONSISTENT(ht);
    HT_ASSERT(GC_REFCOUNT(ht) == 1);
    // 不扩容,覆盖IS_UNDEF的Bucket
    if (ht->nNumUsed > ht->nNumOfElements + (ht->nNumOfElements >> 5)) { /* additional term is there to amortize the cost of compaction */
        HANDLE_BLOCK_INTERRUPTIONS();
        zend_hash_rehash(ht);
        HANDLE_UNBLOCK_INTERRUPTIONS();
  // 扩容为原有的2倍
    } else if (ht->nTableSize < HT_MAX_SIZE) {  /* Let's double the table size */
        void *new_data, *old_data = HT_GET_DATA_ADDR(ht);
    // 2倍
        uint32_t nSize = ht->nTableSize + ht->nTableSize;
        Bucket *old_buckets = ht->arData;

        HANDLE_BLOCK_INTERRUPTIONS();
        new_data = pemalloc(HT_SIZE_EX(nSize, -nSize), ht->u.flags & HASH_FLAG_PERSISTENT);
        ht->nTableSize = nSize;
        ht->nTableMask = -ht->nTableSize;
        HT_SET_DATA_ADDR(ht, new_data);
        memcpy(ht->arData, old_buckets, sizeof(Bucket) * ht->nNumUsed);
        pefree(old_data, ht->u.flags & HASH_FLAG_PERSISTENT);
        zend_hash_rehash(ht);
        HANDLE_UNBLOCK_INTERRUPTIONS();
    } else {
        zend_error_noreturn(E_ERROR, "Possible integer overflow in memory allocation (%zu * %zu + %zu)", ht->nTableSize * 2, sizeof(Bucket) + sizeof(uint32_t), sizeof(Bucket));
    }
}

发表评论

电子邮件地址不会被公开。 必填项已用*标注