C++ STL源码剖析之Hashtable
参考:
https://github.com/Light-City/CPlusPlusThings/blob/master/src_analysis/stl/hashtable.md
基于最新的github上的gcc源码来剖析Hashtable
https://github.com/gcc-mirror/gcc/tree/master/libstdc%2B%2B-v3/include/bits
Hashtable源码部分
https://github.com/gcc-mirror/gcc/blob/master/libstdc%2B%2B-v3/include/bits/hashtable.h
169行,可知_Hashtable的模板声明如下:
template<typename _Key, typename _Value, typename _Alloc,
typename _ExtractKey, typename _Equal,
typename _H1, typename _H2, typename _Hash,
typename _RehashPolicy, typename _Traits>
class _Hashtable
: public __detail::_Hashtable_base<_Key, _Value, _ExtractKey, _Equal,
_H1, _H2, _Hash, _Traits>,
public __detail::_Map_base<_Key, _Value, _Alloc, _ExtractKey, _Equal,
_H1, _H2, _Hash, _RehashPolicy, _Traits>,
public __detail::_Insert<_Key, _Value, _Alloc, _ExtractKey, _Equal,
_H1, _H2, _Hash, _RehashPolicy, _Traits>,
public __detail::_Rehash_base<_Key, _Value, _Alloc, _ExtractKey, _Equal,
_H1, _H2, _Hash, _RehashPolicy, _Traits>,
public __detail::_Equality<_Key, _Value, _Alloc, _ExtractKey, _Equal,
_H1, _H2, _Hash, _RehashPolicy, _Traits>,
private __detail::_Hashtable_alloc<
__alloc_rebind<_Alloc,
__detail::_Hash_node<_Value,
_Traits::__hash_cached::value>>>
{
...
};
_Hashtable的基类
https://github.com/gcc-mirror/gcc/blob/master/libstdc%2B%2B-v3/include/bits/hashtable_policy.h
1735行,_Hashtable_base的模板定义如下:
- _Hashtable_base
/**
* Primary class template _Hashtable_base.
*
* Helper class adding management of _Equal functor to
* _Hash_code_base type.
*
* Base class templates are:
* - __detail::_Hash_code_base
* - __detail::_Hashtable_ebo_helper
*/
template<typename _Key, typename _Value,
typename _ExtractKey, typename _Equal,
typename _H1, typename _H2, typename _Hash, typename _Traits>
struct _Hashtable_base
: public _Hash_code_base<_Key, _Value, _ExtractKey, _H1, _H2, _Hash,
_Traits::__hash_cached::value>,
private _Hashtable_ebo_helper<0, _Equal>
{
...
};
- 又有两个基类:
_Hash_code_base
https://github.com/gcc-mirror/gcc/blob/master/libstdc%2B%2B-v3/include/bits/hashtable_policy.h
1194行,_Hash_code_base的模板定义如下:
template<typename _Key, typename _Value, typename _ExtractKey,
typename _H1, typename _H2, typename _Hash,
bool __cache_hash_code>
struct _Hash_code_base;
根据是否缓存,得到其偏特化版本( __cache_hash_code=false/true)
- 使用范围哈希(实际上就是我们通常说的除留余数法),不缓存hash code。
/// Specialization: ranged hash function, no caching hash codes. H1
/// and H2 are provided but ignored. We define a dummy hash code type.
template<typename _Key, typename _Value, typename _ExtractKey,
typename _H1, typename _H2, typename _Hash>
struct _Hash_code_base<_Key, _Value, _ExtractKey, _H1, _H2, _Hash, false>
: private _Hashtable_ebo_helper<0, _ExtractKey>,
private _Hashtable_ebo_helper<1, _Hash>
{
...
};
- 使用范围哈希(实际上就是我们通常说的除留余数法),缓存hash code。
从注释看到,“范围哈希 + 缓存hash code”这种组合没有意义,所以这个偏特化只有声明没有定义。
// No specialization for ranged hash function while caching hash codes.
// That combination is meaningless, and trying to do it is an error.
/// Specialization: ranged hash function, cache hash codes. This
/// combination is meaningless, so we provide only a declaration
/// and no definition.
template<typename _Key, typename _Value, typename _ExtractKey,
typename _H1, typename _H2, typename _Hash>
struct _Hash_code_base<_Key, _Value, _ExtractKey, _H1, _H2, _Hash, true>;
- 有哈希函数以及范围哈希函数,不缓存hash code
/// Specialization: hash function and range-hashing function, no
/// caching of hash codes.
/// Provides typedef and accessor required by C++ 11.
template<typename _Key, typename _Value, typename _ExtractKey,
typename _H1, typename _H2>
struct _Hash_code_base<_Key, _Value, _ExtractKey, _H1, _H2,
_Default_ranged_hash, false>
: private _Hashtable_ebo_helper<0, _ExtractKey>,
private _Hashtable_ebo_helper<1, _H1>,
private _Hashtable_ebo_helper<2, _H2>
{
...
};
- 有哈希函数以及范围哈希函数,缓存hash code
/// Specialization: hash function and range-hashing function,
/// caching hash codes. H is provided but ignored. Provides
/// typedef and accessor required by C++ 11.
template<typename _Key, typename _Value, typename _ExtractKey,
typename _H1, typename _H2>
struct _Hash_code_base<_Key, _Value, _ExtractKey, _H1, _H2,
_Default_ranged_hash, true>
: private _Hashtable_ebo_helper<0, _ExtractKey>,
private _Hashtable_ebo_helper<1, _H1>,
private _Hashtable_ebo_helper<2, _H2>
{
...
};
_Hashtable_ebo_helper
https://github.com/gcc-mirror/gcc/blob/master/libstdc%2B%2B-v3/include/bits/hashtable_policy.h
1119行,_Hashtable_ebo_helper的模板定义如下:
备注:EBO:"Empty base optimization"空白基类最优化
当我们的派生类继承于一个Empty 类时,使用EBO将大大减小内存使用。
Empty 类不是真正的“空”类,只是不包含non-static成员变量,这个类里面往往还包含有enums、typedefs、static成员、non-virtual函数。
/**
* Primary class template _Hashtable_ebo_helper.
*
* Helper class using EBO when it is not forbidden (the type is not
* final) and when it is worth it (the type is empty.)
*/
template<int _Nm, typename _Tp,
bool __use_ebo = !__is_final(_Tp) && __is_empty(_Tp)>
struct _Hashtable_ebo_helper;
/// Specialization using EBO.
template<int _Nm, typename _Tp>
struct _Hashtable_ebo_helper<_Nm, _Tp, true>
: private _Tp
{
...
};
/// Specialization not using EBO.
template<int _Nm, typename _Tp>
struct _Hashtable_ebo_helper<_Nm, _Tp, false>
{
...
};
关于上面的H1和H2
https://github.com/gcc-mirror/gcc/blob/master/libstdc%2B%2B-v3/include/bits/hashtable_policy.h
422行,默认的范围哈希函数定义如下:
默认哈希函数:除留余数法
h(k, N) = h2(h1(k), N)
/// Default range hashing function: use division to fold a large number
/// into the range [0, N).
///
/// Stateless function object; the argument and result types are all
/// std::size_t.
struct _Mod_range_hashing
{
typedef std::size_t first_argument_type;
typedef std::size_t second_argument_type;
typedef std::size_t result_type;
/// Returns the remainder of __num divided by __den.
/// @param __num  The number to fold into the range.
/// @param __den  The size N of the target range; must be non-zero,
///               because `%` with a zero divisor is undefined behaviour.
/// @return A value in [0, __den).
result_type
operator()(first_argument_type __num,
second_argument_type __den) const noexcept
{
return __num % __den; }
};
/// Default ranged hash function H. In principle it should be a
/// function object composed from objects of type H1 and H2 such that
/// h(k, N) = h2(h1(k), N), but that would mean making extra copies of
/// h1 and h2. So instead we'll just use a tag to tell class template
/// hashtable to do that composition.
///
/// The struct is intentionally empty: it declares no data members and no
/// call operator, so it only serves as a type to select on.
struct _Default_ranged_hash {
};
_Mod_range_hashing重载了()操作符,就是个仿函数;_Default_ranged_hash则只是一个空的标签(tag)类型,本身没有重载()操作符。
/// Range hashing function assuming that second arg is a power of 2.
///
/// Stateless function object; the argument and result types are all
/// std::size_t.
struct _Mask_range_hashing
{
typedef std::size_t first_argument_type;
typedef std::size_t second_argument_type;
typedef std::size_t result_type;
/// Keeps only the bits of __num selected by the mask (__den - 1).
/// @param __num  The number to fold into the range.
/// @param __den  The size of the target range. When it is a power of 2
///               the result equals __num % __den; for any other value
///               the mask does not compute a remainder.
/// @return __num & (__den - 1).
result_type
operator()(first_argument_type __num,
second_argument_type __den) const noexcept
{
return __num & (__den - 1); }
};
第二个参数为2的幂时的范围哈希函数
rehash操作
散列表为防止碰撞导致效率下降,在存入数据过多时就必须扩容。
先说结论:“当总的节点个数大于桶的个数(即超过默认值为1.0的最大装载因子)时就会扩容(说明单个桶最多能装的节点数就是桶的总数,这跟侯捷老师的《STL源码剖析》中的hashtable是一致的),每次扩容都保证桶的个数是素数。”
这里声明了_Prime_rehash_policy结构体:
https://github.com/gcc-mirror/gcc/blob/master/libstdc%2B%2B-v3/include/bits/hashtable_policy.h
445行,
/// Default value for rehash policy.
/// 桶的大小(bucket size) 默认通常是最小的素数,从而保证装载因子足够小
/// load factor: 容器当前元素数量与桶数量之比。最大加载因子默认值为1.0
struct _Prime_rehash_policy
{
using __has_load_factor = true_type;
_Prime_rehash_policy(float __z = 1.0) noexcept
: _M_max_load_factor(__z), _M_next_resize(0) {
}
float
max_load_factor() const noexcept
{
return _M_max_load_factor; }
// Return a bucket size no smaller than n.
//该函数会返回一个不小于n的素数作为桶的数目。
std::size_t
_M_next_bkt(std::size_t __n) const;
// Return a bucket count appropriate for n elements
//返回适合存储n个元素的桶的数目
std::size_t
_M_bkt_for_elements(std::size_t __n) const
{
return __builtin_ceill(__n / (long double)_M_max_load_factor); }
// __n_bkt is current bucket count, __n_elt is current element count,
// and __n_ins is number of elements to be inserted. Do we need to
// increase bucket count? If so, return make_pair(true, n), where n
// is the new bucket count. If not, return make_pair(false, 0).
//判断是否需要rehash,
// __n_bkt是当前桶数,__n_elt是当前元素数目,__n_ins是准备插入的元素数目。
//如果需要rehash,则返回make_pair(true,n),其中n是新的桶数。
//否则,则返回make_pair(false,0)。
std::pair<bool, std::size_t>
_M_need_rehash(std::size_t __n_bkt, std::size_t __n_elt,
std::size_t __n_ins) const;
typedef std::size_t _State;
_State
_M_state() const
{
return _M_next_resize; }
void
_M_reset() noexcept
{
_M_next_resize = 0; }
void
_M_reset(_State __state)
{
_M_next_resize = __state; }
static

本文深入解析C++ STL中的Hashtable实现细节,包括基类结构、rehash操作、节点链表、迭代器设计及内部数据结构。重点介绍了_Hashtable的模板声明、构造与析构过程、插入节点函数及桶索引计算方法。

2043

被折叠的 条评论
为什么被折叠?



