std::unordered_set 是 C++ 标准库中的无序关联容器,基于哈希表实现,用于存储不重复的元素集合。
基本介绍
- 头文件: <unordered_set>
- 元素特性: 唯一、无序
- 时间复杂度: 平均 O(1) 的插入、删除和查找
- 底层实现: 哈希表
基本用法
1. 创建和初始化
#include <iostream>
#include <string>
#include <unordered_set>
#include <utility>
#include <vector>
// Shows the common ways to construct a std::unordered_set<int>:
// default, initializer list, iterator range, copy, and move.
void initializationExamples() {
    // Default construction yields an empty set.
    std::unordered_set<int> emptySet;

    // Construction from a braced list of values.
    std::unordered_set<int> fromList = {1, 2, 3, 4, 5};

    // Construction from any iterator range (here: a vector).
    std::vector<int> source = {6, 7, 8, 9, 10};
    std::unordered_set<int> fromRange(source.begin(), source.end());

    // Copy construction: fromList is left untouched.
    std::unordered_set<int> copied(fromList);

    // Move construction: fromRange must not be relied upon afterwards
    // (it is left in a valid but unspecified state).
    std::unordered_set<int> moved(std::move(fromRange));
}
2. 插入元素
// Demonstrates every way of adding elements to a std::unordered_set:
// single-value insert, duplicate insert, emplace, initializer list, and
// iterator range.
void insertionExamples() {
    std::unordered_set<int> numbers;

    // Single-value insert.
    numbers.insert(10);
    numbers.insert(20);
    numbers.insert(30);

    // Inserting a value that is already present leaves the set unchanged;
    // the bool of the returned (iterator, bool) pair reports whether an
    // insertion actually happened.
    const auto duplicateAttempt = numbers.insert(10);
    const bool duplicateInserted = duplicateAttempt.second;
    if (!duplicateInserted) {
        std::cout << "10 already exists in set" << std::endl;
    }

    // emplace (C++11) constructs the element in place and returns the same
    // kind of (iterator, bool) pair as insert.
    const bool emplaced = numbers.emplace(40).second;
    if (emplaced) {
        std::cout << "40 inserted successfully" << std::endl;
    }

    // Bulk insert from an initializer list.
    numbers.insert({50, 60, 70});

    // Bulk insert from an iterator range.
    const std::vector<int> extraValues = {80, 90, 100};
    numbers.insert(extraValues.begin(), extraValues.end());
}
3. 查找元素
// Demonstrates the three membership queries on a std::unordered_set:
// find, count, and (when compiled as C++20 or later) contains.
void searchExamples() {
    const std::unordered_set<int> values = {10, 20, 30, 40, 50};

    // find returns end() when the key is absent, otherwise an iterator to it.
    const auto hit = values.find(30);
    if (hit == values.end()) {
        std::cout << "30 not found" << std::endl;
    } else {
        std::cout << "Found: " << *hit << std::endl;
    }

    // count reports how many elements match the key; since elements are
    // unique, that is either 0 or 1.
    if (values.count(25) == 0) {
        std::cout << "25 does not exist" << std::endl;
    } else {
        std::cout << "25 exists" << std::endl;
    }

    // contains is only available from C++20 onwards, hence the guard.
#if __cplusplus >= 202002L
    if (values.contains(40)) {
        std::cout << "40 exists" << std::endl;
    }
#endif
}
4. 删除元素
// Demonstrates the ways of removing elements from a std::unordered_set:
// by key, by iterator, by iterator range, and clear().
void deletionExamples() {
    std::unordered_set<int> values = {10, 20, 30, 40, 50, 60, 70};

    // Erase by key: the return value is the number of elements removed.
    const size_t removed = values.erase(30);
    std::cout << "Removed " << removed << " elements" << std::endl;

    // Erase by iterator: only valid when find() actually located the key.
    const auto fortyPos = values.find(40);
    if (fortyPos != values.end()) {
        values.erase(fortyPos);
    }

    // Erase an iterator range. NOTE: iteration order of an unordered_set is
    // unspecified, so [find(50), end()) removes 50 plus whichever elements
    // happen to follow it in the container's internal order - it does NOT
    // mean "every value >= 50".
    const auto rangeStart = values.find(50);
    if (rangeStart != values.end()) {
        values.erase(rangeStart, values.end());
    }

    // Remove everything that is left.
    values.clear();
}
5. 遍历元素
// Prints the same set three times, once per traversal technique:
// explicit iterators, range-based for, and const iterators.
// The order in which the elements appear is unspecified.
void traversalExamples() {
    std::unordered_set<std::string> fruits = {"apple", "banana", "orange", "grape"};

    // Explicit iterator pair: begin() / end().
    std::cout << "Using iterator:" << std::endl;
    auto cursor = fruits.begin();
    while (cursor != fruits.end()) {
        std::cout << *cursor << " ";
        ++cursor;
    }
    std::cout << std::endl;

    // Range-based for; binding by const reference avoids copying strings.
    std::cout << "Using range-based for:" << std::endl;
    for (const auto& name : fruits) {
        std::cout << name << " ";
    }
    std::cout << std::endl;

    // Explicit const iterator pair: cbegin() / cend().
    std::cout << "Using const iterator:" << std::endl;
    auto constCursor = fruits.cbegin();
    while (constCursor != fruits.cend()) {
        std::cout << *constCursor << " ";
        ++constCursor;
    }
    std::cout << std::endl;
}
容量查询
// Prints the size, emptiness, and theoretical maximum size of a small set.
// Side effect: leaves std::boolalpha set on std::cout.
void capacityExamples() {
    const std::unordered_set<int> sample = {1, 2, 3, 4, 5};

    const auto elementCount = sample.size();
    std::cout << "Size: " << elementCount << std::endl;

    // std::boolalpha makes the bool print as "true"/"false" instead of 1/0.
    const bool isEmpty = sample.empty();
    std::cout << "Empty: " << std::boolalpha << isEmpty << std::endl;

    // max_size() is an implementation-defined upper bound.
    std::cout << "Max size: " << sample.max_size() << std::endl;
}
哈希相关操作
桶接口
// Walks through the bucket interface of std::unordered_set: total bucket
// count, per-bucket occupancy, and the bucket a given key hashes to.
// All printed numbers are implementation-dependent.
void bucketInterfaceExamples() {
    const std::unordered_set<std::string> fruits = {
        "apple", "banana", "orange", "grape",
        "melon", "peach", "pear", "kiwi"
    };

    std::cout << "Bucket count: " << fruits.bucket_count() << std::endl;
    std::cout << "Max bucket count: " << fruits.max_bucket_count() << std::endl;

    // Report how many elements landed in each bucket. The set is not
    // modified here, so the bucket count can be read once up front.
    const size_t totalBuckets = fruits.bucket_count();
    for (size_t bucket = 0; bucket != totalBuckets; ++bucket) {
        std::cout << "Bucket " << bucket << " has " << fruits.bucket_size(bucket)
                  << " elements" << std::endl;
    }

    // Ask the container which bucket a specific key belongs to.
    const std::string probe = "apple";
    std::cout << "'" << probe << "' is in bucket " << fruits.bucket(probe) << std::endl;
}
哈希策略
// Demonstrates the hash-policy knobs of std::unordered_set: max load
// factor, reserve, and rehash, then shows the resulting load factor after
// inserting 50 elements. Bucket counts are implementation-dependent.
void hashPolicyExamples() {
    std::unordered_set<int> values;

    // Cap the average number of elements per bucket at 0.7.
    values.max_load_factor(0.7f);
    std::cout << "Current load factor: " << values.load_factor() << std::endl;
    std::cout << "Max load factor: " << values.max_load_factor() << std::endl;

    // reserve(n) sizes the table so that n elements fit without exceeding
    // the max load factor.
    values.reserve(100);
    std::cout << "Bucket count after reserve: " << values.bucket_count() << std::endl;

    // rehash(n) requests at least n buckets.
    values.rehash(50);
    std::cout << "Bucket count after rehash: " << values.bucket_count() << std::endl;

    // Fill the set and observe how the load factor changes.
    int next = 0;
    while (next < 50) {
        values.insert(next);
        ++next;
    }
    std::cout << "Size: " << values.size() << std::endl;
    std::cout << "Bucket count: " << values.bucket_count() << std::endl;
    std::cout << "Load factor: " << values.load_factor() << std::endl;
}
自定义类型
自定义类作为元素
#include <unordered_set>
#include <string>
#include <functional>
// Plain value type used as an unordered_set element in the examples.
// Two persons are equal when both their name and their age match.
class Person {
public:
    std::string name;
    int age;

    Person(const std::string& n, int a) : name(n), age(a) {}

    // Equality is mandatory for unordered containers: it is used to tell
    // apart distinct elements whose hashes collide.
    bool operator==(const Person& other) const {
        if (age != other.age) {
            return false;
        }
        return name == other.name;
    }
};
// 自定义哈希函数
// Hash functor for Person, intended to be passed as the Hash template
// argument of std::unordered_set.
struct PersonHash {
    // Combines the standard hash of the name with the standard hash of the
    // age shifted left by one bit, using XOR.
    std::size_t operator()(const Person& p) const {
        const std::size_t nameHash = std::hash<std::string>{}(p.name);
        const std::size_t ageHash = std::hash<int>{}(p.age);
        return nameHash ^ (ageHash << 1);
    }
};
void customTypeExample() {
std::unordered_set<Person, PersonHash> people;
people.insert(Person("Alice", 25));
people.insert(Person("Bob", 30));
people.insert(Person("Charlie", 35));
// 检查是否包含某个 Person
Person alice("Alice", 25);
if (people.find(alice) != people.end()) {
std::cout << "Alice found in set" << std::endl;
}
}
使用 std::hash 特化
// Specialization of std::hash for the user-defined type Person, so that
// unordered containers can hash Person without an explicit hasher argument.
namespace std {
template <>
struct hash<Person> {
    // Same combination as PersonHash: name hash XOR (age hash << 1).
    std::size_t operator()(const Person& p) const {
        const std::size_t nameHash = std::hash<std::string>{}(p.name);
        const std::size_t ageHash = std::hash<int>{}(p.age);
        return nameHash ^ (ageHash << 1);
    }
};
}  // namespace std
void customTypeExample2() {
// 现在可以省略哈希函数模板参数
std::unordered_set<Person> people;
people.insert(Person("Alice", 25));
people.insert(Person("Bob", 30));
}
性能优化技巧
1. 预分配空间
// Reserves room for the expected number of elements before inserting them.
void reserveExample() {
    std::unordered_set<int> values;

    // Size the hash table up front for 1000 elements instead of letting it
    // grow while the loop below is inserting.
    values.reserve(1000);

    int next = 0;
    while (next < 1000) {
        values.insert(next);
        ++next;
    }
}
2. 调整负载因子
// Lowers the max load factor to 0.5, inserts 1000 elements, and prints the
// resulting load factor and bucket count (both implementation-dependent).
void loadFactorExample() {
    std::unordered_set<int> values;

    // A lower max load factor means fewer elements per bucket on average,
    // which can reduce collisions at the price of more buckets.
    values.max_load_factor(0.5f);

    int next = 0;
    while (next < 1000) {
        values.insert(next);
        ++next;
    }

    const float finalLoad = values.load_factor();
    std::cout << "Final load factor: " << finalLoad << std::endl;
    std::cout << "Bucket count: " << values.bucket_count() << std::endl;
}
实际应用示例
1. 去重
#include <unordered_set>
#include <vector>
#include <iostream>
// Returns the elements of `input` with duplicates removed, keeping the
// first occurrence of each value and preserving the original order.
std::vector<int> removeDuplicates(const std::vector<int>& input) {
    std::vector<int> uniqueInOrder;
    std::unordered_set<int> alreadySeen;

    for (auto it = input.begin(); it != input.end(); ++it) {
        // insert() reports through .second whether the value was new.
        const bool firstOccurrence = alreadySeen.insert(*it).second;
        if (!firstOccurrence) {
            continue;
        }
        uniqueInOrder.push_back(*it);
    }
    return uniqueInOrder;
}
// Runs removeDuplicates on a sample vector and prints the input and the
// de-duplicated result on separate lines.
void deduplicationExample() {
    const std::vector<int> numbers = {1, 2, 2, 3, 4, 4, 4, 5, 1, 6};
    const std::vector<int> uniqueNumbers = removeDuplicates(numbers);

    // Prints a label followed by every value, each followed by a space.
    const auto printRow = [](const char* label, const std::vector<int>& row) {
        std::cout << label;
        for (const int value : row) {
            std::cout << value << " ";
        }
        std::cout << std::endl;
    };

    printRow("Original: ", numbers);
    printRow("Unique: ", uniqueNumbers);
}
2. 查找共同元素
#include <unordered_set>
#include <vector>
// Returns the values that occur in both `vec1` and `vec2`.
//
// Each shared value is reported exactly once, in the order of its first
// appearance in `vec2`. Duplicates inside either input do not produce
// duplicate entries in the result.
std::vector<int> findCommonElements(const std::vector<int>& vec1,
                                    const std::vector<int>& vec2) {
    // Values of vec1 that have not yet been matched against vec2.
    std::unordered_set<int> unmatched(vec1.begin(), vec1.end());
    std::vector<int> common;
    for (int num : vec2) {
        // erase() returns 1 only the first time a shared value is met; later
        // repeats of the same value in vec2 find nothing left to erase and
        // are therefore not reported again.
        if (unmatched.erase(num) > 0) {
            common.push_back(num);
        }
    }
    return common;
}
// Prints the values shared by two sample vectors, as computed by
// findCommonElements.
void commonElementsExample() {
    const std::vector<int> first = {1, 2, 3, 4, 5};
    const std::vector<int> second = {3, 4, 5, 6, 7};
    const std::vector<int> shared = findCommonElements(first, second);

    std::cout << "Common elements: ";
    for (auto it = shared.begin(); it != shared.end(); ++it) {
        std::cout << *it << " ";
    }
    std::cout << std::endl;
}
3. 缓存实现
#include <unordered_set>
#include <list>
// Fixed-capacity cache of distinct items with first-in-first-out eviction.
//
// `recent` records insertion order (newest at the front) and `cache` is a
// hash index over the same items for O(1) average membership tests. The two
// containers always hold exactly the same set of items.
//
// T must be hashable with std::hash, equality comparable, and copyable;
// print() additionally requires T to be streamable to std::ostream.
template<typename T>
class SimpleCache {
private:
    size_t capacity;              // maximum number of items kept
    std::list<T> recent;          // insertion order, newest first
    std::unordered_set<T> cache;  // membership index over `recent`
public:
    // Creates an empty cache that holds at most `cap` items.
    // A capacity of 0 yields a cache that never stores anything.
    SimpleCache(size_t cap) : capacity(cap) {}

    // Returns true when `item` is currently stored in the cache.
    bool contains(const T& item) const {
        return cache.find(item) != cache.end();
    }

    // Stores `item`, evicting the oldest entry first when the cache is full.
    // Adding an item that is already present is a no-op: its position in the
    // eviction order is left unchanged.
    void add(const T& item) {
        // With no capacity there is nothing to evict and nowhere to store;
        // returning early also avoids calling back() on an empty list.
        if (capacity == 0) {
            return;
        }
        // Skipping duplicates keeps `recent` and `cache` in sync. Otherwise
        // the list would gain a second copy the set ignores, and later
        // evictions would pop stale entries while the set keeps growing.
        if (contains(item)) {
            return;
        }
        // Cache is full: drop the oldest entry from both structures.
        if (cache.size() >= capacity) {
            cache.erase(recent.back());
            recent.pop_back();
        }
        cache.insert(item);
        recent.push_front(item);
    }

    // Prints the cached items from newest to oldest on a single line.
    void print() const {
        std::cout << "Cache contents: ";
        for (const auto& item : recent) {
            std::cout << item << " ";
        }
        std::cout << std::endl;
    }
};
// Fills a 3-slot SimpleCache, adds a fourth item to trigger an eviction,
// and prints the contents and two membership checks.
void cacheExample() {
    SimpleCache<int> cache(3);
    for (int value = 1; value <= 3; ++value) {
        cache.add(value);
    }
    cache.print();  // prints: Cache contents: 3 2 1

    cache.add(4);
    cache.print();  // prints: Cache contents: 4 3 2 (1 has been evicted)

    const bool hasTwo = cache.contains(2);
    std::cout << "Contains 2: " << hasTwo << std::endl;
    const bool hasOne = cache.contains(1);
    std::cout << "Contains 1: " << hasOne << std::endl;
}
与 std::set 的比较
| 特性 | std::unordered_set | std::set |
|---|---|---|
| 底层实现 | 哈希表 | 红黑树 |
| 时间复杂度 | 平均 O(1) | O(log n) |
| 元素顺序 | 无序 | 有序 |
| 内存使用 | 通常更多 | 通常更少 |
| 自定义比较 | 需要哈希函数和相等比较 | 需要比较函数 |
完整示例
#include <unordered_set>
#include <iostream>
#include <string>
// End-to-end tour of std::unordered_set on a set of language names:
// construction, insertion, duplicate handling, lookup, traversal, erasure,
// capacity queries, and hash-policy tuning. Element order and bucket
// figures in the output are implementation-dependent.
void comprehensiveExample() {
    // Construct from an initializer list.
    std::unordered_set<std::string> languages = {
        "C++", "Python", "Java", "JavaScript", "Go", "Rust"
    };

    // Add two more elements, once via insert and once via emplace.
    languages.insert("C#");
    languages.emplace("Swift");

    // A duplicate insert is rejected; .second of the result is false.
    const bool cppInserted = languages.insert("C++").second;
    if (!cppInserted) {
        std::cout << "C++ already exists in set" << std::endl;
    }

    // Membership test.
    const std::string wanted = "Python";
    const bool wantedPresent = languages.find(wanted) != languages.end();
    if (wantedPresent) {
        std::cout << wanted << " found in set" << std::endl;
    }

    // Print every element (unspecified order).
    std::cout << "All languages: ";
    for (auto it = languages.begin(); it != languages.end(); ++it) {
        std::cout << *it << " ";
    }
    std::cout << std::endl;

    // Remove one element by key.
    languages.erase("Java");

    // Report size and hash-table statistics.
    std::cout << "Size: " << languages.size() << std::endl;
    std::cout << "Bucket count: " << languages.bucket_count() << std::endl;
    std::cout << "Load factor: " << languages.load_factor() << std::endl;

    // Tune the table: make room for 20 elements, then set the max load
    // factor to 0.75, and report the statistics again.
    languages.reserve(20);
    languages.max_load_factor(0.75f);
    std::cout << "After optimization:" << std::endl;
    std::cout << "Bucket count: " << languages.bucket_count() << std::endl;
    std::cout << "Load factor: " << languages.load_factor() << std::endl;
}
// Program entry point: runs the end-to-end demo and reports success.
int main() {
    comprehensiveExample();
    // Reaching the end of main without a return is equivalent to `return 0;`.
}
总结
std::unordered_set 是一个高效的集合容器,适用于以下场景:
- 需要快速查找、插入和删除
- 不关心元素顺序
- 需要存储唯一元素
- 有良好的哈希函数可用
优点:
- 平均 O(1) 时间复杂度的操作
- 自动处理重复元素
- 灵活的哈希策略配置
缺点:
- 最坏情况下性能较差(O(n))
- 迭代顺序不确定
- 内存使用通常比 std::set 多
在实际使用中,如果不需要元素有序且哈希函数质量良好,std::unordered_set 通常比 std::set 性能更好。
2305

被折叠的 条评论
为什么被折叠?



