C++ std::unordered_set详解

最新推荐文章于 2026-03-27 11:32:16 发布

原创最新推荐文章于 2026-03-27 11:32:16 发布 · 584 阅读

4 ·

本内容遵循CC 4.0 BY-SA版权协议

GEO检测

标签

#c++ #算法 #开发语言

快速掌握C++ 专栏收录该内容

62 篇文章

订阅专栏

std::unordered_set 是 C++ 标准库中的一个无序关联容器，基于哈希表实现，用于存储互不重复的元素集合。

基本介绍

头文件: <unordered_set>
元素特性: 唯一、无序
时间复杂度: 插入、删除和查找平均为 O(1)，最坏情况（大量哈希冲突时）为 O(n)
底层实现: 哈希表

基本用法

1. 创建和初始化

#include <unordered_set>
#include <iostream>
#include <vector>

// Demonstrates the common ways to construct a std::unordered_set<int>.
// The sets are only built, never printed, so the function has no output.
void initializationExamples() {
    // Default construction yields an empty set.
    std::unordered_set<int> emptySet;

    // Construction from an initializer list.
    std::unordered_set<int> fromList{1, 2, 3, 4, 5};

    // Construction from an iterator range over another container.
    const std::vector<int> source{6, 7, 8, 9, 10};
    std::unordered_set<int> fromRange(source.cbegin(), source.cend());

    // Copy construction: fromList keeps its elements.
    std::unordered_set<int> copied = fromList;

    // Move construction: fromRange is left valid but unspecified afterwards.
    std::unordered_set<int> moved = std::move(fromRange);
}

2. 插入元素

// Demonstrates every insertion form offered by std::unordered_set and
// reports, on stdout, whether a duplicate insert and an emplace succeeded.
void insertionExamples() {
    std::unordered_set<int> values;

    // Single-element insert, one value at a time.
    for (int v : {10, 20, 30}) {
        values.insert(v);
    }

    // Inserting an existing value leaves the set unchanged; the bool of the
    // returned (iterator, bool) pair tells whether an insertion happened.
    const bool duplicateInserted = values.insert(10).second;
    if (duplicateInserted == false) {
        std::cout << "10 already exists in set" << std::endl;
    }

    // emplace constructs the element in place and returns the same kind of
    // (iterator, bool) pair as insert.
    const auto emplaced = values.emplace(40);
    if (emplaced.second) {
        std::cout << "40 inserted successfully" << std::endl;
    }

    // Bulk insert from an initializer list.
    values.insert({50, 60, 70});

    // Bulk insert from an iterator range.
    const std::vector<int> extra{80, 90, 100};
    values.insert(extra.cbegin(), extra.cend());
}

3. 查找元素

// Demonstrates the lookup operations of std::unordered_set: find, count
// and (when compiled as C++20 or later) contains.
void searchExamples() {
    const std::unordered_set<int> numbers{10, 20, 30, 40, 50};

    // find returns end() when the key is absent, otherwise an iterator to it.
    const auto hit = numbers.find(30);
    if (hit == numbers.end()) {
        std::cout << "30 not found" << std::endl;
    } else {
        std::cout << "Found: " << *hit << std::endl;
    }

    // count can only be 0 or 1 because elements are unique.
    const bool has25 = numbers.count(25) != 0;
    std::cout << (has25 ? "25 exists" : "25 does not exist") << std::endl;

    // contains is only available from C++20 onwards.
    #if __cplusplus >= 202002L
    if (numbers.contains(40)) {
        std::cout << "40 exists" << std::endl;
    }
    #endif
}

4. 删除元素

// Demonstrates the removal operations of std::unordered_set: erase by key,
// by iterator, by iterator range, and clear.
void deletionExamples() {
    std::unordered_set<int> values{10, 20, 30, 40, 50, 60, 70};

    // Erase by key: the return value is the number of elements removed.
    const size_t removed = values.erase(30);
    std::cout << "Removed " << removed << " elements" << std::endl;

    // Erase by iterator; end() must not be passed, hence the guard.
    const auto at40 = values.find(40);
    if (at40 != values.end()) {
        values.erase(at40);
    }

    // Erase the iterator range [position of 50, end()). Iteration order of an
    // unordered_set is unspecified, so which elements come after 50 (and are
    // therefore removed together with it) depends on the implementation.
    const auto rangeStart = values.find(50);
    if (rangeStart != values.end()) {
        values.erase(rangeStart, values.end());
    }

    // Remove whatever is left.
    values.clear();
}

5. 遍历元素

// Demonstrates three ways to walk a std::unordered_set: explicit iterators,
// a range-based for loop, and const iterators. Each pass prints every
// element followed by a space; the element order is unspecified.
void traversalExamples() {
    const std::unordered_set<std::string> basket{"apple", "banana", "orange", "grape"};

    // Pass 1: begin()/end() iterators.
    std::cout << "Using iterator:" << std::endl;
    auto cursor = basket.begin();
    while (cursor != basket.end()) {
        std::cout << *cursor << " ";
        ++cursor;
    }
    std::cout << std::endl;

    // Pass 2: range-based for, binding each element by const reference.
    std::cout << "Using range-based for:" << std::endl;
    for (const std::string& item : basket) {
        std::cout << item << " ";
    }
    std::cout << std::endl;

    // Pass 3: cbegin()/cend() const iterators.
    std::cout << "Using const iterator:" << std::endl;
    auto constCursor = basket.cbegin();
    while (constCursor != basket.cend()) {
        std::cout << *constCursor << " ";
        ++constCursor;
    }
    std::cout << std::endl;
}

容量查询

// Prints the size, emptiness and theoretical maximum size of a small set.
// Note: std::boolalpha stays set on std::cout after this function returns.
void capacityExamples() {
    const std::unordered_set<int> numbers{1, 2, 3, 4, 5};

    const auto elementCount = numbers.size();
    const bool isEmpty = numbers.empty();
    const auto upperBound = numbers.max_size();

    std::cout << "Size: " << elementCount << std::endl;
    std::cout << "Empty: " << std::boolalpha << isEmpty << std::endl;
    std::cout << "Max size: " << upperBound << std::endl;
}

哈希相关操作

桶接口

// Demonstrates the bucket-inspection interface of std::unordered_set:
// bucket_count, max_bucket_count, bucket_size and bucket.
void bucketInterfaceExamples() {
    const std::unordered_set<std::string> fruits{
        "apple", "banana", "orange", "grape",
        "melon", "peach", "pear", "kiwi"
    };

    // The set is not modified below, so the bucket count can be read once.
    const auto bucketTotal = fruits.bucket_count();
    std::cout << "Bucket count: " << bucketTotal << std::endl;
    std::cout << "Max bucket count: " << fruits.max_bucket_count() << std::endl;

    // Report how many elements landed in each bucket.
    for (size_t index = 0; index != bucketTotal; ++index) {
        const auto occupants = fruits.bucket_size(index);
        std::cout << "Bucket " << index << " has " << occupants
                  << " elements" << std::endl;
    }

    // bucket(key) gives the index of the bucket associated with key.
    const std::string probe = "apple";
    std::cout << "'" << probe << "' is in bucket " << fruits.bucket(probe) << std::endl;
}

哈希策略

// Demonstrates the hash-policy interface of std::unordered_set:
// load_factor, max_load_factor, reserve and rehash. Bucket counts printed
// here depend on the standard library implementation.
void hashPolicyExamples() {
    std::unordered_set<int> table;

    // Set the maximum load factor (average elements per bucket) to 0.7.
    table.max_load_factor(0.7f);
    std::cout << "Current load factor: " << table.load_factor() << std::endl;
    std::cout << "Max load factor: " << table.max_load_factor() << std::endl;

    // reserve(n) requests enough buckets for n elements under the current
    // maximum load factor.
    table.reserve(100);
    std::cout << "Bucket count after reserve: " << table.bucket_count() << std::endl;

    // rehash(n) requests a bucket count of at least n.
    table.rehash(50);
    std::cout << "Bucket count after rehash: " << table.bucket_count() << std::endl;

    // Insert 0..49 and observe how the load factor changes.
    int next = 0;
    while (next < 50) {
        table.insert(next);
        ++next;
    }

    std::cout << "Size: " << table.size() << std::endl;
    std::cout << "Bucket count: " << table.bucket_count() << std::endl;
    std::cout << "Load factor: " << table.load_factor() << std::endl;
}

自定义类型

自定义类作为元素

#include <unordered_set>
#include <string>
#include <functional>

// Simple value type (name + age) used to show how a user-defined class can
// be stored in a std::unordered_set.
class Person {
public:
    std::string name;
    int age;

    Person(const std::string& n, int a)
        : name(n), age(a) {}

    // Equality comparison, required for unordered_set elements: two people
    // are equal when both their age and their name match.
    bool operator==(const Person& other) const {
        if (age != other.age) {
            return false;
        }
        return name == other.name;
    }
};

// 自定义哈希函数
// Hash functor for Person: combines the hash of the name with the hash of
// the age shifted left by one bit, using XOR.
struct PersonHash {
    std::size_t operator()(const Person& p) const {
        const std::size_t nameHash = std::hash<std::string>()(p.name);
        const std::size_t ageHash = std::hash<int>()(p.age);
        return nameHash ^ (ageHash << 1);
    }
};

void customTypeExample() {
    std::unordered_set<Person, PersonHash> people;
    
    people.insert(Person("Alice", 25));
    people.insert(Person("Bob", 30));
    people.insert(Person("Charlie", 35));
    
    // 检查是否包含某个 Person
    Person alice("Alice", 25);
    if (people.find(alice) != people.end()) {
        std::cout << "Alice found in set" << std::endl;
    }
}

使用 std::hash 特化

namespace std {
    // Specialization of std::hash for Person, so that containers can use
    // their default hasher with Person elements. Combines the name hash with
    // the age hash shifted left by one bit, using XOR.
    template<>
    struct hash<Person> {
        std::size_t operator()(const Person& p) const {
            const std::size_t nameHash = std::hash<std::string>()(p.name);
            const std::size_t ageHash = std::hash<int>()(p.age);
            return nameHash ^ (ageHash << 1);
        }
    };
}

void customTypeExample2() {
    // 现在可以省略哈希函数模板参数
    std::unordered_set<Person> people;
    people.insert(Person("Alice", 25));
    people.insert(Person("Bob", 30));
}

性能优化技巧

1. 预分配空间

// Shows how to call reserve up front when the number of elements is known
// in advance, so the table is sized once before the inserts begin.
void reserveExample() {
    std::unordered_set<int> numbers;

    // Request enough buckets for 1000 elements before inserting anything.
    numbers.reserve(1000);

    int value = 0;
    while (value < 1000) {
        numbers.insert(value);
        ++value;
    }
}

2. 调整负载因子

// Lowers the maximum load factor to 0.5, inserts 1000 integers, and prints
// the resulting load factor and bucket count.
void loadFactorExample() {
    std::unordered_set<int> numbers;

    // A lower maximum load factor means fewer elements per bucket on
    // average, trading extra buckets for fewer collisions.
    numbers.max_load_factor(0.5f);

    int value = 0;
    while (value < 1000) {
        numbers.insert(value);
        ++value;
    }

    const auto finalLoad = numbers.load_factor();
    const auto finalBuckets = numbers.bucket_count();
    std::cout << "Final load factor: " << finalLoad << std::endl;
    std::cout << "Bucket count: " << finalBuckets << std::endl;
}

实际应用示例

1. 去重

#include <unordered_set>
#include <vector>
#include <iostream>

// Returns the elements of `input` with duplicates removed, keeping the
// first occurrence of each value and preserving the original order.
std::vector<int> removeDuplicates(const std::vector<int>& input) {
    std::vector<int> uniqueInOrder;
    std::unordered_set<int> alreadySeen;

    for (auto it = input.begin(); it != input.end(); ++it) {
        // insert(...).second is true only the first time a value is seen.
        const bool firstOccurrence = alreadySeen.insert(*it).second;
        if (!firstOccurrence) {
            continue;
        }
        uniqueInOrder.push_back(*it);
    }

    return uniqueInOrder;
}

// Runs removeDuplicates on a sample vector and prints both the original
// and the de-duplicated sequence.
void deduplicationExample() {
    const std::vector<int> numbers{1, 2, 2, 3, 4, 4, 4, 5, 1, 6};
    const std::vector<int> uniqueNumbers = removeDuplicates(numbers);

    // Prints a label followed by every value (each trailed by a space) and
    // then ends the line.
    const auto printRow = [](const char* label, const std::vector<int>& row) {
        std::cout << label;
        for (auto it = row.begin(); it != row.end(); ++it) {
            std::cout << *it << " ";
        }
        std::cout << std::endl;
    };

    printRow("Original: ", numbers);
    printRow("Unique: ", uniqueNumbers);
}

2. 查找共同元素

#include <unordered_set>
#include <vector>

// Returns the values that occur in both `vec1` and `vec2`.
//
// Each common value appears exactly once in the result, in the order of its
// first occurrence in `vec2`. (Previously a value repeated in `vec2` was
// reported once per repetition, so the result could contain duplicates.)
std::vector<int> findCommonElements(const std::vector<int>& vec1, 
                                   const std::vector<int>& vec2) {
    // Values of vec1 that have not been matched yet.
    std::unordered_set<int> unmatched(vec1.begin(), vec1.end());
    std::vector<int> common;
    
    for (int num : vec2) {
        // erase returns 1 only the first time a common value is met; removing
        // it from the set keeps later repetitions in vec2 out of the result.
        if (unmatched.erase(num) > 0) {
            common.push_back(num);
        }
    }
    
    return common;
}

// Runs findCommonElements on two sample vectors and prints the result.
void commonElementsExample() {
    const std::vector<int> first{1, 2, 3, 4, 5};
    const std::vector<int> second{3, 4, 5, 6, 7};

    const std::vector<int> shared = findCommonElements(first, second);

    std::cout << "Common elements: ";
    for (std::vector<int>::size_type i = 0; i < shared.size(); ++i) {
        std::cout << shared[i] << " ";
    }
    std::cout << std::endl;
}

3. 缓存实现

#include <unordered_set>
#include <list>

// Fixed-capacity cache of unique items.
//
// Items are tracked in two structures that are kept in sync: a list ordered
// from most recently added (front) to oldest (back), and a hash set used for
// membership tests. When the cache is full, adding a new item evicts the
// item at the back of the list.
//
// T must be usable as a std::unordered_set element (hashable and
// equality-comparable) and streamable to std::ostream for print().
template<typename T>
class SimpleCache {
private:
    size_t capacity;              // maximum number of items held at once
    std::list<T> recent;          // items, newest at the front, oldest at the back
    std::unordered_set<T> cache;  // the same items, for fast membership tests
    
public:
    // Creates an empty cache that holds at most `cap` items.
    SimpleCache(size_t cap) : capacity(cap) {}
    
    // Returns true when `item` is currently stored in the cache.
    bool contains(const T& item) const {
        return cache.find(item) != cache.end();
    }
    
    // Adds `item` as the most recent entry.
    //
    // - A cache with capacity 0 stores nothing, so the call is a no-op
    //   (previously this read recent.back() on an empty list).
    // - If `item` is already cached it is moved to the front instead of being
    //   pushed a second time; a duplicate list entry would let the list and
    //   the set drift apart and the set grow beyond `capacity`.
    // - Otherwise, if the cache is full, the oldest item is evicted first.
    void add(const T& item) {
        if (capacity == 0) {
            return;
        }
        
        if (cache.find(item) != cache.end()) {
            // Already present: refresh its position only.
            recent.remove(item);
            recent.push_front(item);
            return;
        }
        
        // Evict the oldest item when there is no room left.
        if (cache.size() >= capacity) {
            cache.erase(recent.back());
            recent.pop_back();
        }
        
        cache.insert(item);
        recent.push_front(item);
    }
    
    // Prints the cached items from newest to oldest on one line.
    void print() const {
        std::cout << "Cache contents: ";
        for (const auto& item : recent) {
            std::cout << item << " ";
        }
        std::cout << std::endl;
    }
};

// Exercises SimpleCache with a capacity of three: fills it, adds a fourth
// item to trigger an eviction, then queries membership.
void cacheExample() {
    SimpleCache<int> cache(3);

    for (int value = 1; value <= 3; ++value) {
        cache.add(value);
    }
    cache.print();  // prints "Cache contents: 3 2 1 "

    cache.add(4);
    cache.print();  // prints "Cache contents: 4 3 2 " (1 was evicted)

    const bool hasTwo = cache.contains(2);
    const bool hasOne = cache.contains(1);
    std::cout << "Contains 2: " << hasTwo << std::endl;
    std::cout << "Contains 1: " << hasOne << std::endl;
}

与 std::set 的比较

特性	std::unordered_set	std::set
底层实现	哈希表	红黑树
时间复杂度	平均 O(1)，最坏 O(n)	O(log n)
元素顺序	无序	有序
内存使用	通常更多	通常更少
自定义比较	需要哈希函数和相等比较	需要比较函数

完整示例

#include <unordered_set>
#include <iostream>
#include <string>

// End-to-end tour of std::unordered_set<std::string>: construction,
// insertion, duplicate handling, lookup, traversal, erasure, capacity
// queries and hash-policy tuning. All results are printed to stdout; the
// traversal order and bucket counts depend on the implementation.
void comprehensiveExample() {
    // Create and initialize.
    std::unordered_set<std::string> programmingLanguages = {
        "C++", "Python", "Java", "JavaScript", "Go", "Rust"
    };
    
    // Insert elements.
    programmingLanguages.insert("C#");
    programmingLanguages.emplace("Swift");
    
    // Try to insert a duplicate; `second` is false when nothing was inserted.
    auto result = programmingLanguages.insert("C++");
    if (!result.second) {
        std::cout << "C++ already exists in set" << std::endl;
    }
    
    // Look up an element.
    std::string lang = "Python";
    if (programmingLanguages.find(lang) != programmingLanguages.end()) {
        std::cout << lang << " found in set" << std::endl;
    }
    
    // Traverse all elements (order is unspecified).
    std::cout << "All languages: ";
    for (const auto& language : programmingLanguages) {
        std::cout << language << " ";
    }
    std::cout << std::endl;
    
    // Erase an element by key.
    programmingLanguages.erase("Java");
    
    // Capacity information.
    std::cout << "Size: " << programmingLanguages.size() << std::endl;
    std::cout << "Bucket count: " << programmingLanguages.bucket_count() << std::endl;
    std::cout << "Load factor: " << programmingLanguages.load_factor() << std::endl;
    
    // Performance tuning. The maximum load factor must be set BEFORE
    // reserve: reserve sizes the bucket array for the load factor in effect
    // at the time of the call, so lowering the factor afterwards would leave
    // too few buckets to hold 20 elements without a rehash.
    programmingLanguages.max_load_factor(0.75f);
    programmingLanguages.reserve(20);
    
    std::cout << "After optimization:" << std::endl;
    std::cout << "Bucket count: " << programmingLanguages.bucket_count() << std::endl;
    std::cout << "Load factor: " << programmingLanguages.load_factor() << std::endl;
}

// Program entry point: runs the comprehensive demonstration. Falling off the
// end of main is equivalent to returning 0.
int main()
{
    comprehensiveExample();
}