022-生成器与迭代器

022-生成器与迭代器

🔴 难度: 高级 | ⏱️ 预计时间: 5小时 | 📋 前置: 021-装饰器原理与应用

学习目标

完成本章节后,你将能够:

  • 深入理解迭代器协议和生成器的工作原理
  • 掌握生成器函数和生成器表达式的使用
  • 学会创建自定义迭代器和生成器
  • 理解协程的基础概念和应用
  • 掌握生成器在内存优化和性能提升中的应用
  • 学会使用itertools模块进行高效的迭代操作

迭代器基础

迭代器协议

迭代器协议是Python中实现迭代的标准方式,包含两个核心方法:__iter__() 返回迭代器对象本身,__next__() 返回下一个值,并在没有更多元素时抛出 StopIteration:

# 迭代器协议演示
print("=== 迭代器协议演示 ===")

class NumberIterator:
    """Iterator over the numbers from ``start`` up to, but excluding, ``end``.

    Implements the iterator protocol by hand: ``__iter__`` returns the
    object itself and ``__next__`` hands out one value per call.
    """

    def __init__(self, start, end):
        self.current = start
        self.end = end

    def __iter__(self):
        """Return ``self``; an iterator is its own iterable."""
        return self

    def __next__(self):
        """Return the current value and advance; raise ``StopIteration`` at ``end``."""
        if self.current < self.end:
            value = self.current
            self.current = value + 1
            return value
        raise StopIteration

# 使用自定义迭代器
print("\n1. 自定义迭代器:")
number_iter = NumberIterator(1, 5)
for num in number_iter:
    print(f"数字: {num}")

# 手动迭代
print("\n2. 手动迭代:")
number_iter2 = NumberIterator(10, 13)
iterator = iter(number_iter2)
try:
    while True:
        value = next(iterator)
        print(f"手动获取: {value}")
except StopIteration:
    print("迭代结束")

# 内置类型的迭代器
print("\n3. 内置类型迭代器:")
my_list = [1, 2, 3, 4, 5]
list_iter = iter(my_list)
print(f"列表迭代器类型: {type(list_iter)}")
print(f"前三个元素: {[next(list_iter) for _ in range(3)]}")

可迭代对象 vs 迭代器

理解可迭代对象和迭代器的区别是掌握Python迭代机制的关键:

# 可迭代对象 vs 迭代器
print("\n=== 可迭代对象 vs 迭代器 ===")

from collections.abc import Iterable, Iterator

class CountDown:
    """Re-iterable countdown: every ``iter()`` call starts again from ``start``."""

    def __init__(self, start):
        self.start = start

    def __iter__(self):
        """Build and return a fresh ``CountDownIterator`` seeded with ``start``."""
        fresh_iterator = CountDownIterator(self.start)
        return fresh_iterator

class CountDownIterator:
    """One-shot iterator yielding ``start``, ``start - 1``, ... down to 1."""

    def __init__(self, start):
        self.current = start

    def __iter__(self):
        """Return ``self`` so the iterator can be used directly in a ``for`` loop."""
        return self

    def __next__(self):
        """Return the current count and decrement it; stop once it is <= 0."""
        if self.current > 0:
            value = self.current
            self.current = value - 1
            return value
        raise StopIteration

# 测试可迭代对象和迭代器
countdown = CountDown(3)

print("\n1. 类型检查:")
print(f"CountDown是可迭代的: {isinstance(countdown, Iterable)}")
print(f"CountDown是迭代器: {isinstance(countdown, Iterator)}")

countdown_iter = iter(countdown)
print(f"CountDownIterator是可迭代的: {isinstance(countdown_iter, Iterable)}")
print(f"CountDownIterator是迭代器: {isinstance(countdown_iter, Iterator)}")

print("\n2. 多次迭代:")
# 可迭代对象可以多次迭代
for i in countdown:
    print(f"第一次迭代: {i}")

for i in countdown:
    print(f"第二次迭代: {i}")

print("\n3. 迭代器只能迭代一次:")
iterator = iter(countdown)
print(f"第一次: {list(iterator)}")
print(f"第二次: {list(iterator)}")  # 空列表,因为迭代器已耗尽

高级迭代器模式

# 高级迭代器模式
print("\n=== 高级迭代器模式 ===")

import itertools
from typing import Any, Iterator, Optional

class ChainedIterator:
    """Iterator that walks several iterables back to back as one sequence."""

    # Private marker returned by next() when the active sub-iterator is empty.
    _DONE = object()

    def __init__(self, *iterables):
        self.iterables = iterables
        self.current_iter = None
        self.iter_index = 0

    def __iter__(self):
        """Return ``self``; the chain can only be consumed once."""
        return self

    def __next__(self):
        """Return the next item, moving on to the next iterable when one runs dry."""
        sources = self.iterables
        while self.iter_index < len(sources):
            # Open the sub-iterator lazily, only when it is first needed.
            if self.current_iter is None:
                self.current_iter = iter(sources[self.iter_index])

            item = next(self.current_iter, self._DONE)
            if item is not self._DONE:
                return item

            # Current source is exhausted: advance to the following one.
            self.current_iter = None
            self.iter_index += 1

        raise StopIteration

class FilteredIterator:
    """Iterator that passes through only the items accepted by ``predicate``."""

    def __init__(self, iterable, predicate):
        self.iterator = iter(iterable)
        self.predicate = predicate

    def __iter__(self):
        """Return ``self``."""
        return self

    def __next__(self):
        """Skip rejected items and return the next accepted one."""
        for candidate in self.iterator:
            if self.predicate(candidate):
                return candidate
        # Underlying iterator ran out without another match.
        raise StopIteration

class TransformIterator:
    """Iterator that applies ``transform_func`` to every item it pulls."""

    def __init__(self, iterable, transform_func):
        self.iterator = iter(iterable)
        self.transform_func = transform_func

    def __iter__(self):
        """Return ``self``."""
        return self

    def __next__(self):
        """Return the transformed next item; ``StopIteration`` propagates from the source."""
        return self.transform_func(next(self.iterator))

class TakeIterator:
    """Iterator that yields at most ``count`` items from the wrapped iterable."""

    def __init__(self, iterable, count):
        self.iterator = iter(iterable)
        self.count = count
        self.taken = 0

    def __iter__(self):
        """Return ``self``."""
        return self

    def __next__(self):
        """Return the next item while the quota lasts, then stop."""
        if self.taken < self.count:
            # The source is touched only while the quota is not used up,
            # which keeps infinite sources safe.
            item = next(self.iterator)
            self.taken += 1
            return item
        raise StopIteration

# 演示高级迭代器
print("\n1. 链式迭代器:")
chained = ChainedIterator([1, 2, 3], "abc", [4, 5])
print(f"链式结果: {list(chained)}")

print("\n2. 过滤迭代器:")
filtered = FilteredIterator(range(10), lambda x: x % 2 == 0)
print(f"偶数过滤: {list(filtered)}")

print("\n3. 转换迭代器:")
transformed = TransformIterator([1, 2, 3, 4], lambda x: x ** 2)
print(f"平方转换: {list(transformed)}")

print("\n4. 限制迭代器:")
taken = TakeIterator(itertools.count(1), 5)  # 从无限序列中取5个
print(f"取前5个: {list(taken)}")

print("\n5. 组合使用:")
# 组合多个迭代器
data = range(20)
result = TakeIterator(
    TransformIterator(
        FilteredIterator(data, lambda x: x % 3 == 0),
        lambda x: x * 2
    ),
    3
)
print(f"组合结果: {list(result)}")

生成器函数

基础生成器

生成器函数使用yield关键字,提供了一种简洁的方式来创建迭代器:

# 生成器函数基础
print("\n=== 生成器函数基础 ===")

def simple_generator():
    """Yield 1, 2 and 3, printing a progress message before each step and at the end.

    The prints make the lazy execution visible: each message appears only
    when a consumer resumes the generator.
    """
    checkpoints = ("生成器开始", "生成第一个值后", "生成第二个值后")
    for number, message in enumerate(checkpoints, start=1):
        print(message)
        yield number
    print("生成器结束")

def fibonacci_generator(n):
    """Yield the first ``n`` Fibonacci numbers, starting from 0."""
    produced = 0
    previous, current = 0, 1
    while produced < n:
        yield previous
        produced += 1
        previous, current = current, previous + current

def infinite_sequence():
    """Yield 0, 1, 2, ... without end; the consumer decides when to stop."""
    from itertools import count

    for num in count():
        yield num

def range_generator(start, stop, step=1):
    """Yield ``start, start + step, ...`` until the value reaches ``stop``.

    Works like ``range`` but also accepts non-integer numbers. A positive
    step counts up while the value is below ``stop``; a negative step counts
    down while the value is above ``stop``.

    Raises:
        ValueError: if ``step`` is zero. Because this is a generator, the
            error surfaces on the first ``next()`` call, not at creation.
    """
    if step == 0:
        # A zero step would never reach stop and would loop forever.
        raise ValueError("range_generator() step must not be zero")

    current = start
    if step > 0:
        while current < stop:
            yield current
            current += step
    else:
        while current > stop:
            yield current
            current += step

# 演示生成器函数
print("\n1. 简单生成器:")
gen = simple_generator()
print(f"生成器类型: {type(gen)}")
for value in gen:
    print(f"获得值: {value}")

print("\n2. 斐波那契生成器:")
fib_gen = fibonacci_generator(8)
print(f"前8个斐波那契数: {list(fib_gen)}")

print("\n3. 无限序列生成器:")
inf_gen = infinite_sequence()
print(f"前10个数: {[next(inf_gen) for _ in range(10)]}")

print("\n4. 自定义range生成器:")
custom_range = range_generator(0, 10, 2)
print(f"偶数序列: {list(custom_range)}")

print("\n5. 生成器状态:")
gen2 = simple_generator()
print(f"第一个值: {next(gen2)}")
print(f"第二个值: {next(gen2)}")
print(f"第三个值: {next(gen2)}")
try:
    print(f"第四个值: {next(gen2)}")
except StopIteration:
    print("生成器已耗尽")

生成器方法

生成器对象提供了几个重要的方法来控制执行流程:

# 生成器方法
print("\n=== 生成器方法 ===")

def controllable_generator():
    """Yield three labels, echoing whatever the consumer sends back.

    Demonstrates the control methods of a generator: values passed with
    ``send()`` are printed, an exception injected with ``throw()`` is
    reported and answered with one extra yielded message, and ``close()``
    is acknowledged. The cleanup message is printed in every case.
    """
    print("生成器启动")
    try:
        for label in ("第一个值", "第二个值", "第三个值"):
            # The yield expression evaluates to the value given to send().
            value = yield label
            print(f"接收到: {value}")

    except GeneratorExit:
        print("生成器被关闭")
    except Exception as e:
        print(f"生成器异常: {e}")
        # This value becomes the return value of the throw() call.
        yield f"处理异常: {e}"
    finally:
        print("生成器清理")

def error_handling_generator():
    """Yield 0..4 while letting the consumer trigger and recover from an error.

    Sending the string "error" raises a ``ValueError`` inside the generator;
    it is caught locally and one extra recovery message is yielded before
    the loop moves on to the next number.
    """
    try:
        for i in range(5):
            try:
                # The sent value (None when driven by next()) arrives here.
                value = yield i
                if value == "error":
                    raise ValueError("模拟错误")
                print(f"处理值: {value}")
            except ValueError as e:
                # Also catches a ValueError injected with throw() at the yield above.
                print(f"捕获错误: {e}")
                # Extra yield: the send() that caused the error receives this value.
                yield f"错误恢复: {i}"
    finally:
        # Runs when the generator finishes, is closed, or fails.
        print("生成器结束")

# Demonstrate the generator control methods
print("\n1. send()方法:")
gen = controllable_generator()
print(f"启动: {next(gen)}")
print(f"发送'hello': {gen.send('hello')}")
print(f"发送'world': {gen.send('world')}")

print("\n2. throw()方法:")
gen2 = controllable_generator()
next(gen2)  # prime the generator so it is paused at its first yield
try:
    # Pass an exception instance: the throw(type, value) form is deprecated
    # since Python 3.12.
    gen2.throw(ValueError("测试异常"))
except StopIteration:
    print("生成器正常结束")

print("\n3. close()方法:")
gen3 = controllable_generator()
next(gen3)  # prime the generator
gen3.close()
print("生成器已关闭")

print("\n4. 错误处理生成器:")
error_gen = error_handling_generator()
print(f"值1: {next(error_gen)}")
print(f"发送正常值: {error_gen.send('normal')}")
print(f"值2: {next(error_gen)}")
print(f"发送错误: {error_gen.send('error')}")
print(f"继续: {next(error_gen)}")

生成器表达式

生成器表达式提供了创建生成器的简洁语法:

# 生成器表达式
print("\n=== 生成器表达式 ===")

import sys

# 基础生成器表达式
print("\n1. 基础生成器表达式:")
squares_gen = (x**2 for x in range(10))
print(f"生成器类型: {type(squares_gen)}")
print(f"前5个平方数: {[next(squares_gen) for _ in range(5)]}")

# 条件生成器表达式
print("\n2. 条件生成器表达式:")
even_squares = (x**2 for x in range(20) if x % 2 == 0)
print(f"偶数的平方: {list(even_squares)}")

# 嵌套生成器表达式
print("\n3. 嵌套生成器表达式:")
matrix = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
flattened = (item for row in matrix for item in row)
print(f"扁平化矩阵: {list(flattened)}")

# 内存效率对比
print("\n4. 内存效率对比:")
n = 1000000

# 列表推导式
list_comp = [x**2 for x in range(n)]
list_size = sys.getsizeof(list_comp)
print(f"列表推导式内存: {list_size:,} 字节")

# 生成器表达式
gen_exp = (x**2 for x in range(n))
gen_size = sys.getsizeof(gen_exp)
print(f"生成器表达式内存: {gen_size:,} 字节")
print(f"内存节省: {(list_size - gen_size) / list_size * 100:.1f}%")

# 链式生成器表达式
print("\n5. 链式生成器表达式:")
data = range(100)
filtered = (x for x in data if x % 3 == 0)  # 过滤
transformed = (x * 2 for x in filtered)     # 转换
limited = (x for i, x in enumerate(transformed) if i < 5)  # 限制
result = list(limited)
print(f"链式处理结果: {result}")

# 生成器表达式作为函数参数
print("\n6. 生成器表达式作为函数参数:")
sum_of_squares = sum(x**2 for x in range(10))
print(f"平方和: {sum_of_squares}")

max_even = max(x for x in range(20) if x % 2 == 0)
print(f"最大偶数: {max_even}")

# 复杂的生成器表达式
print("\n7. 复杂的生成器表达式:")
words = ["hello", "world", "python", "generator"]
char_counts = {word: sum(1 for char in word if char.lower() in 'aeiou') 
               for word in words}
print(f"单词元音字母数: {char_counts}")

协程基础

基于生成器的协程

在Python 3.5之前,协程主要通过生成器实现:

# 基于生成器的协程
print("\n=== 基于生成器的协程 ===")

def coroutine_decorator(func):
    """Decorator that primes a generator-based coroutine automatically.

    Calling the decorated function creates the generator and advances it to
    its first ``yield``, so callers can use ``send()`` right away. The value
    produced by that first ``yield`` is discarded.

    Args:
        func: a generator function used as a coroutine.

    Returns:
        A wrapper with the same name and docstring as ``func`` that returns
        the already-started generator.
    """
    import functools

    @functools.wraps(func)  # keep __name__/__doc__ of the wrapped coroutine
    def wrapper(*args, **kwargs):
        gen = func(*args, **kwargs)
        next(gen)  # run up to the first yield
        return gen
    return wrapper

@coroutine_decorator
def accumulator():
    """Coroutine that keeps a running sum of the values sent to it.

    Each ``send(x)`` adds ``x`` and yields the new sum; sending ``None``
    yields the sum unchanged.
    """
    running = 0
    while True:
        received = yield running
        if received is None:
            continue
        running += received

@coroutine_decorator
def averager():
    """Coroutine that yields the mean of all values sent so far.

    Yields 0 until the first value arrives; sending ``None`` leaves the
    mean unchanged.
    """
    running_sum = 0
    samples = 0
    while True:
        # Guard the division: no samples yet means a mean of 0.
        mean = running_sum / samples if samples > 0 else 0
        incoming = yield mean
        if incoming is not None:
            running_sum += incoming
            samples += 1

@coroutine_decorator
def filter_coroutine(predicate, target):
    """Coroutine stage that forwards accepted values to ``target``.

    Every value received through ``send()`` is tested with ``predicate``;
    values that pass are sent on to ``target``, the rest are dropped.
    """
    while True:
        item = yield
        if not predicate(item):
            continue
        target.send(item)

@coroutine_decorator
def printer():
    """Coroutine sink that prints every value sent to it."""
    while True:
        # Execution parks at this yield until the next send().
        value = yield
        print(f"接收到: {value}")

# 演示协程
print("\n1. 累加器协程:")
acc = accumulator()
print(f"初始值: {acc.send(None)}")
print(f"加10: {acc.send(10)}")
print(f"加20: {acc.send(20)}")
print(f"加5: {acc.send(5)}")

print("\n2. 平均值协程:")
avg = averager()
print(f"初始平均值: {avg.send(None)}")
print(f"加入10: {avg.send(10)}")
print(f"加入20: {avg.send(20)}")
print(f"加入30: {avg.send(30)}")

print("\n3. 协程管道:")
# 创建协程管道:过滤偶数 -> 打印
printer_coro = printer()
filter_coro = filter_coroutine(lambda x: x % 2 == 0, printer_coro)

# 发送数据到管道
for i in range(10):
    filter_coro.send(i)

协程状态管理

# 协程状态管理
print("\n=== 协程状态管理 ===")

import inspect
from enum import Enum

class CoroutineState(Enum):
    """Generator lifecycle states, valued like the strings ``inspect`` reports."""
    # The inspect constants are the same strings that
    # inspect.getgeneratorstate() returns.
    CREATED = inspect.GEN_CREATED
    RUNNING = inspect.GEN_RUNNING
    SUSPENDED = inspect.GEN_SUSPENDED
    CLOSED = inspect.GEN_CLOSED

def get_coroutine_state(coro):
    """Return the ``CoroutineState`` member matching the generator's current state."""
    return CoroutineState(inspect.getgeneratorstate(coro))

@coroutine_decorator
def stateful_coroutine():
    """Coroutine that processes sent values until it receives "stop".

    Yields the prompt "等待输入" after each processed value. Receiving
    "stop" ends the loop, so that ``send()`` raises ``StopIteration`` whose
    ``value`` is the returned string. The cleanup message is always printed.
    """
    print("协程启动")
    try:
        value = None
        while value != "stop":
            value = yield "等待输入"
            print(f"处理: {value}")
    except GeneratorExit:
        print("协程被关闭")
    finally:
        print("协程清理")
    return "协程结束"

# 演示协程状态
print("\n1. 协程状态跟踪:")
coro = stateful_coroutine()
print(f"启动后状态: {get_coroutine_state(coro)}")

result = coro.send("hello")
print(f"发送数据后: {result}, 状态: {get_coroutine_state(coro)}")

result = coro.send("world")
print(f"再次发送: {result}, 状态: {get_coroutine_state(coro)}")

try:
    result = coro.send("stop")
except StopIteration as e:
    print(f"协程返回值: {e.value}")
    print(f"最终状态: {get_coroutine_state(coro)}")

高级生成器应用

数据流处理

生成器在数据流处理中非常有用,特别是处理大量数据时:

# 数据流处理
print("\n=== 数据流处理 ===")

import csv
import json
from io import StringIO
from typing import Generator, Any, Dict

def read_csv_data(csv_content: str) -> Generator[Dict[str, str], None, None]:
    """Lazily parse CSV text, yielding one dict per data row keyed by the header."""
    text_buffer = StringIO(csv_content)
    records = csv.DictReader(text_buffer)
    for record in records:
        yield record

def filter_data(data_stream: Generator, condition) -> Generator[Dict[str, str], None, None]:
    """Lazily yield only the items of ``data_stream`` for which ``condition`` is truthy."""
    yield from (item for item in data_stream if condition(item))

def transform_data(data_stream: Generator, transformer) -> Generator[Any, None, None]:
    """Lazily yield ``transformer(item)`` for each item of ``data_stream``."""
    yield from (transformer(item) for item in data_stream)

def batch_data(data_stream: Generator, batch_size: int) -> Generator[list, None, None]:
    """Group the stream into lists of up to ``batch_size`` items.

    Every yielded list is full except possibly the last one, which holds
    whatever was left when the stream ended.
    """
    pending = []
    for item in data_stream:
        pending.append(item)
        if len(pending) < batch_size:
            continue
        yield pending
        pending = []  # start a new list; the yielded one belongs to the caller

    # Flush the incomplete final batch, if any.
    if pending:
        yield pending

def aggregate_data(data_stream: Generator, key_func, agg_func) -> Generator[tuple, None, None]:
    """Group items by ``key_func`` and yield ``(key, agg_func(items))`` per group.

    The whole stream is consumed before the first result is yielded. Groups
    come out in the order their keys were first seen.
    """
    buckets = {}
    for record in data_stream:
        buckets.setdefault(key_func(record), []).append(record)

    for key in buckets:
        yield key, agg_func(buckets[key])

# 模拟CSV数据
csv_data = """name,age,department,salary
Alice,25,Engineering,75000
Bob,30,Engineering,80000
Charlie,35,Marketing,65000
Diana,28,Engineering,78000
Eve,32,Marketing,70000
Frank,29,Sales,60000
Grace,31,Engineering,82000"""

print("\n1. 数据流处理管道:")

# 创建数据处理管道
data_stream = read_csv_data(csv_data)
engineering_stream = filter_data(data_stream, lambda x: x['department'] == 'Engineering')
salary_stream = transform_data(engineering_stream, lambda x: int(x['salary']))
batched_stream = batch_data(salary_stream, 2)

print("工程部门薪资分批处理:")
for i, batch in enumerate(batched_stream, 1):
    print(f"批次 {i}: {batch}")

print("\n2. 数据聚合:")
# 按部门聚合平均薪资
data_stream2 = read_csv_data(csv_data)
agg_stream = aggregate_data(
    data_stream2,
    key_func=lambda x: x['department'],
    agg_func=lambda items: sum(int(item['salary']) for item in items) / len(items)
)

print("各部门平均薪资:")
for department, avg_salary in agg_stream:
    print(f"{department}: ${avg_salary:,.2f}")

print("\n3. 复杂数据处理:")
# 复杂的数据处理管道
def complex_data_pipeline(csv_content: str):
    """Build a pipeline that summarises employees aged 30+ by salary grade.

    Stages: parse the CSV, keep rows with age >= 30, tag each row with a
    salary grade (A: >= 80000, B: >= 70000, otherwise C), then aggregate per
    grade into a dict with the head count, average age and the distinct
    departments.

    Returns:
        The generator produced by ``aggregate_data``; it yields
        ``(grade, stats)`` pairs when iterated.
    """
    def is_thirty_or_older(row):
        return int(row['age']) >= 30

    def add_salary_grade(item):
        """Return a copy of ``item`` extended with a 'salary_grade' key."""
        salary = int(item['salary'])
        grade = 'A' if salary >= 80000 else 'B' if salary >= 70000 else 'C'
        return {**item, 'salary_grade': grade}

    def summarise(items):
        """Collapse one grade's rows into its statistics dict."""
        head_count = len(items)
        return {
            'count': head_count,
            'avg_age': sum(int(item['age']) for item in items) / head_count,
            'departments': list({item['department'] for item in items})
        }

    # Each stage wraps the previous one; nothing runs until iteration starts.
    graded_stream = transform_data(
        filter_data(read_csv_data(csv_content), is_thirty_or_older),
        add_salary_grade,
    )
    return aggregate_data(
        graded_stream,
        key_func=lambda x: x['salary_grade'],
        agg_func=summarise,
    )

result_stream = complex_data_pipeline(csv_data)
print("薪资等级分析(年龄>=30):")
for grade, stats in result_stream:
    print(f"等级 {grade}: {stats}")

内存优化应用

# 内存优化应用
print("\n=== 内存优化应用 ===")

import sys
import time
from typing import Generator

def memory_efficient_file_reader(filename: str, chunk_size: int = 1024) -> Generator[str, None, None]:
    """Yield the text of ``filename`` in pieces of up to ``chunk_size`` characters.

    The file is read as UTF-8. If it does not exist, a notice is printed and
    100 chunks of simulated data are yielded instead.
    """
    try:
        with open(filename, 'r', encoding='utf-8') as file:
            # iter(callable, sentinel) keeps calling read() until it returns
            # the empty string, i.e. end of file.
            for chunk in iter(lambda: file.read(chunk_size), ''):
                yield chunk
    except FileNotFoundError:
        print(f"文件 {filename} 不存在,使用模拟数据")
        # Stand-in content for a large file
        for i in range(100):
            yield f"这是第 {i+1} 行数据\n" * 10

def process_large_dataset(size: int) -> Generator[int, None, None]:
    """Yield one computed result per index in ``range(size)``, one at a time.

    The per-item work stands in for an expensive computation: it is the sum
    of the integers below ``i % 100``.
    """
    for i in range(size):
        yield sum(range(i % 100))

def fibonacci_memory_efficient(n: int) -> Generator[int, None, None]:
    """Yield the first ``n`` Fibonacci numbers, keeping only two values in memory."""
    previous, current = 0, 1
    for _ in range(n):
        yield previous
        previous, current = current, previous + current

def prime_generator(limit: int) -> Generator[int, None, None]:
    """Yield every prime number up to and including ``limit``, in order.

    Uses trial division (not a sieve): 2 is yielded directly, then each odd
    candidate is tested against the odd divisors whose square does not
    exceed it. Nothing is yielded when ``limit`` is below 2.
    """
    if limit < 2:
        return

    yield 2

    # Even numbers above 2 cannot be prime, so only odd candidates are tried.
    for num in range(3, limit + 1, 2):
        divisor = 3
        # Integer comparison avoids the rounding error that a float square
        # root has for very large candidates.
        while divisor * divisor <= num and num % divisor:
            divisor += 2

        # The loop ran past sqrt(num) without finding a factor: num is prime.
        if divisor * divisor > num:
            yield num

# 内存使用对比
print("\n1. 内存使用对比:")

# 传统方式:一次性加载所有数据
def traditional_approach(size: int) -> list:
    """Eagerly compute all ``size`` results and return them in a single list."""
    return [sum(range(i % 100)) for i in range(size)]

# 生成器方式
def generator_approach(size: int) -> Generator[int, None, None]:
    """Return the lazy counterpart of ``traditional_approach``.

    Delegates to ``process_large_dataset``; no result is computed until the
    returned generator is iterated.
    """
    return process_large_dataset(size)

size = 10000

# 测量传统方式的内存使用
start_time = time.time()
traditional_result = traditional_approach(size)
traditional_time = time.time() - start_time
traditional_memory = sys.getsizeof(traditional_result)

print(f"传统方式:")
print(f"  时间: {traditional_time:.4f}秒")
print(f"  内存: {traditional_memory:,}字节")
print(f"  前5个结果: {traditional_result[:5]}")

# 测量生成器方式的内存使用
start_time = time.time()
generator_result = generator_approach(size)
generator_memory = sys.getsizeof(generator_result)

# 只取前5个值来测试
first_five = [next(generator_result) for _ in range(5)]
generator_time = time.time() - start_time

print(f"\n生成器方式:")
print(f"  时间: {generator_time:.4f}秒")
print(f"  内存: {generator_memory:,}字节")
print(f"  前5个结果: {first_five}")
print(f"  内存节省: {(traditional_memory - generator_memory) / traditional_memory * 100:.1f}%")

print("\n2. 大文件处理:")
# 模拟处理大文件
file_reader = memory_efficient_file_reader("large_file.txt", chunk_size=100)
chunk_count = 0
total_chars = 0

for chunk in file_reader:
    chunk_count += 1
    total_chars += len(chunk)
    if chunk_count <= 3:  # 只显示前3个块
        print(f"块 {chunk_count}: {len(chunk)} 字符")
    elif chunk_count == 4:
        print("...")

print(f"总共处理了 {chunk_count} 个块,{total_chars} 个字符")

print("\n3. 质数生成:")
primes = prime_generator(100)
print(f"100以内的质数: {list(primes)}")

print("\n4. 斐波那契数列:")
fib = fibonacci_memory_efficient(15)
print(f"前15个斐波那契数: {list(fib)}")

itertools模块

itertools模块提供了许多有用的迭代器工具:

# itertools模块应用
print("\n=== itertools模块应用 ===")

import itertools
from itertools import *

print("\n1. 无限迭代器:")

# count() - 无限计数
print("count(10, 2):", list(islice(count(10, 2), 5)))

# cycle() - 循环迭代
print("cycle('ABC'):", list(islice(cycle('ABC'), 8)))

# repeat() - 重复值
print("repeat('hello', 3):", list(repeat('hello', 3)))

print("\n2. 有限迭代器:")

# accumulate() - 累积
print("accumulate([1,2,3,4,5]):", list(accumulate([1, 2, 3, 4, 5])))
print("accumulate([1,2,3,4,5], operator.mul):", list(accumulate([1, 2, 3, 4, 5], lambda x, y: x * y)))

# chain() - 连接
print("chain([1,2], [3,4], [5,6]):", list(chain([1, 2], [3, 4], [5, 6])))

# compress() - 压缩
print("compress('ABCDEF', [1,0,1,0,1,1]):", list(compress('ABCDEF', [1, 0, 1, 0, 1, 1])))

# dropwhile() - 丢弃开头
print("dropwhile(lambda x: x<5, [1,4,6,4,1]):", list(dropwhile(lambda x: x < 5, [1, 4, 6, 4, 1])))

# takewhile() - 取开头
print("takewhile(lambda x: x<5, [1,4,6,4,1]):", list(takewhile(lambda x: x < 5, [1, 4, 6, 4, 1])))

# filterfalse() - 过滤假值
print("filterfalse(lambda x: x%2, range(10)):", list(filterfalse(lambda x: x % 2, range(10))))

# groupby() - 分组
data = [1, 1, 2, 2, 2, 3, 1, 1]
print("groupby([1,1,2,2,2,3,1,1]):")
for key, group in groupby(data):
    print(f"  {key}: {list(group)}")

print("\n3. 组合迭代器:")

# product() - 笛卡尔积
print("product('AB', '12'):", list(product('AB', '12')))
print("product([0,1], repeat=3):", list(product([0, 1], repeat=3)))

# permutations() - 排列
print("permutations('ABC', 2):", list(permutations('ABC', 2)))

# combinations() - 组合
print("combinations('ABCD', 2):", list(combinations('ABCD', 2)))

# combinations_with_replacement() - 可重复组合
print("combinations_with_replacement('AB', 2):", list(combinations_with_replacement('AB', 2)))

print("\n4. 实际应用示例:")

# 数据分析应用
def analyze_sales_data():
    """Print a small sales report built with ``groupby``, ``combinations`` and ``accumulate``.

    The report has three parts: sales grouped per date with a daily total,
    every pair of products, and the cumulative sales across the days. The
    daily totals and the product list are derived from the sample records,
    so the sections always agree with each other.
    """
    # Sample sales records: (date, product, units sold)
    sales_data = [
        ('2024-01-01', 'A', 10),
        ('2024-01-01', 'B', 15),
        ('2024-01-01', 'A', 5),
        ('2024-01-02', 'A', 20),
        ('2024-01-02', 'B', 25),
        ('2024-01-02', 'C', 8),
        ('2024-01-03', 'A', 12),
        ('2024-01-03', 'C', 18),
    ]

    # groupby only merges adjacent records, so sort by the grouping key first.
    sales_data.sort(key=lambda x: x[0])
    print("按日期分组的销售数据:")
    daily_totals = []
    for date, group in groupby(sales_data, key=lambda x: x[0]):
        daily_sales = list(group)
        total = sum(item[2] for item in daily_sales)
        daily_totals.append(total)  # reused below for the cumulative figures
        print(f"  {date}: {daily_sales}, 总计: {total}")

    # Pairwise product combinations
    products = sorted({product for _, product, _ in sales_data})
    print("\n产品两两组合:")
    for combo in combinations(products, 2):
        print(f"  组合: {combo}")

    # Running total over the per-day totals computed above
    cumulative = list(accumulate(daily_totals))
    print(f"\n累积销量: {cumulative}")

analyze_sales_data()

# 数据处理管道
def data_processing_pipeline():
    """Print the results of two lazy processing pipelines.

    The first keeps the even numbers of 1..20, squares them, accumulates a
    running sum and takes the first five values. The second chains three
    lists, doubles every element and keeps the results greater than 10.
    """
    # Source data
    numbers = range(1, 21)

    # Pipeline: even numbers -> squares -> running sum -> first five
    evens = (x for x in numbers if x % 2 == 0)
    squares = (x ** 2 for x in evens)
    running_sum = accumulate(squares)
    result = list(islice(running_sum, 5))

    print(f"\n数据处理管道结果: {result}")

    # chain() presents several sources as one continuous stream
    data1, data2, data3 = [1, 2, 3], [4, 5, 6], [7, 8, 9]
    doubled = (x * 2 for x in chain(data1, data2, data3))
    filtered = (x for x in doubled if x > 10)

    print(f"链式处理结果: {list(filtered)}")

data_processing_pipeline()

# 高级组合应用
def advanced_combinations()

## 性能优化与最佳实践

### 生成器性能优化

```python
# 生成器性能优化
print("\n=== 生成器性能优化 ===")

import time
import sys
from typing import Generator, List

def performance_comparison():
    """Print timing and memory figures comparing lists with generators.

    Test 1 builds one million doubled values as a list and as a generator
    expression and reports the build time and the ``sys.getsizeof`` of each
    container. Test 2 shows lazy evaluation: a list comprehension runs a
    slow operation 100 times up front, while a generator expression runs it
    only for the five values actually requested.

    Durations are measured with ``time.perf_counter()``, the clock meant for
    short intervals; ``time.time()`` can be too coarse for these steps.
    """
    n = 1000000

    # Test 1: memory usage
    print("\n1. 内存使用对比:")

    # List: every element is computed and stored immediately
    start_time = time.perf_counter()
    list_result = [x * 2 for x in range(n)]
    list_time = time.perf_counter() - start_time
    list_memory = sys.getsizeof(list_result)

    # Generator: only the generator object is created here
    start_time = time.perf_counter()
    gen_result = (x * 2 for x in range(n))
    gen_time = time.perf_counter() - start_time
    gen_memory = sys.getsizeof(gen_result)

    print(f"列表方式: {list_time:.4f}秒, {list_memory:,}字节")
    print(f"生成器方式: {gen_time:.4f}秒, {gen_memory:,}字节")
    print(f"内存节省: {(list_memory - gen_memory) / list_memory * 100:.1f}%")

    # Test 2: benefit of lazy evaluation
    print("\n2. 惰性求值优势:")

    def expensive_operation(x):
        """Simulate a slow computation that returns ``x`` squared."""
        time.sleep(0.001)  # stand-in for real work
        return x ** 2

    # List comprehension: all 100 calls happen before the slice
    start_time = time.perf_counter()
    list_comp = [expensive_operation(x) for x in range(100)]
    first_five_list = list_comp[:5]
    list_comp_time = time.perf_counter() - start_time

    # Generator expression: only the five requested calls happen
    start_time = time.perf_counter()
    gen_exp = (expensive_operation(x) for x in range(100))
    first_five_gen = [next(gen_exp) for _ in range(5)]
    gen_exp_time = time.perf_counter() - start_time

    print(f"列表推导式(全部计算): {list_comp_time:.4f}秒")
    print(f"生成器表达式(只计算5个): {gen_exp_time:.4f}秒")
    print(f"速度提升: {list_comp_time / gen_exp_time:.1f}倍")

def optimized_generators():
    """Run and time three generator techniques, printing the measurements.

    Covers an iterative Fibonacci generator, line-by-line file streaming
    (with simulated lines when "large_file.txt" is missing) and a chained
    filter-and-transform stage applied to that stream.
    """

    # Technique 1: carry the last two values forward instead of recomputing
    def fibonacci_optimized(n: int) -> Generator[int, None, None]:
        """Yield the first ``n`` Fibonacci numbers."""
        previous, current = 0, 1
        for _ in range(n):
            yield previous
            previous, current = current, previous + current

    # Technique 2: stream the input rather than loading it whole
    def process_large_file_optimized(filename: str) -> Generator[str, None, None]:
        """Yield each non-blank line of the file, stripped and upper-cased.

        Yields 1000 simulated lines instead when the file does not exist.
        """
        try:
            with open(filename, 'r', encoding='utf-8') as file:
                for raw_line in file:  # one line at a time, never the whole file
                    cleaned = raw_line.strip()
                    if cleaned:  # skip blank lines
                        yield cleaned.upper()
        except FileNotFoundError:
            # Simulated data
            for i in range(1000):
                yield f"LINE {i+1}"

    # Technique 3: filter and transform in one lazy stage
    def chain_processing(data_source: Generator) -> Generator[str, None, None]:
        """Drop items of five characters or fewer; rewrite 'LINE' as 'PROCESSED'."""
        for item in data_source:
            if len(item) <= 5:
                continue
            yield item.replace('LINE', 'PROCESSED')

    print("\n3. 优化的生成器应用:")

    # Fibonacci timing
    start_time = time.time()
    list(fibonacci_optimized(1000))
    fib_time = time.time() - start_time
    print(f"生成1000个斐波那契数: {fib_time:.4f}秒")

    # File-processing chain
    processed_gen = chain_processing(process_large_file_optimized("large_file.txt"))

    # Pull only the first 10 results
    start_time = time.time()
    results = []
    for _ in range(10):
        results.append(next(processed_gen))
    process_time = time.time() - start_time

    print(f"处理前10行: {process_time:.4f}秒")
    print(f"结果示例: {results[:3]}")

def memory_efficient_patterns():
    """Demonstrate three streaming patterns and print their results.

    The patterns are fixed-size batching, a sliding window and
    order-preserving de-duplication. Each is a generator that processes its
    input one item at a time.
    """
    from collections import deque
    from typing import Any

    # Pattern 1: batching
    def batch_processor(data: Generator, batch_size: int) -> Generator[List, None, None]:
        """Yield lists of up to ``batch_size`` items; the last one may be shorter."""
        batch = []
        for item in data:
            batch.append(item)
            if len(batch) >= batch_size:
                yield batch
                batch = []
        if batch:
            yield batch

    # Pattern 2: sliding window
    def sliding_window(data: Generator, window_size: int) -> Generator[List, None, None]:
        """Yield each run of ``window_size`` consecutive items as a new list."""
        # A bounded deque drops the oldest item automatically in O(1);
        # list.pop(0) would shift every remaining element.
        window = deque(maxlen=window_size)
        for item in data:
            window.append(item)
            if len(window) == window_size:
                yield list(window)

    # Pattern 3: de-duplication
    def unique_generator(data: Generator) -> Generator[Any, None, None]:
        """Yield each distinct item once, in first-seen order (items must be hashable)."""
        seen = set()
        for item in data:
            if item not in seen:
                seen.add(item)
                yield item

    print("\n4. 内存高效模式:")

    # Test data: the digits 0-9 repeated
    test_data = (x % 10 for x in range(50))

    # Batching
    batched = batch_processor(test_data, 5)
    print("分批处理(批大小=5):")
    for i, batch in enumerate(batched):
        if i < 3:
            print(f"  批次{i+1}: {batch}")
        elif i == 3:
            print("  ...")
            break

    # Sliding window
    test_data2 = (x for x in range(10))
    windowed = sliding_window(test_data2, 3)
    print("\n滑动窗口(窗口大小=3):")
    for window in windowed:
        print(f"  窗口: {window}")

    # De-duplication
    test_data3 = (x % 5 for x in range(20))
    unique_data = unique_generator(test_data3)
    print(f"\n去重结果: {list(unique_data)}")

performance_comparison()
optimized_generators()
memory_efficient_patterns()

生成器调试技巧

# 生成器调试技巧
print("\n=== 生成器调试技巧 ===")

import functools
import inspect
from typing import Generator, Any

def debug_generator(func):
    """Decorator that logs every value a generator function yields.

    The decorated function becomes a generator that prints a
    ``[DEBUG] <name> yielded: <value>`` line before passing each value on,
    and a ``[DEBUG] <name> finished`` line once the wrapped generator is
    exhausted. Values sent into the wrapper are not forwarded to the wrapped
    generator.
    """
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        for value in func(*args, **kwargs):
            print(f"[DEBUG] {func.__name__} yielded: {value}")
            yield value
        # Reached only when the wrapped generator ends normally; an error or
        # an early close() skips this line.
        print(f"[DEBUG] {func.__name__} finished")

    return wrapper

def trace_generator(gen: Generator, name: str = "Generator") -> Generator:
    """Re-yield every value of ``gen`` while printing a trace of the run.

    Prints a start line, one indexed line per value, an error line if an
    exception passes through (the exception is re-raised) and always a
    finish line.
    """
    print(f"[TRACE] {name} started")
    try:
        i = 0
        for value in gen:
            print(f"[TRACE] {name}[{i}]: {value}")
            yield value
            i += 1
    except Exception as e:
        print(f"[TRACE] {name} error: {e}")
        raise
    finally:
        print(f"[TRACE] {name} finished")

def profile_generator(gen: Generator, name: str = "Generator") -> Generator:
    """Re-yield every value of ``gen`` and print timing statistics at the end.

    The report gives the number of items and the elapsed wall-clock time,
    plus the average time per item when at least one item was produced. It
    is printed from a ``finally`` block, so it also appears when the
    consumer stops early.
    """
    import time

    start_time = time.time()
    count = 0

    try:
        # enumerate() keeps the counter in step with the values handed out.
        for count, value in enumerate(gen, start=1):
            yield value
    finally:
        end_time = time.time()
        print(f"[PROFILE] {name}: {count} items in {end_time - start_time:.4f}s")
        if count > 0:
            print(f"[PROFILE] {name}: {(end_time - start_time) / count * 1000:.4f}ms per item")

@debug_generator
def sample_generator(n: int) -> Generator[int, None, None]:
    """Yield the first ``n`` even numbers: 0, 2, 4, ..."""
    for doubled in range(0, 2 * n, 2):
        yield doubled

def problematic_generator() -> Generator[int, None, None]:
    """Yield 0, 1, 2 and then fail with ``ValueError`` when reaching position 3."""
    for i in range(5):
        if i != 3:
            yield i
            continue
        # Deliberate failure used to demonstrate error tracing.
        raise ValueError(f"Error at position {i}")

# 调试示例
print("\n1. 调试装饰器:")
debug_gen = sample_generator(5)
result = list(debug_gen)
print(f"结果: {result}")

print("\n2. 跟踪生成器:")
original_gen = (x ** 2 for x in range(5))
traced_gen = trace_generator(original_gen, "SquareGenerator")
result = list(traced_gen)

print("\n3. 性能分析:")
perf_gen = (x for x in range(1000))
profiled_gen = profile_generator(perf_gen, "RangeGenerator")
# 只取前100个来测试
result = [next(profiled_gen) for _ in range(100)]

print("\n4. 错误处理:")
try:
    error_gen = problematic_generator()
    traced_error_gen = trace_generator(error_gen, "ProblematicGenerator")
    result = list(traced_error_gen)
except ValueError as e:
    print(f"捕获异常: {e}")

# 生成器状态检查
def check_generator_state(gen: Generator, name: str = "Generator"):
    """Print the state of ``gen`` and, while it has a frame, its line and locals."""
    state = inspect.getgeneratorstate(gen)
    print(f"{name} 状态: {state}")

    # A finished generator has gi_frame set to None, so there is nothing
    # further to report.
    if not getattr(gen, 'gi_frame', None):
        return

    print(f"  当前行号: {gen.gi_frame.f_lineno}")
    print(f"  局部变量: {gen.gi_frame.f_locals}")

print("\n5. 生成器状态检查:")
state_gen = sample_generator(3)
check_generator_state(state_gen, "StateGenerator")

# 取一个值
next(state_gen)
check_generator_state(state_gen, "StateGenerator")

# 取完所有值
list(state_gen)
check_generator_state(state_gen, "StateGenerator")

实践练习

练习1:数据流处理系统

创建一个完整的数据流处理系统,支持多种数据源和处理操作。

# 练习1:数据流处理系统
print("\n=== 练习1:数据流处理系统 ===")

import json
import csv
import random
from typing import Generator, Dict, Any, Callable, Optional
from abc import ABC, abstractmethod
from io import StringIO
from datetime import datetime, timedelta

class DataSource(ABC):
    """Abstract base class for anything that can supply records to a pipeline."""

    @abstractmethod
    def generate_data(self) -> Generator[Dict[str, Any], None, None]:
        """Yield the records of this source one at a time; subclasses must override."""

class CSVDataSource(DataSource):
    """Data source that parses CSV text held in memory."""

    def __init__(self, csv_content: str):
        self.csv_content = csv_content

    def generate_data(self) -> Generator[Dict[str, Any], None, None]:
        """Yield one dict per CSV data row, keyed by the header names."""
        text_buffer = StringIO(self.csv_content)
        records = csv.DictReader(text_buffer)
        for record in records:
            yield record

class JSONDataSource(DataSource):
    """Data source backed by an already-decoded list of records."""

    def __init__(self, json_data: list):
        self.json_data = json_data

    def generate_data(self) -> Generator[Dict[str, Any], None, None]:
        """Yield the stored records in list order."""
        yield from (item for item in self.json_data)

class RandomDataSource(DataSource):
    """Data source that fabricates ``count`` random records.

    Each record has a sequential 'id' starting at 1, a random 'value'
    between 1 and 100, a random 'category' out of 'A', 'B' or 'C', and a
    'timestamp' up to 30 days before the current time.

    Args:
        count: number of records to generate.
        seed: optional seed. When given (0 included) it is passed to
            ``random.seed``, which reseeds the module-level generator of
            ``random`` at construction time.
    """

    def __init__(self, count: int, seed: Optional[int] = None):
        self.count = count
        # Compare against None: a plain truthiness test would silently
        # ignore the perfectly valid seed 0.
        if seed is not None:
            random.seed(seed)

    def generate_data(self) -> Generator[Dict[str, Any], None, None]:
        """Yield ``count`` freshly generated random records."""
        for i in range(self.count):
            yield {
                'id': i + 1,
                'value': random.randint(1, 100),
                'category': random.choice(['A', 'B', 'C']),
                'timestamp': datetime.now() - timedelta(days=random.randint(0, 30))
            }

class DataProcessor:
    """Fluent builder for a lazy, generator-based processing pipeline.

    Every builder method appends one stage to ``self.processors`` and
    returns ``self`` so calls can be chained. Nothing is read from the data
    source until the stream returned by :meth:`execute` is iterated.
    """

    def __init__(self, data_source: 'DataSource'):
        self.data_source = data_source
        # Stages in application order; each maps an input stream to an
        # output stream.
        self.processors = []

    def filter(self, predicate: Callable[[Dict[str, Any]], bool]) -> 'DataProcessor':
        """Add a stage that keeps only items for which ``predicate`` is truthy."""
        def filter_processor(data_stream):
            for item in data_stream:
                if predicate(item):
                    yield item

        self.processors.append(filter_processor)
        return self

    def map(self, transformer: Callable[[Dict[str, Any]], Dict[str, Any]]) -> 'DataProcessor':
        """Add a stage that replaces every item with ``transformer(item)``."""
        def map_processor(data_stream):
            for item in data_stream:
                yield transformer(item)

        self.processors.append(map_processor)
        return self

    def group_by(self, key_func: Callable[[Dict[str, Any]], str]) -> 'DataProcessor':
        """Add a stage that groups items by ``key_func(item)``.

        The stage must read the whole upstream before emitting anything.
        It yields one dict per group with the keys ``group_key``, ``items``
        and ``count``, in order of first appearance of each key.
        """
        def group_processor(data_stream):
            groups = {}
            for item in data_stream:
                groups.setdefault(key_func(item), []).append(item)

            for key, items in groups.items():
                yield {'group_key': key, 'items': items, 'count': len(items)}

        self.processors.append(group_processor)
        return self

    def aggregate(self, agg_func: Callable[[list], Dict[str, Any]]) -> 'DataProcessor':
        """Add a stage that collapses the whole stream into one record.

        ``agg_func`` receives the list of all upstream items. Nothing is
        yielded when the upstream is empty.
        """
        def agg_processor(data_stream):
            items = list(data_stream)
            if items:
                yield agg_func(items)

        self.processors.append(agg_processor)
        return self

    def take(self, count: int) -> 'DataProcessor':
        """Add a stage that passes through at most ``count`` items.

        The stage stops as soon as the last permitted item has been
        yielded, so it never pulls an extra item from upstream. A ``count``
        of zero or less yields nothing and reads nothing.
        """
        def take_processor(data_stream):
            if count <= 0:
                return
            for taken, item in enumerate(data_stream, start=1):
                yield item
                # Check after yielding: asking upstream for one more item
                # just to discard it is wasteful and may block on a live
                # stream.
                if taken >= count:
                    return

        self.processors.append(take_processor)
        return self

    def execute(self) -> Generator[Dict[str, Any], None, None]:
        """Build the pipeline and return its (not yet consumed) output stream.

        A fresh stream is requested from the data source on every call and
        each registered stage is wrapped around it in registration order.
        """
        data_stream = self.data_source.generate_data()

        for processor in self.processors:
            data_stream = processor(data_stream)

        return data_stream

class DataSink:
    """Terminal operations that drain a data stream into a concrete result."""

    @staticmethod
    def to_list(data_stream: Generator) -> list:
        """Materialize the whole stream as a list."""
        return [*data_stream]

    @staticmethod
    def to_json(data_stream: Generator, indent: int = 2) -> str:
        """Serialize the whole stream as a JSON array.

        Values that JSON cannot encode natively are converted with ``str``.
        """
        return json.dumps(list(data_stream), indent=indent, default=str)

    @staticmethod
    def to_csv(data_stream: Generator) -> str:
        """Serialize the whole stream as CSV text.

        The header is taken from the keys of the first record; an empty
        stream produces an empty string.
        """
        records = list(data_stream)
        if len(records) == 0:
            return ""

        buffer = StringIO()
        writer = csv.DictWriter(buffer, fieldnames=records[0].keys())
        writer.writeheader()
        for record in records:
            writer.writerow(record)
        return buffer.getvalue()

    @staticmethod
    def print_summary(data_stream: Generator, title: str = "数据摘要"):
        """Print the record count, the field names and the first three records."""
        records = list(data_stream)
        total = len(records)

        print(f"\n{title}:")
        print(f"  记录数: {total}")
        if not records:
            return

        print(f"  字段: {list(records[0].keys())}")
        print("  前3条记录:")
        for position, record in enumerate(records[:3], start=1):
            print(f"    {position}. {record}")

        remaining = total - 3
        if remaining > 0:
            print(f"    ... 还有 {remaining} 条记录")

# Demonstrate the data stream processing system
print("\n=== 数据流处理系统演示 ===")

# Prepare the test data
csv_data = """id,name,age,department,salary
1,Alice,25,Engineering,75000
2,Bob,30,Engineering,80000
3,Charlie,35,Marketing,65000
4,Diana,28,Engineering,78000
5,Eve,32,Marketing,70000
6,Frank,29,Sales,60000
7,Grace,31,Engineering,82000
8,Henry,27,Sales,58000"""

json_data = [
    {'product': 'A', 'sales': 100, 'region': 'North'},
    {'product': 'B', 'sales': 150, 'region': 'South'},
    {'product': 'A', 'sales': 120, 'region': 'East'},
    {'product': 'C', 'sales': 80, 'region': 'West'},
    {'product': 'B', 'sales': 200, 'region': 'North'},
]

print("\n1. CSV数据处理:")
# Process the CSV data: keep the Engineering department, compute the average salary
csv_source = CSVDataSource(csv_data)
csv_processor = DataProcessor(csv_source)

result = (csv_processor
          .filter(lambda x: x['department'] == 'Engineering')
          .map(lambda x: {**x, 'salary': int(x['salary'])})
          .aggregate(lambda items: {
              'department': 'Engineering',
              'count': len(items),
              'avg_salary': sum(item['salary'] for item in items) / len(items),
              'total_salary': sum(item['salary'] for item in items)
          })
          .execute())

DataSink.print_summary(result, "工程部门薪资统计")

print("\n2. JSON数据处理:")
# Process the JSON data: group by product, compute total sales
json_source = JSONDataSource(json_data)
json_processor = DataProcessor(json_source)

result = (json_processor
          .group_by(lambda x: x['product'])
          .map(lambda group: {
              'product': group['group_key'],
              'total_sales': sum(item['sales'] for item in group['items']),
              'regions': list(set(item['region'] for item in group['items'])),
              'avg_sales': sum(item['sales'] for item in group['items']) / len(group['items'])
          })
          .execute())

DataSink.print_summary(result, "产品销售统计")

print("\n3. 随机数据处理:")
# Process random data: keep high values, group by category
random_source = RandomDataSource(20, seed=42)
random_processor = DataProcessor(random_source)

result = (random_processor
          .filter(lambda x: x['value'] > 50)
          .map(lambda x: {**x, 'value_grade': 'High' if x['value'] > 80 else 'Medium'})
          .group_by(lambda x: x['category'])
          .take(3)
          .execute())

DataSink.print_summary(result, "随机数据分析")

print("\n4. 复杂数据管道:")
# A more complex processing pipeline; csv_source is reused because
# execute() requests a fresh stream from the source each time.
complex_processor = DataProcessor(csv_source)

result = (complex_processor
          .filter(lambda x: int(x['age']) >= 30)  # age >= 30
          .map(lambda x: {  # add a salary grade
              **x,
              'age': int(x['age']),
              'salary': int(x['salary']),
              'salary_grade': 'A' if int(x['salary']) >= 80000 else 'B' if int(x['salary']) >= 70000 else 'C'
          })
          .group_by(lambda x: x['department'])  # group by department
          .map(lambda group: {  # compute per-department statistics
              'department': group['group_key'],
              'employee_count': group['count'],
              'avg_age': sum(emp['age'] for emp in group['items']) / len(group['items']),
              'avg_salary': sum(emp['salary'] for emp in group['items']) / len(group['items']),
              'salary_grades': {grade: sum(1 for emp in group['items'] if emp['salary_grade'] == grade) 
                              for grade in ['A', 'B', 'C']}
          })
          .execute())

DataSink.print_summary(result, "部门综合分析(年龄>=30)")

# Emit the data in different output formats
print("\n5. 数据输出格式:")
simple_data = DataProcessor(JSONDataSource(json_data[:3])).execute()

# Convert to JSON
json_output = DataSink.to_json(simple_data)
print(f"JSON格式:\n{json_output}")

# Build the stream again (the previous generator is already exhausted)
simple_data2 = DataProcessor(JSONDataSource(json_data[:3])).execute()
csv_output = DataSink.to_csv(simple_data2)
print(f"\nCSV格式:\n{csv_output}")

练习2:实时数据监控系统

创建一个实时数据监控系统,使用生成器处理连续的数据流。

# Exercise 2: real-time data monitoring system
print("\n=== 练习2:实时数据监控系统 ===")

import time
import random
import threading
from collections import deque
from typing import Generator, Dict, Any, Optional
from datetime import datetime
from queue import Queue, Empty

class RealTimeDataSource:
    """Simulated real-time source that produces records on a background thread."""

    def __init__(self, source_name: str):
        self.source_name = source_name
        self.is_running = False
        self.data_queue = Queue()
        self.thread = None

    def start(self):
        """Launch the producer thread; does nothing when already running."""
        if self.is_running:
            return

        self.is_running = True
        self.thread = threading.Thread(target=self._generate_data)
        # Daemon thread: it must not keep the interpreter alive on exit.
        self.thread.daemon = True
        self.thread.start()

    def stop(self):
        """Ask the producer to stop and wait for its thread to finish."""
        self.is_running = False
        worker = self.thread
        if worker:
            worker.join()

    def _generate_data(self):
        """Producer loop: enqueue one simulated record per iteration."""
        sequence = 0
        while self.is_running:
            self.data_queue.put({
                'source': self.source_name,
                'timestamp': datetime.now(),
                'value': random.uniform(0, 100),
                'status': random.choice(['normal', 'warning', 'error']),
                'counter': sequence
            })
            sequence += 1
            # Random pause between records
            time.sleep(random.uniform(0.1, 0.5))

    def get_data_stream(self) -> Generator[Dict[str, Any], None, None]:
        """Yield queued records until the source is stopped and the queue is empty.

        Each wait on the queue times out after one second so that the
        running flag is re-checked regularly.
        """
        while True:
            try:
                yield self.data_queue.get(timeout=1.0)
            except Empty:
                if self.is_running:
                    continue
                return

class DataMonitor:
    """Sliding-window monitor that annotates each record with statistics and alerts."""

    def __init__(self, window_size: int = 10):
        self.window_size = window_size
        # deque(maxlen=...) drops the oldest record automatically once full.
        self.data_window = deque(maxlen=window_size)
        # Not used by any method of this class; kept so existing callers
        # that read the attribute keep working.
        self.alerts = []

    def process_data_stream(self, data_stream: Generator) -> Generator[Dict[str, Any], None, None]:
        """Yield one monitoring result per incoming record.

        Each record is appended to the window first, so the statistics
        always include the current record. Records are expected to carry
        the keys ``timestamp``, ``source``, ``value`` and ``status``.
        """
        for data in data_stream:
            self.data_window.append(data)

            # Statistics over the current window
            stats = self._calculate_stats()

            # Alerts for the current record
            alerts = self._check_alerts(data, stats)

            yield {
                'timestamp': data['timestamp'],
                'source': data['source'],
                'current_value': data['value'],
                'current_status': data['status'],
                'stats': stats,
                'alerts': alerts,
                'window_size': len(self.data_window)
            }

    def _calculate_stats(self) -> Dict[str, Any]:
        """Return average/min/max, status counts and trend for the window.

        Returns an empty dict when the window is empty. ``status_counts``
        is filled in a single pass, so its keys appear in the order the
        statuses first occur in the window. Iterating a ``set`` of strings
        instead would make that order vary between interpreter runs.
        """
        if not self.data_window:
            return {}

        values = [item['value'] for item in self.data_window]

        status_counts = {}
        for item in self.data_window:
            status = item['status']
            status_counts[status] = status_counts.get(status, 0) + 1

        return {
            'avg_value': sum(values) / len(values),
            'min_value': min(values),
            'max_value': max(values),
            'status_counts': status_counts,
            'trend': self._calculate_trend(values)
        }

    def _calculate_trend(self, values: list) -> str:
        """Classify the window as 'increasing', 'decreasing' or 'stable'.

        Compares the mean of the last three values with the mean of all
        earlier values, using a 10 % band. With three values or fewer there
        is nothing earlier to compare against, so the result is 'stable'.
        """
        if len(values) < 2:
            return 'stable'

        recent_avg = sum(values[-3:]) / min(3, len(values))
        older_avg = sum(values[:-3]) / max(1, len(values) - 3) if len(values) > 3 else recent_avg

        if recent_avg > older_avg * 1.1:
            return 'increasing'
        elif recent_avg < older_avg * 0.9:
            return 'decreasing'
        else:
            return 'stable'

    def _check_alerts(self, current_data: Dict[str, Any], stats: Dict[str, Any]) -> list:
        """Return the list of alert dicts triggered by the current record.

        Each alert has the keys ``type``, ``message`` and ``severity``.
        """
        alerts = []

        # Out-of-range value alerts
        if current_data['value'] > 90:
            alerts.append({
                'type': 'high_value',
                'message': f"值过高: {current_data['value']:.2f}",
                'severity': 'warning'
            })

        if current_data['value'] < 10:
            alerts.append({
                'type': 'low_value',
                'message': f"值过低: {current_data['value']:.2f}",
                'severity': 'warning'
            })

        # Status alert
        if current_data['status'] == 'error':
            alerts.append({
                'type': 'status_error',
                'message': "系统状态异常",
                'severity': 'error'
            })

        # Trend alert: rising window whose average is already high
        if stats.get('trend') == 'increasing' and stats.get('avg_value', 0) > 80:
            alerts.append({
                'type': 'trend_alert',
                'message': "值持续上升且平均值过高",
                'severity': 'warning'
            })

        return alerts

class AlertManager:
    """Keeps a bounded alert history and running per-severity counters."""

    def __init__(self):
        # Only the 100 most recent alerts are retained.
        self.alert_history = deque(maxlen=100)
        self.alert_counts = {'error': 0, 'warning': 0}

    def process_alerts(self, monitor_stream: Generator) -> Generator[Dict[str, Any], None, None]:
        """Yield each monitoring result extended with an ``alert_summary``.

        Every alert found under the record's ``alerts`` key is stored in
        the history together with the record's ``timestamp`` and ``source``
        and counted by severity. Severities other than 'error' and
        'warning' get their own counter instead of raising ``KeyError``.
        """
        for monitor_data in monitor_stream:
            alerts = monitor_data.get('alerts', [])

            # Record the alerts
            for alert in alerts:
                self.alert_history.append({
                    **alert,
                    'timestamp': monitor_data['timestamp'],
                    'source': monitor_data['source']
                })
                severity = alert['severity']
                self.alert_counts[severity] = self.alert_counts.get(severity, 0) + 1

            # Build the alert summary; slicing an empty history already
            # gives an empty list.
            alert_summary = {
                'current_alerts': len(alerts),
                'total_errors': self.alert_counts['error'],
                'total_warnings': self.alert_counts['warning'],
                'recent_alerts': list(self.alert_history)[-5:]
            }

            yield {
                **monitor_data,
                'alert_summary': alert_summary
            }

class DashboardDisplay:
    """Console dashboard for the alert-annotated monitoring stream."""

    @staticmethod
    def display_real_time(alert_stream: Generator, max_updates: int = 20,
                          refresh_interval: float = 1.0):
        """Render up to ``max_updates`` records from ``alert_stream``.

        Args:
            alert_stream: Iterable of monitoring results carrying an
                ``alert_summary`` entry.
            max_updates: Maximum number of records to display. Zero or less
                displays nothing and reads nothing from the stream.
            refresh_interval: Seconds to pause between two updates; zero or
                less disables the pause.

        The loop stops right after the last permitted update. It neither
        pulls a further record from the stream (which could block on a live
        source) nor sleeps once more before returning.
        """
        if max_updates <= 0:
            return

        for update_count, data in enumerate(alert_stream, start=1):
            DashboardDisplay._render_update(data, update_count)

            if update_count >= max_updates:
                break

            # Throttle the refresh rate
            if refresh_interval > 0:
                time.sleep(refresh_interval)

    @staticmethod
    def _render_update(data: Dict[str, Any], update_count: int):
        """Print one dashboard frame for a single monitoring result."""
        # Frame header (a simplified stand-in for clearing the screen)
        print("\n" + "="*80)
        print(f"实时监控仪表板 - 更新 #{update_count}")
        print("="*80)

        # Basic information
        print(f"时间: {data['timestamp'].strftime('%H:%M:%S')}")
        print(f"数据源: {data['source']}")
        print(f"当前值: {data['current_value']:.2f}")
        print(f"当前状态: {data['current_status']}")

        # Window statistics
        stats = data.get('stats', {})
        if stats:
            print(f"\n统计信息(窗口大小: {data['window_size']}):")
            print(f"  平均值: {stats.get('avg_value', 0):.2f}")
            print(f"  最小值: {stats.get('min_value', 0):.2f}")
            print(f"  最大值: {stats.get('max_value', 0):.2f}")
            print(f"  趋势: {stats.get('trend', 'unknown')}")
            print(f"  状态分布: {stats.get('status_counts', {})}")

        # Alerts raised by the current record
        current_alerts = data.get('alerts', [])
        if current_alerts:
            print(f"\n🚨 当前告警 ({len(current_alerts)}):")
            for alert in current_alerts:
                severity_icon = "🔴" if alert['severity'] == 'error' else "🟡"
                print(f"  {severity_icon} {alert['message']}")
        else:
            print("\n✅ 无当前告警")

        # Running alert totals
        alert_summary = data.get('alert_summary', {})
        print("\n告警统计:")
        print(f"  总错误数: {alert_summary.get('total_errors', 0)}")
        print(f"  总警告数: {alert_summary.get('total_warnings', 0)}")

        # Most recent alerts (at most three are shown)
        recent_alerts = alert_summary.get('recent_alerts', [])
        if recent_alerts:
            print("\n最近告警:")
            for alert in recent_alerts[-3:]:
                time_str = alert['timestamp'].strftime('%H:%M:%S')
                print(f"  [{time_str}] {alert['message']}")

# Demonstrate the real-time monitoring system
print("\n=== 实时监控系统演示 ===")

# Create the data source
data_source = RealTimeDataSource("Sensor-001")

# Create the monitoring components
monitor = DataMonitor(window_size=5)
alert_manager = AlertManager()

print("\n启动实时监控(将运行20次更新)...")
print("监控指标:值范围、状态变化、趋势分析")
print("告警规则:值>90或<10、状态错误、持续上升趋势")

try:
    # Start the data source
    data_source.start()
    
    # Build the processing pipeline: source -> monitor -> alert manager
    data_stream = data_source.get_data_stream()
    monitor_stream = monitor.process_data_stream(data_stream)
    alert_stream = alert_manager.process_alerts(monitor_stream)
    
    # Display the live data
    DashboardDisplay.display_real_time(alert_stream, max_updates=20)
    
finally:
    # Stop the data source even if the display loop raised
    data_source.stop()
    print("\n监控系统已停止")

print("\n监控系统演示完成")
print("系统特点:")
print("- 实时数据生成和处理")
print("- 滑动窗口统计分析")
print("- 多级告警检测")
print("- 实时仪表板显示")
print("- 内存高效的流式处理")

总结

核心知识点

  1. 迭代器协议

    • __iter__() 和 __next__() 方法
    • 可迭代对象与迭代器的区别
    • StopIteration异常的作用
  2. 生成器函数

    • yield关键字的使用
    • 生成器的状态保持
    • 生成器方法:send()、throw()、close()
  3. 生成器表达式

    • 简洁的生成器创建语法
    • 内存效率优势
    • 与列表推导式的对比
  4. 协程基础

    • 基于生成器的协程实现
    • 双向通信机制
    • 协程状态管理
  5. itertools模块

    • 无限迭代器:count()、cycle()、repeat()
    • 有限迭代器:accumulate()、chain()、compress()
    • 组合迭代器:product()、permutations()、combinations()

技能掌握

初级技能

  • 理解迭代器协议的基本概念
  • 能够创建简单的生成器函数
  • 使用生成器表达式进行基本数据处理
  • 了解itertools模块的常用函数

中级技能

  • 创建复杂的自定义迭代器
  • 使用生成器进行内存优化
  • 理解协程的基本概念和应用
  • 设计高效的数据处理管道

高级技能

  • 设计复杂的生成器系统
  • 优化生成器性能
  • 实现高级的协程模式
  • 解决大数据处理中的内存问题

最佳实践

  1. 内存效率

    • 优先使用生成器处理大数据集
    • 避免不必要的数据加载到内存
    • 使用流式处理代替批量处理
  2. 性能优化

    • 合理设计生成器链
    • 避免在生成器中进行重复计算
    • 使用适当的缓冲策略
  3. 错误处理

    • 妥善处理StopIteration异常
    • 在生成器中实现适当的错误恢复
    • 使用try-finally确保资源清理
  4. 代码可读性

    • 为复杂的生成器添加文档
    • 使用有意义的变量名
    • 适当分解复杂的生成器逻辑

常见陷阱

  1. 生成器耗尽

    • 生成器只能迭代一次
    • 需要重新创建生成器来重复迭代
  2. 内存泄漏

    • 生成器中的循环引用
    • 未正确关闭的资源
  3. 性能误区

    • 过度使用生成器导致调用开销
    • 不适当的生成器嵌套
  4. 调试困难

    • 生成器的惰性求值使调试复杂
    • 状态难以跟踪

性能考虑

  1. 内存使用

    • 生成器显著减少内存占用
    • 适合处理大数据集
    • 避免一次性加载所有数据
  2. 计算效率

    • 惰性求值避免不必要的计算
    • 流式处理提高响应速度
    • 合理的缓冲策略平衡内存和性能
  3. I/O优化

    • 生成器适合处理文件和网络数据
    • 减少I/O阻塞时间
    • 支持实时数据处理

下一步学习

  1. 深入学习

    • 异步生成器和async/await
    • 更复杂的协程模式
    • 生成器在并发编程中的应用
  2. 实践项目

    • 构建数据处理管道
    • 实现实时数据分析系统
    • 创建内存高效的文件处理工具
  3. 相关主题

    • 异步编程和协程
    • 函数式编程
    • 并发和并行处理

扩展阅读

  1. 官方文档

  2. 进阶资源

    • “Fluent Python” - Luciano Ramalho
    • “Effective Python” - Brett Slatkin
    • “Python Tricks” - Dan Bader
  3. 实际应用

    • 数据科学中的生成器应用
    • Web开发中的流式响应
    • 系统监控和日志处理

学习建议:生成器和迭代器是Python中非常重要的概念,建议通过大量实践来掌握。从简单的生成器开始,逐步学习更复杂的应用模式,特别关注内存效率和性能优化方面的应用。

下一章节:023-上下文管理器
def advanced_combinations():
    """Show combined uses of itertools' combinatoric iterators.

    Prints the number of 4-digit passwords, a few sample pairings of two
    letters with two digits, and the 3-person teams that can be formed
    from six members.
    """
    # Imported locally so the example runs on its own.
    from itertools import combinations, islice, product

    # Generate all possible password combinations (simplified)
    digits = '0123456789'
    letters = 'abcdefghijklmnopqrstuvwxyz'

    # 4-digit numeric passwords
    four_digit_passwords = product(digits, repeat=4)
    print(f"\n4位数字密码总数: {len(list(four_digit_passwords))}")

    # Two letters combined with two digits
    letter_digit_combos = product(letters, repeat=2), product(digits, repeat=2)
    mixed_passwords = product(*letter_digit_combos)

    # Show only the first few
    sample_passwords = list(islice(mixed_passwords, 5))
    print(f"混合密码示例: {sample_passwords}")

    # Team grouping
    team_members = ['Alice', 'Bob', 'Charlie', 'Diana', 'Eve', 'Frank']
    print(f"\n从{len(team_members)}人中选3人的所有组合:")
    for i, team in enumerate(combinations(team_members, 3), 1):
        if i <= 5:  # show only the first 5
            print(f"  团队{i}: {team}")
        elif i == 6:
            print("  ...")

    total_combinations = len(list(combinations(team_members, 3)))
    print(f"  总共有 {total_combinations} 种组合")


advanced_combinations()

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

lvjesus

码力充电

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值