022-生成器与迭代器
🔴 难度: 高级 | ⏱️ 预计时间: 5小时 | 📋 前置: 021-装饰器原理与应用
学习目标
完成本章节后,你将能够:
- 深入理解迭代器协议和生成器的工作原理
- 掌握生成器函数和生成器表达式的使用
- 学会创建自定义迭代器和生成器
- 理解协程的基础概念和应用
- 掌握生成器在内存优化和性能提升中的应用
- 学会使用 itertools 模块进行高效的迭代操作
迭代器基础
迭代器协议
迭代器协议是Python中实现迭代的标准方式,包含两个核心方法:`__iter__()`(返回迭代器对象本身)和 `__next__()`(返回下一个值,没有更多元素时抛出 `StopIteration`):
# 迭代器协议演示
print("=== 迭代器协议演示 ===")
class NumberIterator:
    """Iterator over the half-open range ``[start, end)`` in steps of one."""

    def __init__(self, start, end):
        self.current = start
        self.end = end

    def __iter__(self):
        """Return the iterator itself, as the iterator protocol requires."""
        return self

    def __next__(self):
        """Return the next number, or raise StopIteration once ``end`` is reached."""
        value = self.current
        if value < self.end:
            self.current = value + 1
            return value
        raise StopIteration
# 使用自定义迭代器
print("\n1. 自定义迭代器:")
number_iter = NumberIterator(1, 5)
for num in number_iter:
print(f"数字: {num}")
# 手动迭代
print("\n2. 手动迭代:")
number_iter2 = NumberIterator(10, 13)
iterator = iter(number_iter2)
try:
while True:
value = next(iterator)
print(f"手动获取: {value}")
except StopIteration:
print("迭代结束")
# 内置类型的迭代器
print("\n3. 内置类型迭代器:")
my_list = [1, 2, 3, 4, 5]
list_iter = iter(my_list)
print(f"列表迭代器类型: {type(list_iter)}")
print(f"前三个元素: {[next(list_iter) for _ in range(3)]}")
可迭代对象 vs 迭代器
理解可迭代对象和迭代器的区别是掌握Python迭代机制的关键:
# 可迭代对象 vs 迭代器
print("\n=== 可迭代对象 vs 迭代器 ===")
from collections.abc import Iterable, Iterator
class CountDown:
    """Re-iterable countdown: every ``iter()`` call starts a fresh pass from ``start``."""

    def __init__(self, start):
        self.start = start

    def __iter__(self):
        """Return a brand-new iterator so the object can be looped over repeatedly."""
        return CountDownIterator(self.start)


class CountDownIterator:
    """Single-use iterator producing ``start, start - 1, ..., 1``."""

    def __init__(self, start):
        self.current = start

    def __iter__(self):
        return self

    def __next__(self):
        remaining = self.current
        if remaining > 0:
            self.current = remaining - 1
            return remaining
        raise StopIteration
# 测试可迭代对象和迭代器
countdown = CountDown(3)
print("\n1. 类型检查:")
print(f"CountDown是可迭代的: {isinstance(countdown, Iterable)}")
print(f"CountDown是迭代器: {isinstance(countdown, Iterator)}")
countdown_iter = iter(countdown)
print(f"CountDownIterator是可迭代的: {isinstance(countdown_iter, Iterable)}")
print(f"CountDownIterator是迭代器: {isinstance(countdown_iter, Iterator)}")
print("\n2. 多次迭代:")
# 可迭代对象可以多次迭代
for i in countdown:
print(f"第一次迭代: {i}")
for i in countdown:
print(f"第二次迭代: {i}")
print("\n3. 迭代器只能迭代一次:")
iterator = iter(countdown)
print(f"第一次: {list(iterator)}")
print(f"第二次: {list(iterator)}") # 空列表,因为迭代器已耗尽
高级迭代器模式
# 高级迭代器模式
print("\n=== 高级迭代器模式 ===")
import itertools
from typing import Any, Iterator, Optional
class ChainedIterator:
    """Iterate over several iterables back to back, as if they were one sequence."""

    def __init__(self, *iterables):
        self.iterables = iterables
        self.current_iter = None  # iterator of the iterable currently being drained
        self.iter_index = 0  # position of that iterable inside self.iterables

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next item, moving on to the following iterable when one runs dry."""
        exhausted = object()  # unique marker, cannot collide with a real item
        while self.iter_index < len(self.iterables):
            if self.current_iter is None:
                # Iterators are created lazily, only when their turn comes.
                self.current_iter = iter(self.iterables[self.iter_index])
            item = next(self.current_iter, exhausted)
            if item is not exhausted:
                return item
            self.current_iter = None
            self.iter_index += 1
        raise StopIteration
class FilteredIterator:
    """Iterator that passes through only the items for which ``predicate`` is truthy."""

    def __init__(self, iterable, predicate):
        self.iterator = iter(iterable)
        self.predicate = predicate

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next accepted item; raise StopIteration when the source is exhausted."""
        for candidate in self.iterator:
            if self.predicate(candidate):
                return candidate
        raise StopIteration
class TransformIterator:
    """Iterator that applies ``transform_func`` to every item of the source."""

    def __init__(self, iterable, transform_func):
        self.iterator = iter(iterable)
        self.transform_func = transform_func

    def __iter__(self):
        return self

    def __next__(self):
        """Return the transformed next item; StopIteration from the source propagates."""
        return self.transform_func(next(self.iterator))
class TakeIterator:
    """Iterator that yields at most ``count`` items from the source."""

    def __init__(self, iterable, count):
        self.iterator = iter(iterable)
        self.count = count
        self.taken = 0  # number of items handed out so far

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next item while the quota lasts, then raise StopIteration."""
        if self.taken < self.count:
            # The source is only touched while quota remains, so an infinite
            # source is never advanced past the requested number of items.
            item = next(self.iterator)
            self.taken += 1
            return item
        raise StopIteration
# 演示高级迭代器
print("\n1. 链式迭代器:")
chained = ChainedIterator([1, 2, 3], "abc", [4, 5])
print(f"链式结果: {list(chained)}")
print("\n2. 过滤迭代器:")
filtered = FilteredIterator(range(10), lambda x: x % 2 == 0)
print(f"偶数过滤: {list(filtered)}")
print("\n3. 转换迭代器:")
transformed = TransformIterator([1, 2, 3, 4], lambda x: x ** 2)
print(f"平方转换: {list(transformed)}")
print("\n4. 限制迭代器:")
taken = TakeIterator(itertools.count(1), 5) # 从无限序列中取5个
print(f"取前5个: {list(taken)}")
print("\n5. 组合使用:")
# 组合多个迭代器
data = range(20)
result = TakeIterator(
TransformIterator(
FilteredIterator(data, lambda x: x % 3 == 0),
lambda x: x * 2
),
3
)
print(f"组合结果: {list(result)}")
生成器函数
基础生成器
生成器函数使用yield关键字,提供了一种简洁的方式来创建迭代器:
# 生成器函数基础
print("\n=== 生成器函数基础 ===")
def simple_generator():
    """Yield 1, 2, 3, printing a trace message between the yields.

    The interleaved prints make it visible that the body only runs when the
    consumer asks for the next value.
    """
    print("生成器开始")
    messages_after_yield = ("生成第一个值后", "生成第二个值后", "生成器结束")
    for value, message in enumerate(messages_after_yield, start=1):
        yield value
        print(message)
def fibonacci_generator(n):
    """Yield the first ``n`` Fibonacci numbers, starting from 0."""
    previous, current = 0, 1
    remaining = n
    while remaining > 0:
        yield previous
        previous, current = current, previous + current
        remaining -= 1
def infinite_sequence():
    """Yield 0, 1, 2, ... without end; the consumer decides when to stop."""
    upcoming = 0
    while True:
        yield upcoming
        upcoming = upcoming + 1
def range_generator(start, stop, step=1):
    """Yield values from ``start`` towards ``stop`` (exclusive) in increments of ``step``.

    Mirrors the built-in ``range`` for both directions: a positive step counts
    up while ``current < stop``, a negative step counts down while
    ``current > stop``.

    Raises:
        ValueError: if ``step`` is zero. Because this is a generator function,
            the error surfaces on the first ``next()`` call, not at call time.
    """
    if step == 0:
        # A zero step would never reach ``stop`` and loop forever.
        raise ValueError("range_generator() step must not be zero")
    current = start
    if step > 0:
        while current < stop:
            yield current
            current += step
    else:
        while current > stop:
            yield current
            current += step
# 演示生成器函数
print("\n1. 简单生成器:")
gen = simple_generator()
print(f"生成器类型: {type(gen)}")
for value in gen:
print(f"获得值: {value}")
print("\n2. 斐波那契生成器:")
fib_gen = fibonacci_generator(8)
print(f"前8个斐波那契数: {list(fib_gen)}")
print("\n3. 无限序列生成器:")
inf_gen = infinite_sequence()
print(f"前10个数: {[next(inf_gen) for _ in range(10)]}")
print("\n4. 自定义range生成器:")
custom_range = range_generator(0, 10, 2)
print(f"偶数序列: {list(custom_range)}")
print("\n5. 生成器状态:")
gen2 = simple_generator()
print(f"第一个值: {next(gen2)}")
print(f"第二个值: {next(gen2)}")
print(f"第三个值: {next(gen2)}")
try:
print(f"第四个值: {next(gen2)}")
except StopIteration:
print("生成器已耗尽")
生成器方法
生成器对象提供了几个重要的方法来控制执行流程:
# 生成器方法
print("\n=== 生成器方法 ===")
def controllable_generator():
    """Generator used to demonstrate ``send()``, ``throw()`` and ``close()``.

    Yields three labels in turn and prints whatever the caller sends back.
    An exception thrown in is reported and answered with one recovery value;
    ``close()`` is acknowledged with a message. The cleanup message is printed
    in every case.
    """
    print("生成器启动")
    try:
        for label in ("第一个值", "第二个值", "第三个值"):
            received = yield label
            print(f"接收到: {received}")
    except GeneratorExit:
        print("生成器被关闭")
    except Exception as exc:
        print(f"生成器异常: {exc}")
        yield f"处理异常: {exc}"
    finally:
        print("生成器清理")
def error_handling_generator():
    """Yield 0..4, recovering in place when the caller sends the string "error".

    Sending "error" raises a ValueError inside the generator; it is caught,
    reported, and answered with an extra recovery value before the sequence
    continues with the next number.
    """
    try:
        index = 0
        while index < 5:
            try:
                received = yield index
                if received == "error":
                    raise ValueError("模拟错误")
                print(f"处理值: {received}")
            except ValueError as err:
                print(f"捕获错误: {err}")
                yield f"错误恢复: {index}"
            index += 1
    finally:
        print("生成器结束")
# 演示生成器方法
print("\n1. send()方法:")
gen = controllable_generator()
print(f"启动: {next(gen)}")
print(f"发送'hello': {gen.send('hello')}")
print(f"发送'world': {gen.send('world')}")
print("\n2. throw()方法:")
gen2 = controllable_generator()
next(gen2)  # prime the generator so it is paused at its first yield
try:
    # Pass an exception *instance*: the throw(type, value) call form is
    # deprecated since Python 3.12.
    # NOTE: controllable_generator catches the exception and yields a recovery
    # value, so throw() returns normally here and the StopIteration branch is
    # only reached if the generator finishes instead of yielding.
    gen2.throw(ValueError("测试异常"))
except StopIteration:
    print("生成器正常结束")
print("\n3. close()方法:")
gen3 = controllable_generator()
next(gen3) # 启动生成器
gen3.close()
print("生成器已关闭")
print("\n4. 错误处理生成器:")
error_gen = error_handling_generator()
print(f"值1: {next(error_gen)}")
print(f"发送正常值: {error_gen.send('normal')}")
print(f"值2: {next(error_gen)}")
print(f"发送错误: {error_gen.send('error')}")
print(f"继续: {next(error_gen)}")
生成器表达式
生成器表达式提供了创建生成器的简洁语法:
# 生成器表达式
print("\n=== 生成器表达式 ===")
import sys
# 基础生成器表达式
print("\n1. 基础生成器表达式:")
squares_gen = (x**2 for x in range(10))
print(f"生成器类型: {type(squares_gen)}")
print(f"前5个平方数: {[next(squares_gen) for _ in range(5)]}")
# 条件生成器表达式
print("\n2. 条件生成器表达式:")
even_squares = (x**2 for x in range(20) if x % 2 == 0)
print(f"偶数的平方: {list(even_squares)}")
# 嵌套生成器表达式
print("\n3. 嵌套生成器表达式:")
matrix = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
flattened = (item for row in matrix for item in row)
print(f"扁平化矩阵: {list(flattened)}")
# 内存效率对比
print("\n4. 内存效率对比:")
n = 1000000
# 列表推导式
list_comp = [x**2 for x in range(n)]
list_size = sys.getsizeof(list_comp)
print(f"列表推导式内存: {list_size:,} 字节")
# 生成器表达式
gen_exp = (x**2 for x in range(n))
gen_size = sys.getsizeof(gen_exp)
print(f"生成器表达式内存: {gen_size:,} 字节")
print(f"内存节省: {(list_size - gen_size) / list_size * 100:.1f}%")
# Chained generator expressions: each stage pulls lazily from the previous one.
from itertools import islice

print("\n5. 链式生成器表达式:")
data = range(100)
filtered = (x for x in data if x % 3 == 0)  # filter
transformed = (x * 2 for x in filtered)  # transform
# islice stops pulling after five items. An `enumerate(...) if i < 5` filter
# would keep draining the whole source (and never finish on an infinite one).
limited = islice(transformed, 5)  # limit
result = list(limited)
print(f"链式处理结果: {result}")
# 生成器表达式作为函数参数
print("\n6. 生成器表达式作为函数参数:")
sum_of_squares = sum(x**2 for x in range(10))
print(f"平方和: {sum_of_squares}")
max_even = max(x for x in range(20) if x % 2 == 0)
print(f"最大偶数: {max_even}")
# 复杂的生成器表达式
print("\n7. 复杂的生成器表达式:")
words = ["hello", "world", "python", "generator"]
char_counts = {word: sum(1 for char in word if char.lower() in 'aeiou')
for word in words}
print(f"单词元音字母数: {char_counts}")
协程基础
基于生成器的协程
在Python 3.5之前,协程主要通过生成器实现:
# 基于生成器的协程
print("\n=== 基于生成器的协程 ===")
def coroutine_decorator(func):
    """Decorator that primes a generator-based coroutine.

    A generator must be advanced to its first ``yield`` before ``send()`` can
    deliver a value. The wrapper does that once, so callers can ``send()``
    immediately. The value produced by that first ``yield`` is discarded.

    Args:
        func: generator function implementing the coroutine.

    Returns:
        A function with the same signature that returns the already-primed
        generator object.
    """
    from functools import wraps

    @wraps(func)  # keep the coroutine's __name__ / __doc__ for debugging and introspection
    def wrapper(*args, **kwargs):
        gen = func(*args, **kwargs)
        next(gen)  # advance to the first yield
        return gen

    return wrapper
@coroutine_decorator
def accumulator():
    """Coroutine keeping a running sum: ``send(x)`` adds ``x`` and yields the new total."""
    running_total = 0
    while True:
        received = yield running_total
        if received is None:
            # send(None) / next() only reads the current total.
            continue
        running_total += received
@coroutine_decorator
def averager():
    """Coroutine yielding the mean of all values sent so far (0 before any value)."""
    samples_sum = 0
    samples_seen = 0
    while True:
        if samples_seen > 0:
            mean = samples_sum / samples_seen
        else:
            mean = 0  # nothing received yet; avoids dividing by zero
        sample = yield mean
        if sample is None:
            continue
        samples_sum += sample
        samples_seen += 1
@coroutine_decorator
def filter_coroutine(predicate, target):
    """Pipeline stage: forward each received value to ``target`` if ``predicate`` accepts it."""
    while True:
        candidate = yield
        if not predicate(candidate):
            continue
        target.send(candidate)
@coroutine_decorator
def printer():
    """Pipeline sink: print every value that is sent in."""
    while True:
        message = yield
        print(f"接收到: {message}")
# 演示协程
print("\n1. 累加器协程:")
acc = accumulator()
print(f"初始值: {acc.send(None)}")
print(f"加10: {acc.send(10)}")
print(f"加20: {acc.send(20)}")
print(f"加5: {acc.send(5)}")
print("\n2. 平均值协程:")
avg = averager()
print(f"初始平均值: {avg.send(None)}")
print(f"加入10: {avg.send(10)}")
print(f"加入20: {avg.send(20)}")
print(f"加入30: {avg.send(30)}")
print("\n3. 协程管道:")
# 创建协程管道:过滤偶数 -> 打印
printer_coro = printer()
filter_coro = filter_coroutine(lambda x: x % 2 == 0, printer_coro)
# 发送数据到管道
for i in range(10):
filter_coro.send(i)
协程状态管理
# 协程状态管理
print("\n=== 协程状态管理 ===")
import inspect
from enum import Enum
class CoroutineState(Enum):
    """Generator states, valued with the strings ``inspect.getgeneratorstate`` returns."""

    CREATED = inspect.GEN_CREATED
    RUNNING = inspect.GEN_RUNNING
    SUSPENDED = inspect.GEN_SUSPENDED
    CLOSED = inspect.GEN_CLOSED


def get_coroutine_state(coro):
    """Return the current state of generator ``coro`` as a ``CoroutineState`` member."""
    return CoroutineState(inspect.getgeneratorstate(coro))
@coroutine_decorator
def stateful_coroutine():
    """Coroutine that processes sent values until it receives "stop".

    Returns "协程结束", which the caller sees as ``StopIteration.value``.
    Closing the coroutine is acknowledged with a message; the cleanup message
    is printed either way.
    """
    print("协程启动")
    try:
        command = None
        while command != "stop":
            command = yield "等待输入"
            print(f"处理: {command}")
    except GeneratorExit:
        print("协程被关闭")
    finally:
        print("协程清理")
    return "协程结束"
# 演示协程状态
print("\n1. 协程状态跟踪:")
coro = stateful_coroutine()
print(f"启动后状态: {get_coroutine_state(coro)}")
result = coro.send("hello")
print(f"发送数据后: {result}, 状态: {get_coroutine_state(coro)}")
result = coro.send("world")
print(f"再次发送: {result}, 状态: {get_coroutine_state(coro)}")
try:
result = coro.send("stop")
except StopIteration as e:
print(f"协程返回值: {e.value}")
print(f"最终状态: {get_coroutine_state(coro)}")
高级生成器应用
数据流处理
生成器在数据流处理中非常有用,特别是处理大量数据时:
# 数据流处理
print("\n=== 数据流处理 ===")
import csv
import json
from io import StringIO
from typing import Generator, Any, Dict
def read_csv_data(csv_content: str) -> Generator[Dict[str, str], None, None]:
    """Parse CSV text lazily, yielding one dict per data row keyed by the header line."""
    rows = csv.DictReader(StringIO(csv_content))
    yield from rows
def filter_data(data_stream: Generator, condition) -> Generator[Dict[str, str], None, None]:
    """Lazily yield only the items of ``data_stream`` for which ``condition(item)`` is truthy."""
    accepted = (record for record in data_stream if condition(record))
    yield from accepted
def transform_data(data_stream: Generator, transformer) -> Generator[Any, None, None]:
    """Lazily yield ``transformer(item)`` for every item of ``data_stream``."""
    converted = (transformer(record) for record in data_stream)
    yield from converted
def batch_data(data_stream: Generator, batch_size: int) -> Generator[list, None, None]:
    """Group consecutive items into lists of ``batch_size``; the last list may be shorter."""
    pending = []
    for record in data_stream:
        pending.append(record)
        if len(pending) < batch_size:
            continue
        yield pending
        pending = []  # start a new list so the yielded batch is not mutated afterwards
    if pending:
        # Flush the trailing partial batch.
        yield pending
def aggregate_data(data_stream: Generator, key_func, agg_func) -> Generator[tuple, None, None]:
    """Group items by ``key_func`` and yield ``(key, agg_func(items))`` per group.

    The whole stream is consumed before the first result is produced. Groups
    come out in the order their key was first seen.
    """
    buckets = {}
    for record in data_stream:
        buckets.setdefault(key_func(record), []).append(record)
    for group_key in buckets:
        yield group_key, agg_func(buckets[group_key])
# 模拟CSV数据
csv_data = """name,age,department,salary
Alice,25,Engineering,75000
Bob,30,Engineering,80000
Charlie,35,Marketing,65000
Diana,28,Engineering,78000
Eve,32,Marketing,70000
Frank,29,Sales,60000
Grace,31,Engineering,82000"""
print("\n1. 数据流处理管道:")
# 创建数据处理管道
data_stream = read_csv_data(csv_data)
engineering_stream = filter_data(data_stream, lambda x: x['department'] == 'Engineering')
salary_stream = transform_data(engineering_stream, lambda x: int(x['salary']))
batched_stream = batch_data(salary_stream, 2)
print("工程部门薪资分批处理:")
for i, batch in enumerate(batched_stream, 1):
print(f"批次 {i}: {batch}")
print("\n2. 数据聚合:")
# 按部门聚合平均薪资
data_stream2 = read_csv_data(csv_data)
agg_stream = aggregate_data(
data_stream2,
key_func=lambda x: x['department'],
agg_func=lambda items: sum(int(item['salary']) for item in items) / len(items)
)
print("各部门平均薪资:")
for department, avg_salary in agg_stream:
print(f"{department}: ${avg_salary:,.2f}")
print("\n3. 复杂数据处理:")
# 复杂的数据处理管道
def complex_data_pipeline(csv_content: str):
    """Build a lazy pipeline: read CSV -> keep age >= 30 -> grade salary -> group by grade.

    Returns the generator produced by ``aggregate_data``; it yields
    ``(grade, stats)`` pairs where ``stats`` holds the head count, the average
    age and the distinct departments of that grade.
    """

    def is_thirty_or_older(row):
        return int(row['age']) >= 30

    def add_salary_grade(item):
        # Grade thresholds: A >= 80000, B >= 70000, otherwise C.
        salary = int(item['salary'])
        grade = 'A' if salary >= 80000 else 'B' if salary >= 70000 else 'C'
        return {**item, 'salary_grade': grade}

    def summarize(members):
        return {
            'count': len(members),
            'avg_age': sum(int(member['age']) for member in members) / len(members),
            'departments': list(set(member['department'] for member in members)),
        }

    seniors = filter_data(read_csv_data(csv_content), is_thirty_or_older)
    graded = transform_data(seniors, add_salary_grade)
    return aggregate_data(
        graded,
        key_func=lambda row: row['salary_grade'],
        agg_func=summarize,
    )
result_stream = complex_data_pipeline(csv_data)
print("薪资等级分析(年龄>=30):")
for grade, stats in result_stream:
print(f"等级 {grade}: {stats}")
内存优化应用
# 内存优化应用
print("\n=== 内存优化应用 ===")
import sys
import time
from typing import Generator
def memory_efficient_file_reader(filename: str, chunk_size: int = 1024) -> Generator[str, None, None]:
    """Yield the text of ``filename`` in chunks of at most ``chunk_size`` characters.

    If the file does not exist, a notice is printed and 100 chunks of
    simulated data are yielded instead, so the demo runs without a real file.

    Args:
        filename: path of a UTF-8 text file.
        chunk_size: maximum number of characters per yielded chunk.
    """
    # Only the open() call is guarded. A FileNotFoundError raised by (or thrown
    # into) the streaming loop must propagate, not trigger the fallback after
    # real data has already been produced.
    try:
        file = open(filename, 'r', encoding='utf-8')
    except FileNotFoundError:
        print(f"文件 {filename} 不存在,使用模拟数据")
        # Simulated large-file content.
        for i in range(100):
            yield f"这是第 {i+1} 行数据\n" * 10
        return
    with file:
        while True:
            chunk = file.read(chunk_size)
            if not chunk:
                break
            yield chunk
def process_large_dataset(size: int) -> Generator[int, None, None]:
    """Yield one computed value per index in ``range(size)`` without storing them.

    The value for index ``i`` is ``0 + 1 + ... + (i % 100 - 1)``, a stand-in
    for an expensive per-item computation.
    """
    for index in range(size):
        yield sum(range(index % 100))
def fibonacci_memory_efficient(n: int) -> Generator[int, None, None]:
    """Yield the first ``n`` Fibonacci numbers while keeping only two values in memory."""
    previous, current = 0, 1
    for _ in range(n):
        yield previous
        previous, current = current, previous + current
def prime_generator(limit: int) -> Generator[int, None, None]:
    """Yield every prime ``<= limit`` in ascending order.

    Uses trial division (not a sieve): 2 is emitted directly, then each odd
    candidate is tested against odd divisors up to its square root.

    Args:
        limit: inclusive upper bound; nothing is yielded when ``limit < 2``.
    """
    if limit < 2:
        return
    yield 2
    # Even numbers above 2 cannot be prime, so only odd candidates are examined.
    for num in range(3, limit + 1, 2):
        divisor = 3
        # `divisor * divisor <= num` is an exact integer form of
        # `divisor <= sqrt(num)`, free of float rounding for large numbers.
        while divisor * divisor <= num:
            if num % divisor == 0:
                break
            divisor += 2
        else:
            # Loop ended without finding a divisor: num is prime.
            yield num
# 内存使用对比
print("\n1. 内存使用对比:")
# 传统方式:一次性加载所有数据
def traditional_approach(size: int) -> list:
    """Eager counterpart of the generator version: compute and store every result at once."""
    results = []
    for index in range(size):
        results.append(sum(range(index % 100)))
    return results
# 生成器方式
def generator_approach(size: int) -> Generator[int, None, None]:
    """Lazy counterpart of traditional_approach: return the generator from process_large_dataset(size)."""
    return process_large_dataset(size)
size = 10000
# 测量传统方式的内存使用
start_time = time.time()
traditional_result = traditional_approach(size)
traditional_time = time.time() - start_time
traditional_memory = sys.getsizeof(traditional_result)
print(f"传统方式:")
print(f" 时间: {traditional_time:.4f}秒")
print(f" 内存: {traditional_memory:,}字节")
print(f" 前5个结果: {traditional_result[:5]}")
# 测量生成器方式的内存使用
start_time = time.time()
generator_result = generator_approach(size)
generator_memory = sys.getsizeof(generator_result)
# 只取前5个值来测试
first_five = [next(generator_result) for _ in range(5)]
generator_time = time.time() - start_time
print(f"\n生成器方式:")
print(f" 时间: {generator_time:.4f}秒")
print(f" 内存: {generator_memory:,}字节")
print(f" 前5个结果: {first_five}")
print(f" 内存节省: {(traditional_memory - generator_memory) / traditional_memory * 100:.1f}%")
print("\n2. 大文件处理:")
# 模拟处理大文件
file_reader = memory_efficient_file_reader("large_file.txt", chunk_size=100)
chunk_count = 0
total_chars = 0
for chunk in file_reader:
chunk_count += 1
total_chars += len(chunk)
if chunk_count <= 3: # 只显示前3个块
print(f"块 {chunk_count}: {len(chunk)} 字符")
elif chunk_count == 4:
print("...")
print(f"总共处理了 {chunk_count} 个块,{total_chars} 个字符")
print("\n3. 质数生成:")
primes = prime_generator(100)
print(f"100以内的质数: {list(primes)}")
print("\n4. 斐波那契数列:")
fib = fibonacci_memory_efficient(15)
print(f"前15个斐波那契数: {list(fib)}")
itertools模块
itertools模块提供了许多有用的迭代器工具:
# itertools模块应用
print("\n=== itertools模块应用 ===")
import itertools
from itertools import *
print("\n1. 无限迭代器:")
# count() - 无限计数
print("count(10, 2):", list(islice(count(10, 2), 5)))
# cycle() - 循环迭代
print("cycle('ABC'):", list(islice(cycle('ABC'), 8)))
# repeat() - 重复值
print("repeat('hello', 3):", list(repeat('hello', 3)))
print("\n2. 有限迭代器:")
# accumulate() - 累积
print("accumulate([1,2,3,4,5]):", list(accumulate([1, 2, 3, 4, 5])))
print("accumulate([1,2,3,4,5], operator.mul):", list(accumulate([1, 2, 3, 4, 5], lambda x, y: x * y)))
# chain() - 连接
print("chain([1,2], [3,4], [5,6]):", list(chain([1, 2], [3, 4], [5, 6])))
# compress() - 压缩
print("compress('ABCDEF', [1,0,1,0,1,1]):", list(compress('ABCDEF', [1, 0, 1, 0, 1, 1])))
# dropwhile() - 丢弃开头
print("dropwhile(lambda x: x<5, [1,4,6,4,1]):", list(dropwhile(lambda x: x < 5, [1, 4, 6, 4, 1])))
# takewhile() - 取开头
print("takewhile(lambda x: x<5, [1,4,6,4,1]):", list(takewhile(lambda x: x < 5, [1, 4, 6, 4, 1])))
# filterfalse() - 过滤假值
print("filterfalse(lambda x: x%2, range(10)):", list(filterfalse(lambda x: x % 2, range(10))))
# groupby() - 分组
data = [1, 1, 2, 2, 2, 3, 1, 1]
print("groupby([1,1,2,2,2,3,1,1]):")
for key, group in groupby(data):
print(f" {key}: {list(group)}")
print("\n3. 组合迭代器:")
# product() - 笛卡尔积
print("product('AB', '12'):", list(product('AB', '12')))
print("product([0,1], repeat=3):", list(product([0, 1], repeat=3)))
# permutations() - 排列
print("permutations('ABC', 2):", list(permutations('ABC', 2)))
# combinations() - 组合
print("combinations('ABCD', 2):", list(combinations('ABCD', 2)))
# combinations_with_replacement() - 可重复组合
print("combinations_with_replacement('AB', 2):", list(combinations_with_replacement('AB', 2)))
print("\n4. 实际应用示例:")
# 数据分析应用
def analyze_sales_data():
    """Print a small sales report: per-day totals, product pairs and cumulative sales."""
    from itertools import accumulate, combinations, groupby

    # Simulated sales records: (date, product, units sold).
    sales_data = [
        ('2024-01-01', 'A', 10),
        ('2024-01-01', 'B', 15),
        ('2024-01-01', 'A', 5),
        ('2024-01-02', 'A', 20),
        ('2024-01-02', 'B', 25),
        ('2024-01-02', 'C', 8),
        ('2024-01-03', 'A', 12),
        ('2024-01-03', 'C', 18),
    ]
    # groupby only merges *adjacent* equal keys, so sort by the same key first.
    sales_data.sort(key=lambda x: x[0])
    print("按日期分组的销售数据:")
    daily_totals = []
    for date, group in groupby(sales_data, key=lambda x: x[0]):
        daily_sales = list(group)
        total = sum(item[2] for item in daily_sales)
        daily_totals.append(total)
        print(f" {date}: {daily_sales}, 总计: {total}")
    # Pairwise product combinations.
    products = ['A', 'B', 'C']
    print("\n产品两两组合:")
    for combo in combinations(products, 2):
        print(f" 组合: {combo}")
    # Cumulative sales are derived from the totals computed above, so they
    # cannot drift from the data the way a hand-written list can.
    cumulative = list(accumulate(daily_totals))
    print(f"\n累积销量: {cumulative}")
analyze_sales_data()
# 数据处理管道
def data_processing_pipeline():
    """Print the results of two lazy pipelines built from itertools pieces.

    Pipeline 1: even numbers of 1..20 -> squared -> running sum -> first five.
    Pipeline 2: three lists chained -> doubled -> values greater than 10.
    """
    even_squares = (number ** 2 for number in range(1, 21) if number % 2 == 0)
    running_totals = accumulate(even_squares)
    first_five = list(islice(running_totals, 5))
    print(f"\n数据处理管道结果: {first_five}")

    # chain() presents several sources as one stream without copying them.
    merged = chain([1, 2, 3], [4, 5, 6], [7, 8, 9])
    doubled = (value * 2 for value in merged)
    above_ten = [value for value in doubled if value > 10]
    print(f"链式处理结果: {above_ten}")
data_processing_pipeline()
# 高级组合应用
def advanced_combinations()
## 性能优化与最佳实践
### 生成器性能优化
```python
# 生成器性能优化
print("\n=== 生成器性能优化 ===")
import time
import sys
from typing import Generator, List
def performance_comparison():
    """Print a list-versus-generator comparison of memory use and lazy evaluation.

    Part 1 builds one million doubled integers eagerly and lazily and reports
    construction time and the shallow ``sys.getsizeof`` of each container.
    Part 2 shows that a generator expression only pays for the items that are
    actually consumed.

    Intervals are measured with ``time.perf_counter()``, the monotonic
    high-resolution clock intended for timing short durations.
    """
    n = 1000000
    # Test 1: memory footprint.
    print("\n1. 内存使用对比:")
    # Eager list.
    start_time = time.perf_counter()
    list_result = [x * 2 for x in range(n)]
    list_time = time.perf_counter() - start_time
    list_memory = sys.getsizeof(list_result)
    # Lazy generator: creating it does no per-item work, hence the tiny time.
    start_time = time.perf_counter()
    gen_result = (x * 2 for x in range(n))
    gen_time = time.perf_counter() - start_time
    gen_memory = sys.getsizeof(gen_result)
    print(f"列表方式: {list_time:.4f}秒, {list_memory:,}字节")
    print(f"生成器方式: {gen_time:.4f}秒, {gen_memory:,}字节")
    print(f"内存节省: {(list_memory - gen_memory) / list_memory * 100:.1f}%")
    # Test 2: benefit of lazy evaluation.
    print("\n2. 惰性求值优势:")

    def expensive_operation(x):
        """Simulate a costly computation."""
        time.sleep(0.001)
        return x ** 2

    # List comprehension: all 100 items are computed up front.
    start_time = time.perf_counter()
    list_comp = [expensive_operation(x) for x in range(100)]
    first_five_list = list_comp[:5]
    list_comp_time = time.perf_counter() - start_time
    # Generator expression: only the five requested items are computed.
    start_time = time.perf_counter()
    gen_exp = (expensive_operation(x) for x in range(100))
    first_five_gen = [next(gen_exp) for _ in range(5)]
    gen_exp_time = time.perf_counter() - start_time
    print(f"列表推导式(全部计算): {list_comp_time:.4f}秒")
    print(f"生成器表达式(只计算5个): {gen_exp_time:.4f}秒")
    print(f"速度提升: {list_comp_time / gen_exp_time:.1f}倍")
def optimized_generators():
    """Run three generator demos and print their timings.

    Covers a constant-memory Fibonacci generator, line-by-line file streaming
    (with simulated lines when ``large_file.txt`` is absent) and a chained
    filter/transform stage. Intervals are measured with
    ``time.perf_counter()``, the clock intended for timing short durations.
    """

    # Optimization 1: keep only the last two values instead of recomputing.
    def fibonacci_optimized(n: int) -> Generator[int, None, None]:
        """Yield the first ``n`` Fibonacci numbers."""
        previous, current = 0, 1
        for _ in range(n):
            yield previous
            previous, current = current, previous + current

    # Optimization 2: stream the file instead of loading it whole.
    def process_large_file_optimized(filename: str) -> Generator[str, None, None]:
        """Yield each non-blank line of ``filename``, stripped and upper-cased."""
        try:
            with open(filename, 'r', encoding='utf-8') as file:
                for line in file:  # one line at a time, never the whole file
                    stripped = line.strip()
                    if stripped:  # skip blank lines
                        yield stripped.upper()
        except FileNotFoundError:
            # Simulated data so the demo runs without the file.
            for i in range(1000):
                yield f"LINE {i+1}"

    # Optimization 3: chain processing stages lazily.
    def chain_processing(data_source: Generator) -> Generator[str, None, None]:
        """Keep items longer than five characters and rewrite their 'LINE' marker."""
        for item in data_source:
            if len(item) > 5:
                yield item.replace('LINE', 'PROCESSED')

    print("\n3. 优化的生成器应用:")
    # Fibonacci timing.
    start_time = time.perf_counter()
    fib_list = list(fibonacci_optimized(1000))
    fib_time = time.perf_counter() - start_time
    print(f"生成1000个斐波那契数: {fib_time:.4f}秒")
    # File-processing chain; only the first ten results are pulled.
    processed_gen = chain_processing(process_large_file_optimized("large_file.txt"))
    start_time = time.perf_counter()
    results = [next(processed_gen) for _ in range(10)]
    process_time = time.perf_counter() - start_time
    print(f"处理前10行: {process_time:.4f}秒")
    print(f"结果示例: {results[:3]}")
def memory_efficient_patterns():
    """Demonstrate three streaming patterns: batching, sliding window and de-duplication."""
    from collections import deque
    from typing import Any

    # Pattern 1: fixed-size batches.
    def batch_processor(data: Generator, batch_size: int) -> Generator[List, None, None]:
        """Yield lists of ``batch_size`` consecutive items; the last one may be shorter."""
        batch = []
        for item in data:
            batch.append(item)
            if len(batch) >= batch_size:
                yield batch
                batch = []
        if batch:
            yield batch

    # Pattern 2: sliding window.
    def sliding_window(data: Generator, window_size: int) -> Generator[List, None, None]:
        """Yield every run of ``window_size`` consecutive items as a new list."""
        # A bounded deque drops the oldest item in O(1); list.pop(0) shifts
        # the whole window on every step.
        window = deque(maxlen=window_size)
        for item in data:
            window.append(item)
            if len(window) == window_size:
                yield list(window)

    # Pattern 3: de-duplication.
    def unique_generator(data: Generator) -> Generator[Any, None, None]:
        """Yield each distinct (hashable) item once, in first-seen order."""
        seen = set()
        for item in data:
            if item not in seen:
                seen.add(item)
                yield item

    print("\n4. 内存高效模式:")
    # Test data: the digits 0-9 repeated.
    test_data = (x % 10 for x in range(50))
    # Batching.
    batched = batch_processor(test_data, 5)
    print("分批处理(批大小=5):")
    for i, batch in enumerate(batched):
        if i < 3:
            print(f" 批次{i+1}: {batch}")
        elif i == 3:
            print(" ...")
            break
    # Sliding window.
    test_data2 = (x for x in range(10))
    windowed = sliding_window(test_data2, 3)
    print("\n滑动窗口(窗口大小=3):")
    for window in windowed:
        print(f" 窗口: {window}")
    # De-duplication.
    test_data3 = (x % 5 for x in range(20))
    unique_data = unique_generator(test_data3)
    print(f"\n去重结果: {list(unique_data)}")
performance_comparison()
optimized_generators()
memory_efficient_patterns()
生成器调试技巧
# 生成器调试技巧
print("\n=== 生成器调试技巧 ===")
import functools
import inspect
from typing import Generator, Any
def debug_generator(func):
    """Decorator that logs every value a generator function yields, and its completion.

    The decorated callable is itself a generator function: it drives the
    wrapped generator and re-yields each value after printing a ``[DEBUG]``
    line, then prints a final line once the wrapped generator is exhausted.
    """
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        for value in func(*args, **kwargs):
            print(f"[DEBUG] {func.__name__} yielded: {value}")
            yield value
        # Reached only when the wrapped generator finished normally.
        print(f"[DEBUG] {func.__name__} finished")

    return wrapper
def trace_generator(gen: Generator, name: str = "Generator") -> Generator:
    """Pass the values of ``gen`` through while printing a ``[TRACE]`` log.

    Logs the start, each value with its zero-based position, any exception
    raised during iteration (which is re-raised), and the end of the trace.
    """
    print(f"[TRACE] {name} started")
    position = 0
    try:
        for value in gen:
            print(f"[TRACE] {name}[{position}]: {value}")
            yield value
            position += 1
    except Exception as exc:
        print(f"[TRACE] {name} error: {exc}")
        raise
    finally:
        print(f"[TRACE] {name} finished")
def profile_generator(gen: Generator, name: str = "Generator") -> Generator:
    """Pass the values of ``gen`` through and print a ``[PROFILE]`` summary at the end.

    The summary (item count, elapsed time and, if any items were produced,
    time per item) is printed from a ``finally`` block, so it also appears when
    the consumer stops early and this generator is closed. The elapsed time
    includes the time the consumer spends between items.

    Uses ``time.perf_counter()``, the monotonic high-resolution clock intended
    for measuring short durations.
    """
    import time

    started = time.perf_counter()
    count = 0
    try:
        for value in gen:
            count += 1
            yield value
    finally:
        elapsed = time.perf_counter() - started
        print(f"[PROFILE] {name}: {count} items in {elapsed:.4f}s")
        if count > 0:
            print(f"[PROFILE] {name}: {elapsed / count * 1000:.4f}ms per item")
@debug_generator
def sample_generator(n: int) -> Generator[int, None, None]:
    """Yield the first ``n`` even numbers: 0, 2, 4, ..."""
    for doubled in range(0, 2 * n, 2):
        yield doubled
def problematic_generator() -> Generator[int, None, None]:
    """Yield 0, 1, 2 and then fail with ValueError, to demonstrate error tracing."""
    for position in range(5):
        if position != 3:
            yield position
            continue
        raise ValueError(f"Error at position {position}")
# 调试示例
print("\n1. 调试装饰器:")
debug_gen = sample_generator(5)
result = list(debug_gen)
print(f"结果: {result}")
print("\n2. 跟踪生成器:")
original_gen = (x ** 2 for x in range(5))
traced_gen = trace_generator(original_gen, "SquareGenerator")
result = list(traced_gen)
print("\n3. 性能分析:")
perf_gen = (x for x in range(1000))
profiled_gen = profile_generator(perf_gen, "RangeGenerator")
# 只取前100个来测试
result = [next(profiled_gen) for _ in range(100)]
print("\n4. 错误处理:")
try:
error_gen = problematic_generator()
traced_error_gen = trace_generator(error_gen, "ProblematicGenerator")
result = list(traced_error_gen)
except ValueError as e:
print(f"捕获异常: {e}")
# 生成器状态检查
def check_generator_state(gen: Generator, name: str = "Generator"):
    """Print the state of ``gen`` and, while it still has a frame, its line number and locals."""
    print(f"{name} 状态: {inspect.getgeneratorstate(gen)}")
    frame = getattr(gen, 'gi_frame', None)
    if not frame:
        # A finished or closed generator no longer has a frame to inspect.
        return
    print(f" 当前行号: {frame.f_lineno}")
    print(f" 局部变量: {frame.f_locals}")
print("\n5. 生成器状态检查:")
state_gen = sample_generator(3)
check_generator_state(state_gen, "StateGenerator")
# 取一个值
next(state_gen)
check_generator_state(state_gen, "StateGenerator")
# 取完所有值
list(state_gen)
check_generator_state(state_gen, "StateGenerator")
实践练习
练习1:数据流处理系统
创建一个完整的数据流处理系统,支持多种数据源和处理操作。
# 练习1:数据流处理系统
print("\n=== 练习1:数据流处理系统 ===")
import json
import csv
import random
from typing import Generator, Dict, Any, Callable, Optional
from abc import ABC, abstractmethod
from io import StringIO
from datetime import datetime, timedelta
class DataSource(ABC):
    """Abstract base class for anything that can produce a stream of records."""

    @abstractmethod
    def generate_data(self) -> Generator[Dict[str, Any], None, None]:
        """Yield the source's records one at a time as dictionaries."""


class CSVDataSource(DataSource):
    """Data source backed by CSV text held in memory."""

    def __init__(self, csv_content: str):
        self.csv_content = csv_content

    def generate_data(self) -> Generator[Dict[str, Any], None, None]:
        """Yield one dict per CSV data row, keyed by the header line."""
        reader = csv.DictReader(StringIO(self.csv_content))
        for row in reader:
            yield row


class JSONDataSource(DataSource):
    """Data source backed by an already-decoded list of records."""

    def __init__(self, json_data: list):
        self.json_data = json_data

    def generate_data(self) -> Generator[Dict[str, Any], None, None]:
        """Yield the stored records in order."""
        for item in self.json_data:
            yield item


class RandomDataSource(DataSource):
    """Data source producing ``count`` random records.

    Args:
        count: number of records to generate.
        seed: if given, the module-level ``random`` generator is seeded with
            it at construction time.
    """

    def __init__(self, count: int, seed: Optional[int] = None):
        self.count = count
        # Compare against None explicitly: 0 is a valid seed and a plain
        # truthiness test would silently ignore it.
        if seed is not None:
            random.seed(seed)

    def generate_data(self) -> Generator[Dict[str, Any], None, None]:
        """Yield records with a 1-based id, a random value, a category and a timestamp."""
        for i in range(self.count):
            yield {
                'id': i + 1,
                'value': random.randint(1, 100),
                'category': random.choice(['A', 'B', 'C']),
                'timestamp': datetime.now() - timedelta(days=random.randint(0, 30))
            }
class DataProcessor:
    """Fluent builder for a lazy processing pipeline over a data source.

    Each builder method appends one stage and returns ``self`` so calls can be
    chained. Nothing runs until the generator returned by ``execute()`` is
    iterated.
    """

    def __init__(self, data_source: 'DataSource'):
        self.data_source = data_source
        self.processors = []  # stage functions, applied in insertion order

    def filter(self, predicate: Callable[[Dict[str, Any]], bool]) -> 'DataProcessor':
        """Add a stage that keeps only the items for which ``predicate`` is truthy."""
        def filter_processor(data_stream):
            for item in data_stream:
                if predicate(item):
                    yield item
        self.processors.append(filter_processor)
        return self

    def map(self, transformer: Callable[[Dict[str, Any]], Dict[str, Any]]) -> 'DataProcessor':
        """Add a stage that replaces each item with ``transformer(item)``."""
        def map_processor(data_stream):
            for item in data_stream:
                yield transformer(item)
        self.processors.append(map_processor)
        return self

    def group_by(self, key_func: Callable[[Dict[str, Any]], str]) -> 'DataProcessor':
        """Add a stage that groups items by ``key_func``.

        The stage consumes its whole input, then yields one dict per group with
        the keys ``group_key``, ``items`` and ``count``, in first-seen key order.
        """
        def group_processor(data_stream):
            groups = {}
            for item in data_stream:
                groups.setdefault(key_func(item), []).append(item)
            for key, items in groups.items():
                yield {'group_key': key, 'items': items, 'count': len(items)}
        self.processors.append(group_processor)
        return self

    def aggregate(self, agg_func: Callable[[list], Dict[str, Any]]) -> 'DataProcessor':
        """Add a stage that collapses the whole stream into ``agg_func(items)``.

        Nothing is yielded when the stream is empty.
        """
        def agg_processor(data_stream):
            items = list(data_stream)
            if items:
                yield agg_func(items)
        self.processors.append(agg_processor)
        return self

    def take(self, count: int) -> 'DataProcessor':
        """Add a stage that passes through at most ``count`` items.

        A non-positive ``count`` yields nothing.
        """
        from itertools import islice

        def take_processor(data_stream):
            # islice stops as soon as `count` items were produced, so upstream
            # is never asked for an item that would only be thrown away.
            yield from islice(data_stream, max(count, 0))
        self.processors.append(take_processor)
        return self

    def execute(self) -> Generator[Dict[str, Any], None, None]:
        """Wire the stages together and return the resulting lazy generator."""
        data_stream = self.data_source.generate_data()
        for processor in self.processors:
            data_stream = processor(data_stream)
        return data_stream
class DataSink:
    """Terminal operations that consume a data stream and render it in some format."""

    @staticmethod
    def to_list(data_stream: Generator) -> list:
        """Materialize the stream as a list."""
        return list(data_stream)

    @staticmethod
    def to_json(data_stream: Generator, indent: int = 2) -> str:
        """Serialize the stream as a JSON array.

        Values that ``json`` cannot encode natively (e.g. ``datetime``) are
        rendered with ``str()``.
        """
        data = list(data_stream)
        return json.dumps(data, indent=indent, default=str)

    @staticmethod
    def to_csv(data_stream: Generator) -> str:
        """Serialize the stream of dict records as CSV text with a header row.

        Returns an empty string for an empty stream. The header is the union of
        all record keys in first-seen order, so records with differing keys are
        written with blanks for missing fields instead of raising ``ValueError``.
        """
        data = list(data_stream)
        if not data:
            return ""
        # dict.fromkeys de-duplicates while preserving first-seen order.
        fieldnames = list(dict.fromkeys(key for record in data for key in record))
        output = StringIO()
        writer = csv.DictWriter(output, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(data)
        return output.getvalue()

    @staticmethod
    def print_summary(data_stream: Generator, title: str = "数据摘要"):
        """Print the record count, the field names and up to the first three records."""
        data = list(data_stream)
        print(f"\n{title}:")
        print(f" 记录数: {len(data)}")
        if data:
            print(f" 字段: {list(data[0].keys())}")
            print(" 前3条记录:")
            for i, record in enumerate(data[:3], 1):
                print(f" {i}. {record}")
            if len(data) > 3:
                print(f" ... 还有 {len(data) - 3} 条记录")
# 演示数据流处理系统
print("\n=== 数据流处理系统演示 ===")
# 准备测试数据
csv_data = """id,name,age,department,salary
1,Alice,25,Engineering,75000
2,Bob,30,Engineering,80000
3,Charlie,35,Marketing,65000
4,Diana,28,Engineering,78000
5,Eve,32,Marketing,70000
6,Frank,29,Sales,60000
7,Grace,31,Engineering,82000
8,Henry,27,Sales,58000"""
json_data = [
{'product': 'A', 'sales': 100, 'region': 'North'},
{'product': 'B', 'sales': 150, 'region': 'South'},
{'product': 'A', 'sales': 120, 'region': 'East'},
{'product': 'C', 'sales': 80, 'region': 'West'},
{'product': 'B', 'sales': 200, 'region': 'North'},
]
print("\n1. CSV数据处理:")
# 处理CSV数据:筛选工程部门,计算平均薪资
csv_source = CSVDataSource(csv_data)
csv_processor = DataProcessor(csv_source)
result = (csv_processor
.filter(lambda x: x['department'] == 'Engineering')
.map(lambda x: {**x, 'salary': int(x['salary'])})
.aggregate(lambda items: {
'department': 'Engineering',
'count': len(items),
'avg_salary': sum(item['salary'] for item in items) / len(items),
'total_salary': sum(item['salary'] for item in items)
})
.execute())
DataSink.print_summary(result, "工程部门薪资统计")
print("\n2. JSON数据处理:")
# 处理JSON数据:按产品分组,计算销售总额
json_source = JSONDataSource(json_data)
json_processor = DataProcessor(json_source)
result = (json_processor
.group_by(lambda x: x['product'])
.map(lambda group: {
'product': group['group_key'],
'total_sales': sum(item['sales'] for item in group['items']),
'regions': list(set(item['region'] for item in group['items'])),
'avg_sales': sum(item['sales'] for item in group['items']) / len(group['items'])
})
.execute())
DataSink.print_summary(result, "产品销售统计")
print("\n3. 随机数据处理:")
# 处理随机数据:过滤高值数据,按类别分组
random_source = RandomDataSource(20, seed=42)
random_processor = DataProcessor(random_source)
result = (random_processor
.filter(lambda x: x['value'] > 50)
.map(lambda x: {**x, 'value_grade': 'High' if x['value'] > 80 else 'Medium'})
.group_by(lambda x: x['category'])
.take(3)
.execute())
DataSink.print_summary(result, "随机数据分析")
print("\n4. 复杂数据管道:")
# 复杂的数据处理管道
complex_processor = DataProcessor(csv_source)
result = (complex_processor
.filter(lambda x: int(x['age']) >= 30) # 年龄>=30
.map(lambda x: { # 添加薪资等级
**x,
'age': int(x['age']),
'salary': int(x['salary']),
'salary_grade': 'A' if int(x['salary']) >= 80000 else 'B' if int(x['salary']) >= 70000 else 'C'
})
.group_by(lambda x: x['department']) # 按部门分组
.map(lambda group: { # 计算部门统计
'department': group['group_key'],
'employee_count': group['count'],
'avg_age': sum(emp['age'] for emp in group['items']) / len(group['items']),
'avg_salary': sum(emp['salary'] for emp in group['items']) / len(group['items']),
'salary_grades': {grade: sum(1 for emp in group['items'] if emp['salary_grade'] == grade)
for grade in ['A', 'B', 'C']}
})
.execute())
DataSink.print_summary(result, "部门综合分析(年龄>=30)")
# 输出为不同格式
print("\n5. 数据输出格式:")
simple_data = DataProcessor(JSONDataSource(json_data[:3])).execute()
# 转换为JSON
json_output = DataSink.to_json(simple_data)
print(f"JSON格式:\n{json_output}")
# 重新生成数据(因为生成器已耗尽)
simple_data2 = DataProcessor(JSONDataSource(json_data[:3])).execute()
csv_output = DataSink.to_csv(simple_data2)
print(f"\nCSV格式:\n{csv_output}")
练习2:实时数据监控系统
创建一个实时数据监控系统,使用生成器处理连续的数据流。
# 练习2:实时数据监控系统
print("\n=== 练习2:实时数据监控系统 ===")
import time
import random
import threading
from collections import deque
from typing import Generator, Dict, Any, Optional
from datetime import datetime
from queue import Queue, Empty
class RealTimeDataSource:
    """Simulated real-time data source fed by a background thread.

    A producer thread pushes randomly generated readings onto an internal
    queue; ``get_data_stream`` exposes that queue as a generator.
    """

    def __init__(self, source_name: str):
        self.source_name = source_name
        self.is_running = False
        self.data_queue = Queue()
        self.thread = None

    def start(self):
        """Launch the producer thread unless the source is already running."""
        if self.is_running:
            return
        self.is_running = True
        self.thread = threading.Thread(target=self._generate_data)
        # Daemon thread: it must not keep the interpreter alive on exit.
        self.thread.daemon = True
        self.thread.start()

    def stop(self):
        """Clear the running flag and wait for the producer thread, if any."""
        self.is_running = False
        worker = self.thread
        if worker:
            worker.join()

    def _generate_data(self):
        """Producer loop: enqueue one simulated reading per iteration."""
        sequence_no = 0
        while self.is_running:
            reading = {}
            reading['source'] = self.source_name
            reading['timestamp'] = datetime.now()
            reading['value'] = random.uniform(0, 100)
            reading['status'] = random.choice(['normal', 'warning', 'error'])
            reading['counter'] = sequence_no
            self.data_queue.put(reading)
            sequence_no += 1
            # Random pause between readings.
            pause = random.uniform(0.1, 0.5)
            time.sleep(pause)

    def get_data_stream(self) -> Generator[Dict[str, Any], None, None]:
        """Yield queued readings until the source is stopped and drained.

        Each wait on the queue times out after one second; on a timeout the
        generator finishes if the source is no longer running, otherwise it
        keeps waiting.
        """
        while True:
            try:
                yield self.data_queue.get(timeout=1.0)
            except Empty:
                if not self.is_running:
                    return
class DataMonitor:
    """Sliding-window monitor that annotates each reading with stats and alerts."""

    def __init__(self, window_size: int = 10):
        self.window_size = window_size
        # Bounded deque: the oldest reading drops out once the window is full.
        self.data_window = deque(maxlen=window_size)
        self.alerts = []

    def process_data_stream(self, data_stream: Generator) -> Generator[Dict[str, Any], None, None]:
        """Consume readings lazily and yield one monitoring record per reading.

        Each reading must provide the keys ``timestamp``, ``source``,
        ``value`` and ``status``.
        """
        for sample in data_stream:
            self.data_window.append(sample)
            window_stats = self._calculate_stats()
            raised = self._check_alerts(sample, window_stats)
            yield {
                'timestamp': sample['timestamp'],
                'source': sample['source'],
                'current_value': sample['value'],
                'current_status': sample['status'],
                'stats': window_stats,
                'alerts': raised,
                'window_size': len(self.data_window),
            }

    def _calculate_stats(self) -> Dict[str, Any]:
        """Return aggregate statistics for the current window ({} when empty)."""
        if len(self.data_window) == 0:
            return {}

        values = [entry['value'] for entry in self.data_window]
        statuses = [entry['status'] for entry in self.data_window]

        status_counts = {}
        for status in set(statuses):
            status_counts[status] = statuses.count(status)

        summary = {}
        summary['avg_value'] = sum(values) / len(values)
        summary['min_value'] = min(values)
        summary['max_value'] = max(values)
        summary['status_counts'] = status_counts
        summary['trend'] = self._calculate_trend(values)
        return summary

    def _calculate_trend(self, values: list) -> str:
        """Classify the trend by comparing the last three values to the rest.

        Returns 'increasing' / 'decreasing' when the recent average differs
        from the older average by more than 10%, otherwise 'stable'.
        """
        total = len(values)
        if total < 2:
            return 'stable'

        recent_avg = sum(values[-3:]) / min(3, total)
        if total > 3:
            older_avg = sum(values[:-3]) / max(1, total - 3)
        else:
            # Not enough history to compare against: treated as no change.
            older_avg = recent_avg

        if recent_avg > older_avg * 1.1:
            return 'increasing'
        if recent_avg < older_avg * 0.9:
            return 'decreasing'
        return 'stable'

    def _check_alerts(self, current_data: Dict[str, Any], stats: Dict[str, Any]) -> list:
        """Return the list of alert dicts triggered by the current reading."""

        def make_alert(kind, message, severity):
            return {'type': kind, 'message': message, 'severity': severity}

        triggered = []

        # Value out of range.
        if current_data['value'] > 90:
            triggered.append(
                make_alert('high_value', f"值过高: {current_data['value']:.2f}", 'warning'))
        if current_data['value'] < 10:
            triggered.append(
                make_alert('low_value', f"值过低: {current_data['value']:.2f}", 'warning'))

        # Status reported as error.
        if current_data['status'] == 'error':
            triggered.append(make_alert('status_error', "系统状态异常", 'error'))

        # Rising trend combined with a high window average.
        rising = stats.get('trend') == 'increasing'
        if rising and stats.get('avg_value', 0) > 80:
            triggered.append(make_alert('trend_alert', "值持续上升且平均值过高", 'warning'))

        return triggered
class AlertManager:
    """Collects alerts from a monitoring stream and keeps running totals.

    Attributes:
        alert_history: bounded deque holding the 100 most recent alerts.
        alert_counts: mapping of severity -> number of alerts seen so far.
            It starts with 'error' and 'warning'; other severities are
            added the first time they occur.
    """

    def __init__(self):
        self.alert_history = deque(maxlen=100)
        self.alert_counts = {'error': 0, 'warning': 0}

    def process_alerts(self, monitor_stream: Generator) -> Generator[Dict[str, Any], None, None]:
        """Yield each monitoring record extended with an 'alert_summary' entry.

        Args:
            monitor_stream: iterable of monitoring records (dicts). Each
                record needs 'timestamp' and 'source' when it carries
                alerts; 'alerts' may be missing or None.

        Yields:
            A copy of the record plus 'alert_summary', which contains
            'current_alerts', 'total_errors', 'total_warnings' and
            'recent_alerts' (up to the five most recent history entries).
        """
        for monitor_data in monitor_stream:
            # A missing or None 'alerts' entry is treated as "no alerts".
            alerts = monitor_data.get('alerts') or []

            # Record each alert together with where and when it happened.
            for alert in alerts:
                self.alert_history.append({
                    **alert,
                    'timestamp': monitor_data['timestamp'],
                    'source': monitor_data['source']
                })
                # Count unknown severities too instead of raising KeyError.
                severity = alert['severity']
                self.alert_counts[severity] = self.alert_counts.get(severity, 0) + 1

            alert_summary = {
                'current_alerts': len(alerts),
                'total_errors': self.alert_counts['error'],
                'total_warnings': self.alert_counts['warning'],
                'recent_alerts': list(self.alert_history)[-5:]
            }

            yield {
                **monitor_data,
                'alert_summary': alert_summary
            }
class DashboardDisplay:
    """Text dashboard that renders records from the alert pipeline."""

    @staticmethod
    def display_real_time(alert_stream: Generator, max_updates: int = 20,
                          refresh_interval: float = 1.0):
        """Print a dashboard frame for each record, up to ``max_updates`` frames.

        Args:
            alert_stream: iterable of records with the keys 'timestamp'
                (an object with ``strftime``), 'source', 'current_value',
                'current_status' and 'window_size', plus optional 'stats',
                'alerts' and 'alert_summary'.
            max_updates: maximum number of frames to print. Exactly this many
                items are pulled from ``alert_stream``; nothing is consumed
                when it is zero or negative.
            refresh_interval: seconds to pause between frames (default 1.0).
                No pause is made after the final frame.
        """
        if max_updates <= 0:
            return

        for update_count, data in enumerate(alert_stream, 1):
            # Frame header (simplified stand-in for clearing the screen).
            print("\n" + "="*80)
            print(f"实时监控仪表板 - 更新 #{update_count}")
            print("="*80)

            # Basic information about the current reading.
            print(f"时间: {data['timestamp'].strftime('%H:%M:%S')}")
            print(f"数据源: {data['source']}")
            print(f"当前值: {data['current_value']:.2f}")
            print(f"当前状态: {data['current_status']}")

            # Window statistics.
            stats = data.get('stats', {})
            if stats:
                print(f"\n统计信息(窗口大小: {data['window_size']}):")
                print(f" 平均值: {stats.get('avg_value', 0):.2f}")
                print(f" 最小值: {stats.get('min_value', 0):.2f}")
                print(f" 最大值: {stats.get('max_value', 0):.2f}")
                print(f" 趋势: {stats.get('trend', 'unknown')}")
                print(f" 状态分布: {stats.get('status_counts', {})}")

            # Alerts raised by the current reading.
            current_alerts = data.get('alerts', [])
            if current_alerts:
                print(f"\n🚨 当前告警 ({len(current_alerts)}):")
                for alert in current_alerts:
                    severity_icon = "🔴" if alert['severity'] == 'error' else "🟡"
                    print(f" {severity_icon} {alert['message']}")
            else:
                print("\n✅ 无当前告警")

            # Running alert totals.
            alert_summary = data.get('alert_summary', {})
            print("\n告警统计:")
            print(f" 总错误数: {alert_summary.get('total_errors', 0)}")
            print(f" 总警告数: {alert_summary.get('total_warnings', 0)}")

            # The three most recent alerts from the history.
            recent_alerts = alert_summary.get('recent_alerts', [])
            if recent_alerts:
                print("\n最近告警:")
                for alert in recent_alerts[-3:]:
                    time_str = alert['timestamp'].strftime('%H:%M:%S')
                    print(f" [{time_str}] {alert['message']}")

            # Stop BEFORE pulling another item: asking the stream for item
            # max_updates + 1 would block on a live source and discard data.
            if update_count >= max_updates:
                break

            # Throttle the refresh rate between frames.
            if refresh_interval > 0:
                time.sleep(refresh_interval)
# Demo: run the real-time monitoring pipeline end to end.
print("\n=== 实时监控系统演示 ===")

# Data source and the two processing stages.
data_source = RealTimeDataSource("Sensor-001")
monitor = DataMonitor(window_size=5)
alert_manager = AlertManager()

# Introductory banner.
print("\n".join((
    "\n启动实时监控(将运行20次更新)...",
    "监控指标:值范围、状态变化、趋势分析",
    "告警规则:值>90或<10、状态错误、持续上升趋势",
)))

try:
    data_source.start()

    # Chain the lazy stages: raw readings -> monitoring records -> alert summaries.
    data_stream = data_source.get_data_stream()
    monitor_stream = monitor.process_data_stream(data_stream)
    alert_stream = alert_manager.process_alerts(monitor_stream)

    # Pulling from the last stage drives the whole pipeline.
    DashboardDisplay.display_real_time(alert_stream, max_updates=20)
finally:
    # Always stop the data source, even if the display loop fails.
    data_source.stop()
    print("\n监控系统已停止")

# Closing summary of what the demo showed.
print("\n".join((
    "\n监控系统演示完成",
    "系统特点:",
    "- 实时数据生成和处理",
    "- 滑动窗口统计分析",
    "- 多级告警检测",
    "- 实时仪表板显示",
    "- 内存高效的流式处理",
)))
总结
核心知识点
- 迭代器协议
  - `__iter__()` 和 `__next__()` 方法
  - 可迭代对象与迭代器的区别
  - StopIteration 异常的作用
- 生成器函数
  - `yield` 关键字的使用
  - 生成器的状态保持
  - 生成器方法:`send()`、`throw()`、`close()`
- 生成器表达式
  - 简洁的生成器创建语法
  - 内存效率优势
  - 与列表推导式的对比
- 协程基础
  - 基于生成器的协程实现
  - 双向通信机制
  - 协程状态管理
- itertools 模块
  - 无限迭代器:`count()`、`cycle()`、`repeat()`
  - 有限迭代器:`accumulate()`、`chain()`、`compress()` 等
  - 组合迭代器:`product()`、`permutations()`、`combinations()`
技能掌握
初级技能
- 理解迭代器协议的基本概念
- 能够创建简单的生成器函数
- 使用生成器表达式进行基本数据处理
- 了解 `itertools` 模块的常用函数
中级技能
- 创建复杂的自定义迭代器
- 使用生成器进行内存优化
- 理解协程的基本概念和应用
- 设计高效的数据处理管道
高级技能
- 设计复杂的生成器系统
- 优化生成器性能
- 实现高级的协程模式
- 解决大数据处理中的内存问题
最佳实践
-
内存效率
- 优先使用生成器处理大数据集
- 避免不必要的数据加载到内存
- 使用流式处理代替批量处理
-
性能优化
- 合理设计生成器链
- 避免在生成器中进行重复计算
- 使用适当的缓冲策略
-
错误处理
- 妥善处理StopIteration异常
- 在生成器中实现适当的错误恢复
- 使用try-finally确保资源清理
-
代码可读性
- 为复杂的生成器添加文档
- 使用有意义的变量名
- 适当分解复杂的生成器逻辑
常见陷阱
-
生成器耗尽
- 生成器只能迭代一次
- 需要重新创建生成器来重复迭代
-
内存泄漏
- 生成器中的循环引用
- 未正确关闭的资源
-
性能误区
- 过度使用生成器导致调用开销
- 不适当的生成器嵌套
-
调试困难
- 生成器的惰性求值使调试复杂
- 状态难以跟踪
性能考虑
-
内存使用
- 生成器显著减少内存占用
- 适合处理大数据集
- 避免一次性加载所有数据
-
计算效率
- 惰性求值避免不必要的计算
- 流式处理提高响应速度
- 合理的缓冲策略平衡内存和性能
-
I/O优化
- 生成器适合处理文件和网络数据
- 减少I/O阻塞时间
- 支持实时数据处理
下一步学习
-
深入学习
- 异步生成器和async/await
- 更复杂的协程模式
- 生成器在并发编程中的应用
-
实践项目
- 构建数据处理管道
- 实现实时数据分析系统
- 创建内存高效的文件处理工具
-
相关主题
- 异步编程和协程
- 函数式编程
- 并发和并行处理
扩展阅读
-
官方文档
-
进阶资源
- “Fluent Python” - Luciano Ramalho
- “Effective Python” - Brett Slatkin
- “Python Tricks” - Dan Bader
-
实际应用
- 数据科学中的生成器应用
- Web开发中的流式响应
- 系统监控和日志处理
学习建议:生成器和迭代器是Python中非常重要的概念,建议通过大量实践来掌握。从简单的生成器开始,逐步学习更复杂的应用模式,特别关注内存效率和性能优化方面的应用。
下一章节:023-上下文管理器:
“”“高级组合应用”“”
# 生成所有可能的密码组合(简化版)
digits = ‘0123456789’
letters = ‘abcdefghijklmnopqrstuvwxyz’
# 4位数字密码
four_digit_passwords = product(digits, repeat=4)
print(f"\n4位数字密码总数: {len(list(four_digit_passwords))}")
# 2位字母+2位数字的组合
letter_digit_combos = product(letters, repeat=2), product(digits, repeat=2)
mixed_passwords = product(*letter_digit_combos)
# 只显示前几个
sample_passwords = list(islice(mixed_passwords, 5))
print(f"混合密码示例: {sample_passwords}")
# 团队分组
team_members = ['Alice', 'Bob', 'Charlie', 'Diana', 'Eve', 'Frank']
print(f"\n从{len(team_members)}人中选3人的所有组合:")
for i, team in enumerate(combinations(team_members, 3), 1):
if i <= 5: # 只显示前5个
print(f" 团队{i}: {team}")
elif i == 6:
print(" ...")
total_combinations = len(list(combinations(team_members, 3)))
print(f" 总共有 {total_combinations} 种组合")
advanced_combinations()


486

被折叠的 条评论
为什么被折叠?



