NumPy ndarray对象完全指南：创建、属性与方法详解

原创于 2026-04-07 20:35:55 发布 · 495 阅读

6 ·

本内容遵循CC 4.0 BY-SA版权协议

GEO检测

标签

#numpy #python

Python嗦粉指南专栏收录该内容

20 篇文章

订阅专栏

开发板推荐：天空星STM32F407VET6开发板

超高性价比 STM32主控 | 超高主频 | 一板兼容百芯 | 比赛神器 | 沉金彩色丝印

点击查看

本文深入讲解NumPy核心数据结构ndarray的创建、属性、方法及各种实用操作，包含大量实例和性能测试。

1. NumPy ndarray基础介绍

NumPy是Python科学计算的基础库，其核心是ndarray（N-dimensional array，N维数组）对象。相比于Python列表，ndarray提供了更高效的多维数组操作。

# `np` is the alias every later snippet in this article relies on.
import numpy as np
print(f"NumPy版本: {np.__version__}")

2. 基础ndarray示例与属性详解

2.1 创建示例数组

# Build a 3x5 two-dimensional array holding the integers 0..14
arr1 = np.arange(15).reshape(3, 5)
print("arr1数组:")
print(arr1)
print(f"数组内容:\n{arr1}")
print(f"数组类型: {arr1.dtype}")  # element data type
print(f"数组维度: {arr1.ndim}")   # number of axes
print(f"数组形状: {arr1.shape}")  # shape as (rows, columns)
print(f"数组大小: {arr1.size}")   # total number of elements
print(f"元素字节数: {arr1.itemsize}")  # bytes occupied by one element
print(f"数组总字节数: {arr1.nbytes}")  # total bytes = size * itemsize
print(f"数组步幅: {arr1.strides}")  # bytes to step to reach the next element along each axis

输出结果：

arr1数组:
[[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [10 11 12 13 14]]
数组内容:
[[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [10 11 12 13 14]]

数组类型: int64
数组维度: 2
数组形状: (3, 5)
数组大小: 15
元素字节数: 8
数组总字节数: 120
数组步幅: (40, 8)  # 每行40字节(5个元素×8字节)，每列8字节

2.2 属性详细解析

# Arrays of rank 1, 2 and 3 used to compare attribute values side by side
arr2 = np.array([1, 2, 3, 4, 5])  # 1-D array
arr3 = np.array([[1, 2, 3], [4, 5, 6]])  # 2-D array
arr4 = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])  # 3-D array

arrays = [("一维数组", arr2), ("二维数组", arr3), ("三维数组", arr4)]

print("不同维度数组属性对比:")
print("=" * 80)
for name, arr in arrays:
    print(f"\n{name}:")
    print(f"  数组: {arr}")
    print(f"  dtype: {arr.dtype} (数据类型)")
    print(f"  ndim: {arr.ndim} (维度数)")
    print(f"  shape: {arr.shape} (形状)")
    print(f"  size: {arr.size} (元素总数)")
    print(f"  itemsize: {arr.itemsize}字节 (每个元素大小)")
    print(f"  nbytes: {arr.nbytes}字节 (总内存占用)")
    print(f"  strides: {arr.strides} (步幅)")
    
    # Cross-check: size must equal the product of the entries of shape.
    # (All three sample arrays have ndim >= 1, so the guard is always true here.)
    if arr.ndim > 0:
        calculated_size = 1
        for dim in arr.shape:
            calculated_size *= dim
        print(f"  验证size计算: {arr.shape} -> {calculated_size} = {arr.size}")

3. 创建ndarray的多种方法

3.1 从Python列表/元组创建

print("=== 从Python列表/元组创建 ===")

# From a flat list
list_data = [1, 2, 3, 4, 5]
arr_from_list = np.array(list_data)
print(f"从列表创建: {arr_from_list}")

# From a nested list: each inner list becomes one row of a 2-D array
list_2d = [[1, 2, 3], [4, 5, 6]]
arr_2d_from_list = np.array(list_2d)
print(f"从嵌套列表创建2D数组:\n{arr_2d_from_list}")

# From a tuple
tuple_data = (1.0, 2.0, 3.0, 4.0)
arr_from_tuple = np.array(tuple_data)
print(f"从元组创建: {arr_from_tuple}")

# Same integer input, three explicitly requested dtypes
arr_int32 = np.array([1, 2, 3], dtype=np.int32)
arr_float64 = np.array([1, 2, 3], dtype=np.float64)
arr_complex = np.array([1, 2, 3], dtype=np.complex128)
print(f"指定int32: {arr_int32}, dtype={arr_int32.dtype}")
print(f"指定float64: {arr_float64}, dtype={arr_float64.dtype}")
print(f"指定complex128: {arr_complex}, dtype={arr_complex.dtype}")

3.2 使用np.arange()创建数组

np.arange()类似于Python的range()，但返回NumPy数组，并且支持浮点数步长。注意：使用浮点数步长时，由于舍入误差，结果的元素个数可能与预期不一致；需要精确控制元素个数时，建议改用np.linspace()。

print("\n=== 使用np.arange()创建数组 ===")

# Basic usage: stop only
arr1 = np.arange(10)  # 0 through 9
print(f"arange(10): {arr1}")

# Explicit start value
arr2 = np.arange(5, 10)  # 5 through 9
print(f"arange(5, 10): {arr2}")

# Explicit step
arr3 = np.arange(0, 10, 2)  # 0, 2, 4, 6, 8
print(f"arange(0, 10, 2): {arr3}")

# Floating-point step
arr4 = np.arange(0, 1, 0.1)  # 0.0, 0.1, 0.2, ..., 0.9
print(f"arange(0, 1, 0.1): {arr4}")

# Negative step counts downwards; the stop value is still excluded
arr5 = np.arange(10, 0, -2)  # 10, 8, 6, 4, 2
print(f"arange(10, 0, -2): {arr5}")

# Benchmark: np.arange versus the built-in range.
# time.perf_counter() is used because it is the high-resolution clock meant for
# measuring short durations; a coarse wall clock can report 0 elapsed seconds
# for these operations, which would make the speed-up ratio below divide by zero.
import sys
import time

size = 1000000
print(f"\n性能测试: 创建{size:,}个元素的数组")

# np.arange
start = time.perf_counter()
arr_arange = np.arange(size)
time_arange = time.perf_counter() - start
print(f"np.arange: {time_arange:.6f}秒")

# Python range materialized as a list (one int object per element)
start = time.perf_counter()
list_range = list(range(size))
time_range_list = time.perf_counter() - start
print(f"Python range转list: {time_range_list:.6f}秒")

# Bare range object: lazy, so nothing is materialized here
start = time.perf_counter()
range_obj = range(size)
time_range = time.perf_counter() - start
print(f"Python range对象: {time_range:.6f}秒")

# Only report the ratio when the denominator is a usable measurement
if time_arange > 0:
    print(f"np.arange比list(range)快{time_range_list/time_arange:.1f}倍")

# Memory comparison.
# The list object itself only stores pointers; the int objects it refers to
# live elsewhere, so they are added in to get a figure comparable to nbytes.
list_total_bytes = sys.getsizeof(list_range) + sum(
    sys.getsizeof(item) for item in list_range
)
print(f"\n内存占用对比:")
print(f"np.arange数组: {arr_arange.nbytes:,}字节")
print(f"Python列表(含元素对象): {list_total_bytes:,}字节")

3.3 使用随机函数创建数组

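NumPy提供了多种随机数生成函数，是机器学习和数据科学中的重要工具。本节使用的np.random.rand()、np.random.seed()等属于基于全局状态的传统（legacy）接口；在新代码中，NumPy官方推荐使用rng = np.random.default_rng(seed)创建独立的随机数生成器，再调用rng.random()、rng.integers()、rng.normal()等方法。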

print("\n=== 使用随机函数创建数组 ===")

# 1. np.random.rand() - uniform samples on [0, 1)
print("1. np.random.rand() - [0,1)均匀分布")
np.random.seed(42)  # seed the global generator so the draws below are reproducible

# 1-D array
rand_1d = np.random.rand(5)
print(f"  rand(5): {rand_1d}")

# 2-D array (dimensions are passed as separate arguments, not as a tuple)
rand_2d = np.random.rand(3, 4)
print(f"  rand(3,4):\n{rand_2d}")

# 3-D array
rand_3d = np.random.rand(2, 3, 4)
print(f"  rand(2,3,4)形状: {rand_3d.shape}")

# 2. np.random.randint() - uniformly distributed integers
print("\n2. np.random.randint() - 整数均匀分布")

# Basic usage: values are drawn from [low, high)
randint_simple = np.random.randint(0, 10, 5)
print(f"  randint(0, 10, 5): {randint_simple}")

# Shape given as a tuple
randint_2d = np.random.randint(0, 100, (3, 4))
print(f"  randint(0, 100, (3,4)):\n{randint_2d}")

# Single positional bound: it is treated as `high`, with low defaulting to 0
randint_high_only = np.random.randint(10, size=5)
print(f"  randint(10, size=5): {randint_high_only}")

# 3. np.random.uniform() - uniform samples on [low, high)
print("\n3. np.random.uniform() - 均匀分布")

# Basic usage
uniform_simple = np.random.uniform(0, 1, 5)
print(f"  uniform(0, 1, 5): {uniform_simple}")

# A different interval
uniform_range = np.random.uniform(-5, 5, 5)
print(f"  uniform(-5, 5, 5): {uniform_range}")

# 2-D array
uniform_2d = np.random.uniform(0, 10, (2, 3))
print(f"  uniform(0, 10, (2,3)):\n{uniform_2d}")

# 4. Other distributions
print("\n4. 其他随机分布函数")

# Normal distribution
normal = np.random.randn(5)  # standard normal (mean 0, std 1)
print(f"  randn(5) 标准正态分布: {normal}")

# Normal distribution with explicit mean and standard deviation
normal_custom = np.random.normal(10, 2, 5)  # mean=10, std=2
print(f"  normal(10, 2, 5): {normal_custom}")

# Exponential distribution
exponential = np.random.exponential(1.0, 5)  # scale parameter = 1.0
print(f"  exponential(1.0, 5): {exponential}")

# Random choice from a given sequence
choice = np.random.choice(['A', 'B', 'C', 'D'], 10)
print(f"  choice(['A','B','C','D'], 10): {choice}")

# 5. Why seeding matters
print("\n5. 随机种子设置的重要性")

# No fixed seed: seed(None) re-seeds the global generator unpredictably
np.random.seed(None)
random1 = np.random.rand(3)
random2 = np.random.rand(3)
print(f"  不设置种子: {random1}, {random2}")
print(f"  是否相同: {np.array_equal(random1, random2)}")

# Re-seeding with the same value before each draw reproduces the same numbers
np.random.seed(123)
random3 = np.random.rand(3)
np.random.seed(123)
random4 = np.random.rand(3)
print(f"  设置相同种子: {random3}, {random4}")
print(f"  是否相同: {np.array_equal(random3, random4)}")

# 6. Practical example: building a dataset
print("\n6. 实用示例：创建模拟数据集")

def create_sample_dataset(n_samples=100):
    """Return a synthetic ``(n_samples, 5)`` float table.

    Columns, in order: age, income, education level, years of experience, salary.
    The global NumPy generator is re-seeded with 42 on every call, so the result
    for a given ``n_samples`` is reproducible (and the caller's global random
    state is reset as a side effect).
    """
    np.random.seed(42)

    # Feature draws. The order of these calls fixes which part of the seeded
    # stream each column receives, so it must not be rearranged.
    age = np.random.randint(18, 65, n_samples)
    income = np.random.normal(50000, 15000, n_samples)
    education = np.random.choice(
        [1, 2, 3, 4], n_samples, p=[0.1, 0.3, 0.4, 0.2]
    )
    experience = np.random.exponential(10, n_samples)

    # Target: linear combination of the features plus Gaussian noise (std 5000).
    noise = np.random.randn(n_samples) * 5000
    base = 20000 + 500 * age
    salary = base + 0.8 * income + 10000 * education + 2000 * experience + noise

    # One column per variable
    columns = (age, income, education, experience, salary)
    return np.column_stack(columns)

# Generate five samples and print one formatted row per sample
dataset = create_sample_dataset(5)
print("模拟数据集 (年龄, 收入, 教育程度, 工作经验, 薪资):")
for i, row in enumerate(dataset):
    print(f"  样本{i}: [{row[0]:.0f}, {row[1]:.0f}, {row[2]:.0f}, {row[3]:.1f}, {row[4]:.0f}]")

3.4 特殊数组创建函数

print("\n=== 特殊数组创建函数 ===")

# 1. np.zeros() - arrays filled with zeros
print("1. np.zeros() - 全零数组")

zeros_1d = np.zeros(5)
print(f"  zeros(5): {zeros_1d}")

zeros_2d = np.zeros((3, 4))
print(f"  zeros((3,4)):\n{zeros_2d}")

zeros_3d = np.zeros((2, 3, 4))
print(f"  zeros((2,3,4))形状: {zeros_3d.shape}")

# Explicit dtype
zeros_int = np.zeros(5, dtype=np.int32)
zeros_complex = np.zeros(5, dtype=np.complex128)
print(f"  zeros(5, dtype=int32): {zeros_int}")
print(f"  zeros(5, dtype=complex128): {zeros_complex}")

# 2. np.ones() - arrays filled with ones
print("\n2. np.ones() - 全1数组")

ones_1d = np.ones(5)
print(f"  ones(5): {ones_1d}")

ones_2d = np.ones((2, 3))
print(f"  ones((2,3)):\n{ones_2d}")

# Constant-valued array obtained by scaling an all-ones array
value = 3.14
arr_value = np.ones((2, 3)) * value
print(f"  创建值全为{value}的数组:\n{arr_value}")

# 3. np.empty() - uninitialized arrays
print("\n3. np.empty() - 未初始化数组")

empty_arr = np.empty(5)
print(f"  empty(5): {empty_arr} (值未初始化，是内存中的随机值)")

empty_2d = np.empty((2, 3))
print(f"  empty((2,3)):\n{empty_2d}")

# Note: np.empty() skips initialization, which makes it faster than zeros()/ones(),
# but the contents are indeterminate, so every element must be written before it is read.

# 4. np.full() - arrays filled with a given value
print("\n4. np.full() - 填充指定值的数组")

full_arr = np.full(5, 7)  # filled with 7
print(f"  full(5, 7): {full_arr}")

full_2d = np.full((3, 4), 3.14)
print(f"  full((3,4), 3.14):\n{full_2d}")

# 5. np.eye() / np.identity() - identity matrices
print("\n5. np.eye() / np.identity() - 单位矩阵")

eye_3 = np.eye(3)  # 3x3 identity matrix
print(f"  eye(3):\n{eye_3}")

eye_2x3 = np.eye(2, 3)  # 2x3 matrix with ones on the main diagonal
print(f"  eye(2,3):\n{eye_2x3}")

# Diagonal offset
eye_offset = np.eye(4, k=1)  # ones on the first diagonal above the main one
print(f"  eye(4, k=1):\n{eye_offset}")

identity_3 = np.identity(3)  # square matrices only
print(f"  identity(3):\n{identity_3}")

# 6. np.diag() - diagonal matrices
print("\n6. np.diag() - 对角矩阵")

# 1-D input: build a matrix with those values on the diagonal
diag_from_1d = np.diag([1, 2, 3])
print(f"  diag([1,2,3]):\n{diag_from_1d}")

# 2-D input: extract the diagonal instead
matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
diag_elements = np.diag(matrix)
print(f"  diag(矩阵)提取对角线: {diag_elements}")

# 7. Timing comparison of the creation functions.
# Relies on `time` having been imported by an earlier snippet. Each function is
# timed once, so the figures are indicative only.
print("\n7. 创建函数性能对比")

size = 1000000
print(f"创建{size:,}个元素的数组:")

# zeros
start = time.time()
z = np.zeros(size)
time_zeros = time.time() - start

# ones
start = time.time()
o = np.ones(size)
time_ones = time.time() - start

# empty
start = time.time()
e = np.empty(size)
time_empty = time.time() - start

# full
start = time.time()
f = np.full(size, 3.14)
time_full = time.time() - start

# NOTE: the "(最快)" label below is hard-coded, not derived from the measurements.
print(f"  zeros(): {time_zeros:.6f}秒")
print(f"  ones(): {time_ones:.6f}秒")
print(f"  empty(): {time_empty:.6f}秒 (最快)")
print(f"  full(): {time_full:.6f}秒")

# Memory check: all four arrays have the same size and default float dtype
print(f"\n内存验证 (应相同):")
print(f"  zeros: {z.nbytes:,}字节")
print(f"  ones: {o.nbytes:,}字节")
print(f"  empty: {e.nbytes:,}字节")
print(f"  full: {f.nbytes:,}字节")

3.5 数值序列创建函数

print("\n=== 数值序列创建函数 ===")

# 1. np.linspace() - linearly spaced samples
print("1. np.linspace() - 线性间隔序列")

# 5 evenly spaced values from 0 to 1
linspace_simple = np.linspace(0, 1, 5)
print(f"  linspace(0, 1, 5): {linspace_simple}")

# 11 evenly spaced values from 0 to 10
linspace_0_to_10 = np.linspace(0, 10, 11)
print(f"  linspace(0, 10, 11): {linspace_0_to_10}")

# Effect of the endpoint flag
linspace_with_endpoint = np.linspace(0, 1, 5, endpoint=True)  # default: stop is included
linspace_without_endpoint = np.linspace(0, 1, 5, endpoint=False)  # stop is excluded
print(f"  linspace(0,1,5,endpoint=True): {linspace_with_endpoint}")
print(f"  linspace(0,1,5,endpoint=False): {linspace_without_endpoint}")

# Recover the step size from the result: (last - first) / (count - 1)
linspace_result = np.linspace(0, 10, 5)
step = (linspace_result[-1] - linspace_result[0]) / (len(linspace_result) - 1)
print(f"  linspace(0,10,5)步长: {step:.2f}")

# 2. np.logspace() - logarithmically spaced samples
print("\n2. np.logspace() - 对数间隔序列")

# 5 values from 10^0 to 10^2, evenly spaced in the exponent
logspace_simple = np.logspace(0, 2, 5)
print(f"  logspace(0, 2, 5): {logspace_simple}")
print(f"  取对数验证: {np.log10(logspace_simple)}")

# A different base
logspace_base2 = np.logspace(0, 3, 4, base=2)  # 2^0 through 2^3
print(f"  logspace(0, 3, 4, base=2): {logspace_base2}")

# 3. np.geomspace() - geometric progression between two endpoint values
print("\n3. np.geomspace() - 几何间隔序列")

# 5 geometrically spaced values from 1 to 1000
geomspace_result = np.geomspace(1, 1000, 5)
print(f"  geomspace(1, 1000, 5): {geomspace_result}")
print(f"  比值验证: {np.diff(np.log10(geomspace_result))}")  # differences of the logs should all be equal

# 4. Practical example: frequency axes
print("\n4. 实用示例：创建频率轴")

# Logarithmic frequency axes are common in audio processing
freq_min = 20  # 20 Hz
freq_max = 20000  # 20 kHz
n_points = 10

freq_linear = np.linspace(freq_min, freq_max, n_points)  # linear frequency axis
freq_log = np.logspace(np.log10(freq_min), np.log10(freq_max), n_points)  # logarithmic frequency axis

print(f"线性频率轴 (Hz):")
for i, freq in enumerate(freq_linear):
    print(f"  点{i}: {freq:8.1f} Hz")

print(f"\n对数频率轴 (Hz):")
for i, freq in enumerate(freq_log):
    print(f"  点{i}: {freq:8.1f} Hz")

# 5. Grid coordinates
print("\n5. 创建网格坐标")

# 1-D coordinate vectors
x = np.linspace(-5, 5, 11)
y = np.linspace(-5, 5, 11)
print(f"x坐标: {x}")
print(f"y坐标: {y}")

# Expand the two vectors into 2-D coordinate grids
X, Y = np.meshgrid(x, y)
print(f"X网格形状: {X.shape}")
print(f"Y网格形状: {Y.shape}")

# Evaluate a function on every grid point at once
Z = np.sin(np.sqrt(X**2 + Y**2))
print(f"Z = sin(√(x²+y²)) 形状: {Z.shape}")
print(f"Z[0,0] = {Z[0,0]:.3f}")

# 6. Timing comparison of the sequence functions.
# Relies on `time` having been imported by an earlier snippet. Each call is
# timed once, so the figures are indicative only.
print("\n6. 序列函数性能对比")

n_points = 1000000
print(f"创建{n_points:,}个点的序列:")

# linspace
start = time.time()
ls = np.linspace(0, 1, n_points)
time_linspace = time.time() - start

# logspace
start = time.time()
ls_log = np.logspace(0, 6, n_points)
time_logspace = time.time() - start

# arange scaled so that it covers [0, 1] like the linspace call above
start = time.time()
ar = np.arange(0, n_points) * 1.0 / (n_points - 1)
time_arange = time.time() - start

print(f"  linspace: {time_linspace:.6f}秒")
print(f"  logspace: {time_logspace:.6f}秒")
print(f"  arange模拟: {time_arange:.6f}秒")

# Equivalence check (only the first five elements are compared)
print(f"\n验证等价性:")
print(f"  linspace和arange结果近似: {np.allclose(ls[:5], ar[:5], rtol=1e-10)}")

# 7. Precision test
print("\n7. 精度测试")

# Compare linspace against the same grid computed by hand.
# NOTE: `start` is rebound here from a timestamp to the interval's lower bound.
start, end, n = 0, 1, 1000000
linspace_result = np.linspace(start, end, n)
manual_result = start + (end - start) * np.arange(n) / (n - 1)

max_error = np.max(np.abs(linspace_result - manual_result))
print(f"linspace与手动计算最大误差: {max_error:.2e}")

# Endpoint check
print(f"linspace端点: [{linspace_result[0]:.10f}, {linspace_result[-1]:.10f}]")
print(f"期望端点: [{start:.10f}, {end:.10f}]")

4. 数据类型转换：astype()方法详解

print("\n=== 数据类型转换：astype()方法 ===")

# 1. Basic usage
print("1. 基本用法")

# Source array
arr_int = np.array([1, 2, 3, 4, 5])
print(f"原始数组: {arr_int}, dtype={arr_int.dtype}")

# Integer -> float
arr_float = arr_int.astype(np.float64)
print(f"转换为float64: {arr_float}, dtype={arr_float.dtype}")

# Integer -> complex
arr_complex = arr_int.astype(np.complex128)
print(f"转换为complex128: {arr_complex}, dtype={arr_complex.dtype}")

# 2. Conversions between different kinds of types
print("\n2. 不同类型间的转换")

# Float -> integer drops the fractional part (truncation, not rounding)
arr_float_src = np.array([1.2, 2.7, 3.5, 4.9, 5.1])
arr_int_from_float = arr_float_src.astype(np.int32)
print(f"浮点数数组: {arr_float_src}")
print(f"转换为int32(截断): {arr_int_from_float}")

# Round first, then convert
arr_rounded = np.round(arr_float_src).astype(np.int32)
print(f"四舍五入后转换: {arr_rounded}")

# 3. Narrowing conversions (may lose information)
print("\n3. 向下转换（可能丢失精度）")

# Wide integer -> narrow integer.
# The sample values (at most 5000) are within the int16 limit of 32767,
# so this particular conversion does not change any value.
arr_large = np.array([1000, 2000, 3000, 4000, 5000], dtype=np.int64)
arr_small = arr_large.astype(np.int16)
print(f"int64数组: {arr_large}")
print(f"转换为int16: {arr_small}")
print(f"值是否相等: {np.array_equal(arr_large, arr_small)}")

# Reduced floating-point precision
arr_double = np.array([1.123456789, 2.234567890, 3.345678901], dtype=np.float64)
arr_float32 = arr_double.astype(np.float32)
print(f"\nfloat64数组: {arr_double}")
print(f"转换为float32: {arr_float32}")
print(f"精度损失: {arr_double - arr_float32}")

# 4. Boolean conversions
print("\n4. 布尔类型转换")

# Non-zero values become True, zeros become False
arr_mixed = np.array([0, 1, -1, 2.5, 0.0, 100])
arr_bool = arr_mixed.astype(bool)
print(f"混合数组: {arr_mixed}")
print(f"转换为bool: {arr_bool}")

# Boolean -> numeric
bool_arr = np.array([True, False, True, True, False])
int_from_bool = bool_arr.astype(np.int32)
float_from_bool = bool_arr.astype(np.float64)
print(f"\n布尔数组: {bool_arr}")
print(f"转换为int32: {int_from_bool}")
print(f"转换为float64: {float_from_bool}")

# 5. String conversions
print("\n5. 字符串转换")

# Numbers -> strings
arr_numbers = np.array([1, 2, 3, 4, 5])
arr_str = arr_numbers.astype(str)
print(f"数值数组: {arr_numbers}, dtype={arr_numbers.dtype}")
print(f"转换为字符串: {arr_str}, dtype={arr_str.dtype}")

# Strings -> numbers
arr_str_src = np.array(['1', '2', '3', '4', '5'])
arr_from_str = arr_str_src.astype(np.float64)
print(f"\n字符串数组: {arr_str_src}")
print(f"转换为float64: {arr_from_str}")

# 6. Effect on memory footprint
print("\n6. 内存占用变化")

# Large array used to measure memory
large_arr = np.ones(1000000, dtype=np.float64)
print(f"float64数组:")
print(f"  元素数: {large_arr.size:,}")
print(f"  每个元素: {large_arr.itemsize}字节")
print(f"  总内存: {large_arr.nbytes:,}字节")

# float64 -> float32
arr_float32 = large_arr.astype(np.float32)
print(f"\n转换为float32:")
print(f"  每个元素: {arr_float32.itemsize}字节")
print(f"  总内存: {arr_float32.nbytes:,}字节")
print(f"  内存减少: {(1 - arr_float32.nbytes/large_arr.nbytes)*100:.1f}%")

# float64 -> int16
arr_int16 = large_arr.astype(np.int16)
print(f"\n转换为int16:")
print(f"  每个元素: {arr_int16.itemsize}字节")
print(f"  总内存: {arr_int16.nbytes:,}字节")
print(f"  内存减少: {(1 - arr_int16.nbytes/large_arr.nbytes)*100:.1f}%")

# 7. Timing of astype() conversions.
# Relies on `time` having been imported by an earlier snippet. Each conversion
# is timed once, so the figures are indicative only.
print("\n7. astype()性能测试")

size = 10000000
arr_large = np.random.randn(size)
print(f"转换{size:,}个元素的数组:")

# float64 -> float32
start = time.time()
arr_float32 = arr_large.astype(np.float32)
time_64_to_32 = time.time() - start

# float64 -> int32
start = time.time()
arr_int32 = arr_large.astype(np.int32)
time_64_to_int32 = time.time() - start

# float64 -> complex128
start = time.time()
arr_complex128 = arr_large.astype(np.complex128)
time_64_to_complex = time.time() - start

print(f"  float64 -> float32: {time_64_to_32:.4f}秒")
print(f"  float64 -> int32: {time_64_to_int32:.4f}秒")
print(f"  float64 -> complex128: {time_64_to_complex:.4f}秒")

# 8. Practical example: image dtype conversion
print("\n8. 实用示例：图像数据类型转换")

def simulate_image_processing():
    """Run a uint8 -> float32 -> uint8 round trip on a random 100x100 "image".

    The image is drawn from the global NumPy generator, normalized to [0, 1],
    brightened by a factor of 1.5 with clipping, and converted back to uint8.
    Progress is printed along the way.

    Returns:
        tuple: ``(source_image, brightened_image)``, both uint8 arrays.
    """
    # Simulated 8-bit image data (values 0-255)
    raw = np.random.randint(0, 256, (100, 100), dtype=np.uint8)
    print(f"原始8位图像: shape={raw.shape}, dtype={raw.dtype}")
    print(f"  值范围: [{raw.min()}, {raw.max()}]")

    # Work in float32 so scaling cannot wrap around the uint8 range
    normalized = raw.astype(np.float32) / 255.0
    print(f"\n转换为float32归一化: dtype={normalized.dtype}")
    print(f"  值范围: [{normalized.min():.3f}, {normalized.max():.3f}]")

    # Brightness adjustment; values above 1 are clipped
    scaled = normalized * 1.5
    brightened = scaled.clip(0, 1)

    # Back to 8-bit (astype truncates the fractional part)
    restored = (brightened * 255).astype(np.uint8)
    print(f"\n转换回uint8: dtype={restored.dtype}")
    print(f"  值范围: [{restored.min()}, {restored.max()}]")

    return raw, restored

# Run the image round-trip demo and keep both images
original, processed = simulate_image_processing()

# 9. Type-safety checks
print("\n9. 类型安全与溢出检查")

# Overflow check
def check_overflow_conversion(src_arr, target_dtype):
    """Report whether casting ``src_arr`` to ``target_dtype`` would overflow, then cast.

    The source value range and the target type's representable range are
    printed. For integer targets the number of out-of-range elements is
    reported; for other targets only the ranges are shown. The cast is
    performed in either case.

    Returns:
        numpy.ndarray: ``src_arr.astype(target_dtype)``.
    """
    print(f"\n转换检查: {src_arr.dtype} -> {target_dtype}")
    print(f"  源数组范围: [{src_arr.min()}, {src_arr.max()}]")

    integer_target = np.issubdtype(target_dtype, np.integer)
    if integer_target:
        limits = np.iinfo(target_dtype)
    else:
        limits = np.finfo(target_dtype)
    print(f"  目标类型范围: [{limits.min}, {limits.max}]")

    if integer_target:
        too_small = src_arr < limits.min
        too_large = src_arr > limits.max
        offenders = np.count_nonzero(too_small | too_large)
        if offenders:
            print(f"  ⚠️ 警告: {offenders}个元素可能溢出！")
        else:
            print(f"  ✓ 转换安全")

    return src_arr.astype(target_dtype)

# Safe case: every value fits into int16
arr_safe = np.array([100, 200, 300], dtype=np.int32)
result_safe = check_overflow_conversion(arr_safe, np.int16)

# Unsafe case: 40000 exceeds the int16 maximum of 32767
arr_unsafe = np.array([100, 200, 40000], dtype=np.int32)
result_unsafe = check_overflow_conversion(arr_unsafe, np.int16)
print(f"  转换结果: {result_unsafe} (注意40000溢出为负数)")

5. 综合实战应用

5.1 创建多种类型的数组

# Section banner for the combined array-creation demo
print("\n=== 综合实战：创建多种类型的数组 ===")

def create_arrays_demo():
    """Print a tour of array-creation techniques and return the sample arrays.

    Four groups are shown: arrays of rank 0-3, special matrices (identity,
    diagonal, Vandermonde), arrays derived from an existing array, and a
    size/memory check on a random 3x4x5 array.

    Returns:
        dict: the created arrays keyed by '0d', '1d', '2d', '3d', 'identity',
        'diagonal' and 'vander'.
    """
    rule = "-" * 40

    # --- 1. arrays of different rank -------------------------------------
    print("1. 创建不同维度的数组:")
    print(rule)

    scalar_arr = np.array(42)  # rank 0 (scalar)
    vector = np.array([1, 2, 3, 4, 5])
    table = np.array([[1, 2, 3], [4, 5, 6]])
    cube = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])

    # The first three share one line format; only the label prefix differs
    for prefix, sample in (
        ("0维数组: ", scalar_arr),
        ("1维数组: ", vector),
        ("2维数组:\n", table),
    ):
        print(f"{prefix}{sample}, shape={sample.shape}, ndim={sample.ndim}")
    print(f"3维数组 shape={cube.shape}, ndim={cube.ndim}")

    # --- 2. special matrices ---------------------------------------------
    print("\n2. 创建特殊数组:")
    print(rule)

    unit = np.eye(3)
    print(f"3×3单位矩阵:\n{unit}")

    diag_matrix = np.diag([1, 2, 3, 4])
    print(f"对角矩阵:\n{diag_matrix}")

    vandermonde = np.vander([1, 2, 3, 4], 3)
    print(f"范德蒙矩阵:\n{vandermonde}")

    # --- 3. arrays derived from an existing one --------------------------
    print("\n3. 从已有数组创建新数组:")
    print(rule)

    template = np.array([[1, 2, 3], [4, 5, 6]])

    print(f"复制数组:\n{np.copy(template)}")
    # *_like helpers reuse the template's shape and dtype
    print(f"类似数组(全零):\n{np.zeros_like(template)}")
    print(f"类似数组(全7):\n{np.full_like(template, 7)}")

    # --- 4. attribute check ----------------------------------------------
    print("\n4. 数组属性验证:")
    print(rule)

    probe = np.random.rand(3, 4, 5)
    print(f"测试数组形状: {probe.shape}")
    print(f"总元素数验证: {np.prod(probe.shape)} = {probe.size}")
    print(f"内存占用: {probe.nbytes:,} 字节")

    collected = {}
    collected['0d'] = scalar_arr
    collected['1d'] = vector
    collected['2d'] = table
    collected['3d'] = cube
    collected['identity'] = unit
    collected['diagonal'] = diag_matrix
    collected['vander'] = vandermonde
    return collected

# Run the demo; `arrays` is rebound to the dict it returns
arrays = create_arrays_demo()

5.2 性能优化实战

# Section banner for the performance comparison
print("\n=== 性能优化实战 ===")

def performance_comparison():
    """Time several array-creation functions on one million elements.

    Each function is called once as a warm-up and once under the timer
    (wall-clock, via ``time.time``). A per-function line, the fastest
    function and a dtype/memory summary are printed.

    Returns:
        dict: function label -> ``{'time': seconds, 'array': ndarray,
        'memory': nbytes}``, in the order the functions were run.
    """
    size = 1000000
    print(f"创建{size:,}个元素的数组性能比较:")
    print("=" * 60)

    # label -> zero-argument factory; dict order is the execution order
    factories = {
        "np.zeros()": lambda: np.zeros(size),
        "np.ones()": lambda: np.ones(size),
        "np.empty()": lambda: np.empty(size),
        "np.full()": lambda: np.full(size, 3.14),
        "np.arange()": lambda: np.arange(size),
        "np.linspace()": lambda: np.linspace(0, 1, size),
        "np.random.rand()": lambda: np.random.rand(size),
    }

    results = {}
    for label, make in factories.items():
        make()  # warm-up call, result discarded

        began = time.time()
        produced = make()
        elapsed = time.time() - began

        results[label] = dict(time=elapsed, array=produced, memory=produced.nbytes)
        print(f"{label:20} 时间: {elapsed:.6f}秒, 内存: {produced.nbytes:,}字节")

    # Fastest entry; on ties the earliest one wins
    winner = min(results, key=lambda key: results[key]['time'])
    print(f"\n最快的创建方法: {winner} ({results[winner]['time']:.6f}秒)")

    # dtype / itemsize / total memory per function
    print("\n内存使用分析:")
    for label, entry in results.items():
        produced = entry['array']
        summary = (
            f"{label:20} dtype: {produced.dtype}, "
            f"itemsize: {produced.itemsize}字节, "
            f"总内存: {entry['memory']:,}字节"
        )
        print(summary)

    return results

# Run the benchmark.
# NOTE(review): the banner says "small-scale (warm-up)", but performance_comparison()
# takes no size argument and always uses one million elements.
print("小规模测试 (预热):")
small_results = performance_comparison()

5.3 实用工具函数

# Section banner for the utility functions
print("\n=== 实用工具函数 ===")

def array_info(arr, name="数组"):
    """Print a detailed description of an array.

    Shows the contents, shape, ndim, size, dtype, itemsize, nbytes and strides,
    followed by the C_CONTIGUOUS, F_CONTIGUOUS, OWNDATA and WRITEABLE flags.

    Args:
        arr: the array to describe (anything exposing the ndarray attributes).
        name: label used in the heading line.
    """
    print(f"\n{name}信息:")
    print(f"  数组内容:\n{arr}")
    print(f"  形状 (shape): {arr.shape}")
    print(f"  维度 (ndim): {arr.ndim}")
    print(f"  大小 (size): {arr.size}")
    print(f"  数据类型 (dtype): {arr.dtype}")
    print(f"  元素大小 (itemsize): {arr.itemsize}字节")
    print(f"  总内存 (nbytes): {arr.nbytes:,}字节")
    print(f"  步幅 (strides): {arr.strides}")

    flags = getattr(arr, 'flags', None)
    if flags is not None:
        print(f"  标志 (flags):")
        # The flags object exposes upper-case names only through item access
        # (flags['OWNDATA']); as attributes only the lower-case spellings exist,
        # so an attribute lookup on the upper-case name would find nothing.
        for flag in ('C_CONTIGUOUS', 'F_CONTIGUOUS', 'OWNDATA', 'WRITEABLE'):
            print(f"    {flag}: {flags[flag]}")

def create_array_advanced(dimensions, dtype=np.float64, fill_value=None):
    """Create an array that is either constant-filled or randomly initialized.

    Args:
        dimensions: an int (treated as a 1-D length) or a shape tuple.
        dtype: dtype of the result.
        fill_value: when given, every element is set to this value. When None,
            the contents depend on ``dtype``: integers are drawn uniformly from
            [0, 100), floats from the standard normal distribution, complex
            values get standard-normal real and imaginary parts, and any other
            dtype yields zeros.

    Returns:
        numpy.ndarray: array of the requested shape and dtype.
    """
    shape = (dimensions,) if isinstance(dimensions, int) else dimensions

    # Constant fill takes precedence over every random mode
    if fill_value is not None:
        return np.full(shape, fill_value, dtype=dtype)

    if np.issubdtype(dtype, np.integer):
        return np.random.randint(0, 100, shape, dtype=dtype)

    if np.issubdtype(dtype, np.floating):
        return np.random.randn(*shape).astype(dtype)

    if np.issubdtype(dtype, np.complexfloating):
        # Real part is drawn before the imaginary part
        re_part = np.random.randn(*shape)
        im_part = np.random.randn(*shape)
        return (re_part + 1j * im_part).astype(dtype)

    # Non-numeric dtype: no random mode applies
    return np.zeros(shape, dtype=dtype)

# Exercise the utility functions
print("测试array_info函数:")
test_array = np.random.rand(3, 4, 5)
array_info(test_array, "测试数组")

# One call per mode: random integers, constant fill, random complex values.
# NOTE: arr1/arr2/arr3 are rebound here; earlier snippets used the same names.
print("\n测试create_array_advanced函数:")
arr1 = create_array_advanced((3, 4), dtype=np.int32)
arr2 = create_array_advanced((2, 3, 4), dtype=np.float32, fill_value=3.14)
arr3 = create_array_advanced(5, dtype=np.complex128)

print(f"随机整数数组:\n{arr1}")
print(f"\n填充数组:\n{arr2}")
print(f"\n随机复数数组:\n{arr3}")

6. 常见问题与解决方案

6.1 内存问题

print("\n=== 常见问题：内存管理 ===")

# 1. Memory footprint of large arrays
print("1. 大数组内存占用问题")

def estimate_memory_usage(shape, dtype=np.float64):
    """Estimate the memory an array of the given shape and dtype would occupy.

    Nothing is allocated; the figure is ``number_of_elements * itemsize``.

    Args:
        shape: an int or an iterable of ints.
        dtype: anything accepted by ``np.dtype``.

    Returns:
        tuple: ``(total_bytes, value, unit)`` where ``total_bytes`` is an exact
        Python int and ``value``/``unit`` are the same amount scaled by powers
        of 1024 into the largest fitting unit among 字节, KB, MB, GB, TB.
    """
    try:
        dims = tuple(shape)
    except TypeError:
        # A bare integer describes a 1-D array
        dims = (shape,)

    # Multiply with Python ints: they cannot overflow, whereas a fixed-width
    # NumPy product silently wraps around for very large shapes.
    size = 1
    for dim in dims:
        size *= int(dim)

    itemsize = np.dtype(dtype).itemsize
    total_bytes = size * itemsize

    units = ['字节', 'KB', 'MB', 'GB', 'TB']
    unit_index = 0
    total = float(total_bytes)

    # Scale down by 1024 until the value fits the unit or units run out
    while total >= 1024 and unit_index < len(units) - 1:
        total /= 1024
        unit_index += 1

    return total_bytes, total, units[unit_index]

# Memory estimates for a few shapes and dtypes
test_shapes = [
    (1000, 1000),        # 1 million elements
    (10000, 10000),      # 100 million elements
    (1000, 1000, 100),   # 100 million elements
]

dtypes = [np.int8, np.int32, np.float32, np.float64]

print("不同形状和类型数组的内存估算:")
print("形状              dtype      元素数        内存")
print("-" * 60)

for shape in test_shapes:
    # The element count depends on the shape only, so compute it once per shape
    size = np.prod(shape)
    for dtype in dtypes:
        total_bytes, readable, unit = estimate_memory_usage(shape, dtype)
        # np.dtype(...).name gives the short name ("int8"); str() of the scalar
        # class gives "<class 'numpy.int8'>", which overflows the 10-char column.
        dtype_name = np.dtype(dtype).name
        print(f"{str(shape):15} {dtype_name:10} {size:12,}  {readable:7.1f} {unit}")

# 2. Memory optimization techniques
print("\n2. 内存优化技巧")

def _smallest_integer_dtype(min_val, max_val):
    """Return the narrowest NumPy integer dtype whose range covers [min_val, max_val]."""
    if min_val >= 0:
        candidates = (np.uint8, np.uint16, np.uint32, np.uint64)
    else:
        candidates = (np.int8, np.int16, np.int32, np.int64)
    for candidate in candidates:
        limits = np.iinfo(candidate)
        if limits.min <= min_val and max_val <= limits.max:
            return candidate
    return candidates[-1]


def _pick_compact_dtype(arr):
    """Choose a more compact dtype for ``arr``, or its current dtype if none applies."""
    # Empty arrays have no min/max to inspect
    if arr.size == 0:
        return arr.dtype

    if np.issubdtype(arr.dtype, np.integer):
        return _smallest_integer_dtype(int(arr.min()), int(arr.max()))

    # bool, complex, string, ... are left untouched: treating them as floats
    # would widen them or drop information (e.g. the imaginary part).
    if not np.issubdtype(arr.dtype, np.floating):
        return arr.dtype

    # Float data becomes int32 only when every value is finite, inside the
    # int32 range and exactly integral. A tolerance-based comparison would
    # also accept values such as 100000.4 and silently drop their fraction.
    int32_limits = np.iinfo(np.int32)
    if (
        np.isfinite(arr).all()
        and arr.min() >= int32_limits.min
        and arr.max() <= int32_limits.max
        and np.array_equal(arr, np.trunc(arr))
    ):
        return np.int32

    # Otherwise accept float32 when it reproduces the data within rtol=1e-5
    with np.errstate(over='ignore'):  # values beyond the float32 range become inf
        narrowed = arr.astype(np.float32)
    if np.allclose(arr, narrowed, rtol=1e-5):
        return np.float32
    return np.float64


def optimize_array_memory(arr, target_dtype=None):
    """Convert ``arr`` to a more compact dtype and report the saving.

    Args:
        arr: source array.
        target_dtype: dtype to convert to. When None, one is chosen from the data:
            integer arrays get the narrowest (unsigned if non-negative) integer
            type that holds their min and max; float arrays become int32 when
            all values are exactly integral and in range, else float32 when that
            matches within rtol=1e-5, else float64; empty arrays and other
            dtypes keep their dtype.

    Returns:
        numpy.ndarray: the converted array (``arr.astype(...)``, always a new array).
    """
    print(f"原始数组: shape={arr.shape}, dtype={arr.dtype}, 内存={arr.nbytes:,}字节")

    if target_dtype is None:
        target_dtype = _pick_compact_dtype(arr)

    optimized = arr.astype(target_dtype)

    print(f"优化后数组: dtype={optimized.dtype}, 内存={optimized.nbytes:,}字节")
    # A zero-byte input has no meaningful reduction ratio (and would divide by zero)
    if arr.nbytes:
        print(f"内存减少: {(1 - optimized.nbytes/arr.nbytes)*100:.1f}%")

    return optimized

# Demo: int64 storage holding values in [0, 100)
print("\n内存优化示例:")
test_data = np.random.randint(0, 100, (1000, 1000), dtype=np.int64)
optimized_data = optimize_array_memory(test_data)

6.2 性能陷阱与优化

print("\n=== 常见问题：性能陷阱 ===")

# 1. Avoid unnecessary copies
print("1. 避免不必要的数组复制")

def demonstrate_copy_vs_view():
    """Show, with printed output, how a slice view differs from an explicit copy.

    A write through the view is visible in the source array; a write to the
    copy is not.

    Returns:
        tuple: ``(source, view, copy)`` in their final state.
    """
    source = np.array([1, 2, 3, 4, 5])
    print(f"原始数组: {source}, id={id(source)}")

    # Slicing yields a view: a new array object over the same data
    head = source[:3]
    print(f"切片视图: {head}, id={id(head)}")
    view_shares = head.base is source
    print(f"视图.base is original: {view_shares}")

    # Writing through the view changes the source as well
    head[0] = 100
    print(f"修改视图后原始数组: {source}")

    # Restore the first element, then take an explicit copy
    source[0] = 1
    duplicate = source.copy()
    print(f"\n复制数组: {duplicate}, id={id(duplicate)}")
    copy_shares = duplicate.base is source
    print(f"复制.base is original: {copy_shares}")

    # Writing to the copy leaves the source untouched
    duplicate[0] = 200
    print(f"修改复制后原始数组: {source}")
    print(f"修改复制后复制数组: {duplicate}")

    return source, head, duplicate

# Run the copy/view demo and keep the three arrays it returns
orig, view, copy = demonstrate_copy_vs_view()

# 2. Patterns for creating arrays efficiently
print("\n2. 高效创建数组的模式")

def efficient_array_creation(size):
    """Time three ways of building the array ``[0, 2, 4, ...]`` with ``size`` elements.

    The three approaches are: pre-allocate and fill in a Python loop, build a
    list comprehension and convert it, and a vectorized NumPy expression.
    Timings and speed-up ratios are printed.

    Args:
        size: number of elements to create.

    Returns:
        tuple: the three arrays ``(loop_filled, from_list, vectorized)``.
    """
    # perf_counter is the high-resolution clock intended for short intervals;
    # a coarse wall clock can measure the vectorized case as exactly 0 seconds.
    clock = time.perf_counter

    print(f"创建{size:,}个元素的数组:")

    # Pattern 1: pre-allocate, then fill element by element
    start = clock()
    arr1 = np.empty(size)
    for i in range(size):
        arr1[i] = i * 2
    time1 = clock() - start

    # Pattern 2: list comprehension converted to an array
    start = clock()
    arr2 = np.array([i * 2 for i in range(size)])
    time2 = clock() - start

    # Pattern 3: vectorized NumPy expression
    start = clock()
    arr3 = np.arange(size) * 2
    time3 = clock() - start

    print(f"  预分配+循环: {time1:.6f}秒")
    print(f"  列表推导式: {time2:.6f}秒")
    print(f"  NumPy向量化: {time3:.6f}秒")
    # Ratios are only meaningful (and only computable) for a non-zero denominator
    if time3 > 0:
        print(f"  向量化比循环快{time1/time3:.1f}倍")
        print(f"  向量化比列表推导式快{time2/time3:.1f}倍")

    return arr1, arr2, arr3

# Run the comparison on 100,000 elements (arr1/arr2/arr3 are rebound to its results)
arr1, arr2, arr3 = efficient_array_creation(100000)

7. 总结与最佳实践

7.1 创建函数总结

函数	描述	常用参数	示例
`np.array()`	从Python序列创建	`object`, `dtype`, `copy`	`np.array([1,2,3])`
`np.arange()`	创建数值范围	`start`, `stop`, `step`	`np.arange(0, 10, 2)`
`np.linspace()`	线性间隔数组	`start`, `stop`, `num`	`np.linspace(0, 1, 5)`
`np.logspace()`	对数间隔数组	`start`, `stop`, `num`, `base`	`np.logspace(0, 3, 4)`
`np.zeros()`	全零数组	`shape`, `dtype`	`np.zeros((3,4))`
`np.ones()`	全一数组	`shape`, `dtype`	`np.ones((2,3))`
`np.full()`	填充数组	`shape`, `fill_value`, `dtype`	`np.full((3,3), 7)`
`np.empty()`	未初始化数组	`shape`, `dtype`	`np.empty(5)`
`np.eye()`	单位矩阵	`N`, `M`, `k`	`np.eye(3)`
`np.random.rand()`	[0,1)均匀分布	`d0`, `d1`, ...	`np.random.rand(3,4)`
`np.random.randn()`	标准正态分布	`d0`, `d1`, ...	`np.random.randn(100)`
`np.random.randint()`	整数均匀分布	`low`, `high`, `size`	`np.random.randint(0,10,5)`
`np.random.uniform()`	均匀分布	`low`, `high`, `size`	`np.random.uniform(0,1,5)`

7.2 最佳实践指南

# ✅ Best-practice examples

# 1. State the dtype explicitly
# ❌ Not recommended
arr1 = np.array([1, 2, 3])  # dtype is inferred and may not be the best fit

# ✅ Recommended
arr2 = np.array([1, 2, 3], dtype=np.int32)  # explicit dtype
arr3 = np.array([1.0, 2.0, 3.0], dtype=np.float32)  # half the size of the default float64

# 2. Use the appropriate creation function
# ❌ Not recommended
size = 1000
arr_slow = np.array([i for i in range(size)])  # builds a Python list first

# ✅ Recommended
arr_fast = np.arange(size)  # NumPy built-in

# 3. Avoid unnecessary copies
# ❌ Not recommended
def process_array_bad(arr):
    """Counter-example: copies the input unconditionally, even when the caller does not need it."""
    duplicate = arr.copy()  # always copies, which may be wasted work
    # (array processing would go here)
    return duplicate

# ✅ Recommended
def process_array_good(arr, inplace=False):
    """Return ``arr`` itself when ``inplace`` is true, otherwise a copy of it."""
    # Copy only when the caller has not opted into in-place processing
    target = arr if inplace else arr.copy()
    # (array processing would go here)
    return target

# 4. Memory optimization
def create_optimized_array(shape, data_range, precision_needed):
    """Create a zero-filled array whose dtype is chosen from the stated requirements.

    Args:
        shape: shape of the array to create.
        data_range: ``(min_val, max_val)`` of the values the array will hold;
            only consulted when an integer dtype is selected.
        precision_needed: numeric precision threshold.
            * ``> 1e-3``          -> float32
            * ``(1e-6, 1e-3]``    -> float64
            * ``<= 1e-6``         -> smallest integer dtype covering ``data_range``

    Returns:
        numpy.ndarray: ``np.zeros(shape, dtype=<chosen dtype>)``.

    NOTE(review): the thresholds used to be tested as ``> 1e-6`` first and
    ``> 1e-3`` second, which made the float32 branch unreachable. They are now
    tested from the larger threshold down so both float branches can be taken;
    please confirm this matches the intended meaning of ``precision_needed``.
    """
    min_val, max_val = data_range

    # Pick the dtype
    if precision_needed > 1e-3:
        dtype = np.float32
    elif precision_needed > 1e-6:
        dtype = np.float64
    elif min_val >= 0:  # unsigned integers
        if max_val < 256:
            dtype = np.uint8
        elif max_val < 65536:
            dtype = np.uint16
        elif max_val < 2 ** 32:
            dtype = np.uint32
        else:
            # Values beyond the 32-bit range need the widest type
            dtype = np.uint64
    else:  # signed integers
        if min_val >= -128 and max_val < 128:
            dtype = np.int8
        elif min_val >= -32768 and max_val < 32768:
            dtype = np.int16
        elif min_val >= -2 ** 31 and max_val < 2 ** 31:
            dtype = np.int32
        else:
            dtype = np.int64

    # Create the array
    arr = np.zeros(shape, dtype=dtype)

    return arr

# 5. Prefer whole-array operations over loops
# ❌ Not recommended
def slow_operation(data):
    """Counter-example: compute ``2 * x + 1`` for each element with a Python-level loop."""
    out = np.empty_like(data)
    for position, value in enumerate(data):
        out[position] = value * 2 + 1
    return out

# ✅ Recommended
def fast_operation(data):
    """Compute ``2 * x + 1`` for every element with one vectorized expression."""
    doubled = data * 2
    return doubled + 1

7.3 学习建议

初学者：
- 掌握np.array(), np.arange(), np.zeros(), np.ones()基本用法
- 理解数组的shape, dtype, ndim属性
- 学会使用astype()进行类型转换
中级开发者：
- 掌握各种随机数组创建方法
- 理解linspace()和logspace()的区别
- 学会高效创建大型数组
- 掌握数组的内存布局和优化
高级开发者：
- 深入理解数组的内存管理
- 掌握视图和复制的区别
- 优化数组创建性能
- 处理特殊数据类型（复数、结构化数组等）
专家级：
- 自定义dtype
- 内存映射数组
- GPU数组（CuPy）
- 分布式数组（Dask）

7.4 常见错误与解决方案

# ❌ Common mistake 1: forgetting to seed the random generator
np.random.seed(42)  # ✅ set a seed whenever results need to be reproducible

# ❌ Common mistake 2: applying arithmetic to a Python list instead of an array
data = [1, 2, 3, 4, 5]
# result = data * 2  # ❌ on a list this repeats the list; it is not multiplication
result = np.array(data) * 2  # ✅ element-wise multiplication on an ndarray

# ❌ Common mistake 3: overlooking how the dtype affects arithmetic results
arr_int = np.array([1, 2, 3])
result = arr_int / 2  # NOTE: `/` is true division even for an integer array
print(f"整数除法: {result}, dtype={result.dtype}")  # the printed dtype shows the result was promoted to float

arr_float = np.array([1, 2, 3], dtype=float)
result = arr_float / 2  # ✅ float division with an explicitly float input
print(f"浮点数除法: {result}, dtype={result.dtype}")

# ❌ Common mistake 4: running out of memory with huge arrays
# Attempt to create an oversized array.
# WARNING: on a machine that can satisfy the request this really allocates the memory.
try:
    huge_array = np.ones((10000, 10000, 100))  # 10^10 elements; about 80 GB at 8 bytes each
    print("创建成功")
except MemoryError as e:
    print(f"内存错误: {e}")
    # ✅ Remedy: use a memory-mapped file or process the data in chunks
    print("建议: 使用np.memmap或分块处理大数组")

# ❌ Common mistake 5: misunderstanding reshape
arr = np.arange(10)
try:
    reshaped = arr.reshape(3, 4)  # ❌ needs 12 elements, but arr has only 10
except ValueError as e:
    print(f"reshape错误: {e}")
    # ✅ Remedy: make sure the total element count matches
    reshaped = arr.reshape(2, 5)  # 2 x 5 = 10
    print(f"正确reshape: {reshaped.shape}")

8. 练习题

# Exercise 1: create arrays of various shapes
def exercise1():
    """Create the following arrays:

    1. Shape (5, 5) with ones on the main diagonal and zeros elsewhere.
    2. Shape (3, 4, 5) with every element equal to 3.14.
    3. 100 evenly spaced points from 0 to 2π.
    4. Shape (100, 100) with random values in [0, 1).
    """

# Exercise 2: analyse array attributes
def exercise2():
    """Create a random array of shape (2, 3, 4, 5) and examine all of its attributes."""

# Exercise 3: benchmark type conversions
def exercise3():
    """Measure how conversion speed differs between data types."""

# Exercise 4: memory optimization
def exercise4():
    """Given a large array, reduce its memory use without losing precision."""