参考:ubuntu20.04安装OpenPcdet,(CUDA版本11.8,显卡4090)(CUDA版本12.1,显卡3060)_cuda11.8安装openpcdet-CSDN博客
OpenPcedt下实现自定义纯点云kitti格式数据集的训练---centerpoint_openpcdet centerpoint-CSDN博客
这个博主写了如何在 Ubuntu 20.04 + RTX 4090 上对 CenterPoint 进行训练和测试。但我公司用的是最新的 RTX 5090,系统必须使用 Ubuntu 22.04 才能适配显卡驱动。
根据网上的资料:CUDA 和 Pytorch 安装(50系列踩坑版) - 知乎
目前5090只能安装的版本组合为:
- cuda 12.8
- cudnn 8.9.7
- python 3.12
- torch 2.10 (Preview Nightly)
于是先安装cuda和cudnn,之后git clone pcdet源码,在conda环境里尝试安装:
conda create -n pcdet python=3.12
conda activate pcdet
pip install --pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/cu128
pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
pip install spconv-cu126 -i https://pypi.tuna.tsinghua.edu.cn/simple
pip install -e . --use-pep517 --no-build-isolation
pip install open3d -i https://pypi.tuna.tsinghua.edu.cn/simple
记录一下我安装的torch:
然后按照上面博主的做法,运行demo:
cd tools
python demo.py --cfg_file cfgs/kitti_models/pointpillar.yaml \
--ckpt pointpillar_pthbin/pointpillar_7728.pth \
--data_path pointpillar_pthbin/000001.bin
但是报错,说没有安装av2和kornia。但是安装了以后,还是报错:
kornia和pytorch版本不匹配。而我查了一下,kornia目前还不支持我们的pytorch最新版,所以我的做法是直接把av2需要的地方注释掉,这样kornia和av2就不会import了。因为我主要用的是自己采集的数据做成kitti格式,不需要av2的那种。
修改 pcdet/datasets/__init__.py 这个脚本即可:注释掉其中与 Argo2(Argoverse 2)数据集相关的两处(Argo2Dataset 的 import 语句,以及 __all__ 字典中对应的注册项)。

注释后,就直接可以成功运行demo:

然后还是参考上面博主的帖子,训练自己的数据集。一开始运行能训练,但是评估的时候有个报错:
cd tools
python train.py --cfg_file cfgs/kitti_models/centerpoint.yaml --epochs 10 --ckpt_save_interval 5 --batch_size 1

是pytorch版本的问题。错误发生在训练结束后加载模型进行验证时,PyTorch 2.6+ 版本中 torch.load() 的 weights_only 参数默认值从 False 改为 True,这增强了安全性但破坏了向后兼容性。
改了这里:pcdet/models/detectors/detector3d_template.py(例如在其中的 torch.load(...) 调用里显式传入 weights_only=False;注意只应加载来源可信的权重文件)

再执行,评估可以了,但是计算评估结果数值时报了段错误:

因为训练已经成功结束。所以直接用test.py脚本,也是这个错误:
python test.py --cfg_file cfgs/kitti_models/centerpoint.yaml --ckpt ../output/kitti_models/centerpoint/default/ckpt/checkpoint_epoch_20.pth --batch_size 1 --workers 1 --max_waiting_mins 0
排查后,问题出现在这里:evaluation函数

进入这个函数,发现居然是这句import失败产生的段错误:
测试一下,确实如此,eval.py无法被import:
python -c "from pcdet.datasets.kitti.kitti_object_eval_python import eval; print('Import successful')"
![]()
再往下深入排查,是eval.py在这里import rotate_iou.py的时候报错:

再往下看,罪魁祸首就是这个函数:

问了 DeepSeek,它说是因为 rotate_iou.py 使用了 numba.cuda 而报错。也就是说,本质原因还是 GPU 和 CUDA 版本的问题,因为同样的代码在我家里的 4070 上并没有这个报错。
deepseek给我生成了一个cpu版本的rotate_iou_cpu.py。也就是说,既然这个段错误出现的地方其实是在计算eval结果里的iou数值的时候,报错本质并不影响训练和评估本身,那我们完全可以把原来在gpu里计算的内容,换成用cpu计算,就可以了。
# rotate_iou_cpu.py
import math
import numpy as np
import numba
@numba.jit(nopython=True)
def trangle_area(a, b, c):
    """Return the signed area of the triangle with vertices ``a``, ``b``, ``c``.

    Each argument is indexed as ``[x, y]``. The result is half of the 2-D
    cross product of ``(a - c)`` and ``(b - c)``, so its sign depends on the
    order in which the vertices are given.
    """
    ac_x = a[0] - c[0]
    ac_y = a[1] - c[1]
    bc_x = b[0] - c[0]
    bc_y = b[1] - c[1]
    return (ac_x * bc_y - ac_y * bc_x) / 2.0
@numba.jit(nopython=True)
def area(int_pts, num_of_inter):
    """Return the area of a polygon stored as a flat vertex buffer.

    ``int_pts`` is laid out as ``[x0, y0, x1, y1, ...]`` and only its first
    ``num_of_inter`` vertices are used. The polygon is split into a fan of
    triangles that all share vertex 0, and the absolute areas of those
    triangles are summed. Fewer than three vertices give ``0.0``.
    """
    total = 0.0
    anchor = int_pts[:2]
    for k in range(num_of_inter - 2):
        start = 2 * k + 2
        # Triangle (vertex 0, vertex k + 1, vertex k + 2).
        total += abs(trangle_area(anchor,
                                  int_pts[start:start + 2],
                                  int_pts[start + 2:start + 4]))
    return total
@numba.jit(nopython=True)
def sort_vertex_in_convex_polygon(int_pts, num_of_inter):
    """Sort the first ``num_of_inter`` vertices of ``int_pts`` in place.

    ``int_pts`` is a flat ``[x0, y0, x1, y1, ...]`` buffer. Every vertex gets
    a scalar key derived from its direction as seen from the centroid of the
    vertices, and the vertices are reordered by ascending key. Nothing is
    returned; when ``num_of_inter <= 0`` the buffer is left untouched.
    """
    if num_of_inter <= 0:
        return

    # Centroid of the vertices, accumulated in float32 scratch storage.
    centroid = np.zeros(2, dtype=np.float32)
    for k in range(num_of_inter):
        centroid[0] += int_pts[2 * k]
        centroid[1] += int_pts[2 * k + 1]
    centroid[0] /= num_of_inter
    centroid[1] /= num_of_inter

    keys = np.zeros(num_of_inter, dtype=np.float32)
    for k in range(num_of_inter):
        vx = int_pts[2 * k] - centroid[0]
        vy = int_pts[2 * k + 1] - centroid[1]
        norm = math.sqrt(vx * vx + vy * vy)
        # Skip normalisation for a vertex that sits (numerically) on the
        # centroid, which would otherwise divide by ~0.
        if norm > 1e-8:
            vx = vx / norm
            vy = vy / norm
        # Directions with vy >= 0 keep the key vx; directions with vy < 0 use
        # -2 - vx instead, so the two half-planes do not share key values
        # (for normalised directions: [-1, 1] versus [-3, -1]).
        if vy < 0:
            vx = -2 - vx
        keys[k] = vx

    # Exchange sort on the keys; each key swap is mirrored on the vertex pair.
    for a in range(num_of_inter):
        for b in range(a + 1, num_of_inter):
            if keys[a] > keys[b]:
                held_key = keys[a]
                keys[a] = keys[b]
                keys[b] = held_key

                held_x = int_pts[2 * a]
                held_y = int_pts[2 * a + 1]
                int_pts[2 * a] = int_pts[2 * b]
                int_pts[2 * a + 1] = int_pts[2 * b + 1]
                int_pts[2 * b] = held_x
                int_pts[2 * b + 1] = held_y
@numba.jit(nopython=True)
def line_segment_intersection(pts1, pts2, i, j, temp_pts):
    """Intersect edge ``i`` of one quadrilateral with edge ``j`` of another.

    ``pts1`` and ``pts2`` are flat ``[x0, y0, ..., x3, y3]`` corner buffers.
    Edge ``i`` runs from corner ``i`` to corner ``(i + 1) % 4``. When the two
    edges cross, the crossing point is written to ``temp_pts[0:2]`` and
    ``True`` is returned; otherwise ``temp_pts`` is not modified and
    ``False`` is returned.
    """
    i_next = (i + 1) % 4
    j_next = (j + 1) % 4

    # Segment A->B comes from pts1, segment C->D from pts2. Coordinates are
    # held as float32 scalars.
    ax = np.float32(pts1[2 * i])
    ay = np.float32(pts1[2 * i + 1])
    bx = np.float32(pts1[2 * i_next])
    by = np.float32(pts1[2 * i_next + 1])
    cx = np.float32(pts2[2 * j])
    cy = np.float32(pts2[2 * j + 1])
    dx = np.float32(pts2[2 * j_next])
    dy = np.float32(pts2[2 * j_next + 1])

    ba_x = bx - ax
    ba_y = by - ay
    da_x = dx - ax
    ca_x = cx - ax
    da_y = dy - ay
    ca_y = cy - ay

    # A and B must fall on different sides of line CD ...
    acd = da_y * ca_x > ca_y * da_x
    bcd = (dy - by) * (cx - bx) > (cy - by) * (dx - bx)
    if acd == bcd:
        return False

    # ... and C and D on different sides of line AB.
    abc = ca_y * ba_x > ba_y * ca_x
    abd = da_y * ba_x > ba_y * da_x
    if abc == abd:
        return False

    dc_x = dx - cx
    dc_y = dy - cy
    abba = ax * by - bx * ay
    cddc = cx * dy - dx * cy
    denom = ba_y * dc_x - ba_x * dc_y
    # A vanishing determinant means the lines are (numerically) parallel.
    if abs(denom) < 1e-8:
        return False

    numer_x = abba * dc_x - ba_x * cddc
    numer_y = abba * dc_y - ba_y * cddc
    temp_pts[0] = numer_x / denom
    temp_pts[1] = numer_y / denom
    return True
@numba.jit(nopython=True)
def point_in_quadrilateral(pt_x, pt_y, corners):
    """Tell whether the point ``(pt_x, pt_y)`` lies inside a quadrilateral.

    ``corners`` is a flat ``[x0, y0, ..., x3, y3]`` buffer. Only corners 0, 1
    and 3 are read: the point is projected onto the edges A->B (corner 0 to
    corner 1) and A->D (corner 0 to corner 3), and it counts as inside when
    both projections fall within the respective edge, boundary included.
    """
    origin_x = corners[0]
    origin_y = corners[1]

    ab_x = corners[2] - origin_x
    ab_y = corners[3] - origin_y
    ad_x = corners[6] - origin_x
    ad_y = corners[7] - origin_y
    ap_x = pt_x - origin_x
    ap_y = pt_y - origin_y

    ab_len_sq = ab_x * ab_x + ab_y * ab_y
    ap_on_ab = ab_x * ap_x + ab_y * ap_y
    if not (ab_len_sq >= ap_on_ab and ap_on_ab >= 0):
        return False

    ad_len_sq = ad_x * ad_x + ad_y * ad_y
    ap_on_ad = ad_x * ap_x + ad_y * ap_y
    return ad_len_sq >= ap_on_ad and ap_on_ad >= 0
@numba.jit(nopython=True)
def quadrilateral_intersection(pts1, pts2, int_pts):
    """Collect candidate vertices of the overlap of two quadrilaterals.

    ``pts1`` and ``pts2`` are flat ``[x0, y0, ..., x3, y3]`` corner buffers.
    Candidates are appended to ``int_pts`` as ``[x, y]`` pairs in this order:
    for each corner index, the ``pts1`` corner if it lies inside ``pts2`` and
    then the ``pts2`` corner if it lies inside ``pts1``; afterwards every
    crossing between an edge of ``pts1`` and an edge of ``pts2``.

    Returns the number of points written. No capacity check is made on
    ``int_pts``; up to 4 + 4 + 16 points can be produced.
    """
    count = 0

    for k in range(4):
        x_idx = 2 * k
        y_idx = x_idx + 1
        if point_in_quadrilateral(pts1[x_idx], pts1[y_idx], pts2):
            int_pts[2 * count] = pts1[x_idx]
            int_pts[2 * count + 1] = pts1[y_idx]
            count += 1
        if point_in_quadrilateral(pts2[x_idx], pts2[y_idx], pts1):
            int_pts[2 * count] = pts2[x_idx]
            int_pts[2 * count + 1] = pts2[y_idx]
            count += 1

    # Scratch slot that receives each edge/edge crossing point.
    crossing = np.zeros(2, dtype=np.float32)
    for edge1 in range(4):
        for edge2 in range(4):
            if line_segment_intersection(pts1, pts2, edge1, edge2, crossing):
                int_pts[2 * count] = crossing[0]
                int_pts[2 * count + 1] = crossing[1]
                count += 1

    return count
@numba.jit(nopython=True)
def rbbox_to_corners(corners, rbbox):
    """Write the four corners of a rotated box into ``corners``.

    ``rbbox`` is read as ``[center_x, center_y, x_extent, y_extent, angle]``
    and ``corners`` receives ``[x0, y0, ..., x3, y3]``. A box whose x or y
    extent is below ``1e-8`` is treated as invalid and produces all-zero
    corners. Nothing is returned.
    """
    x_d = rbbox[2]
    y_d = rbbox[3]
    # Degenerate box: zero the output instead of emitting collapsed corners.
    if x_d < 1e-8 or y_d < 1e-8:
        corners[:] = 0.0
        return

    center_x = rbbox[0]
    center_y = rbbox[1]
    a_cos = math.cos(rbbox[4])
    a_sin = math.sin(rbbox[4])

    # Axis-aligned corner offsets relative to the box centre.
    half_x = x_d / 2
    half_y = y_d / 2
    offsets_x = np.array([-half_x, -half_x, half_x, half_x], dtype=np.float32)
    offsets_y = np.array([-half_y, half_y, half_y, -half_y], dtype=np.float32)

    # Rotate each offset by the box angle and translate to the centre.
    for k in range(4):
        corners[2 * k] = a_cos * offsets_x[k] + a_sin * offsets_y[k] + center_x
        corners[2 * k + 1] = -a_sin * offsets_x[k] + a_cos * offsets_y[k] + center_y
@numba.jit(nopython=True)
def inter(rbbox1, rbbox2):
    """Return the overlap area of two rotated boxes.

    Each box is ``[center_x, center_y, x_extent, y_extent, angle]``. Both
    boxes are converted to corner lists, the candidate vertices of their
    overlap polygon are collected, sorted around their centroid and the
    polygon area is summed. ``0.0`` is returned when no candidate exists.
    """
    corners1 = np.zeros(8, dtype=np.float32)
    corners2 = np.zeros(8, dtype=np.float32)
    # quadrilateral_intersection() appends without a capacity check and can
    # emit up to 4 + 4 corner points plus 4 * 4 edge crossings. Degenerate
    # layouts (a corner lying on the other box's edge, rounding noise) can
    # count the same vertex more than once and exceed 8 points. Out-of-bounds
    # writes are not caught in nopython mode, so size the buffer for the
    # worst case: 24 points = 48 floats.
    intersection_corners = np.zeros(48, dtype=np.float32)

    rbbox_to_corners(corners1, rbbox1)
    rbbox_to_corners(corners2, rbbox2)

    num_intersection = quadrilateral_intersection(corners1, corners2, intersection_corners)
    if num_intersection == 0:
        return 0.0

    sort_vertex_in_convex_polygon(intersection_corners, num_intersection)
    return area(intersection_corners, num_intersection)
@numba.jit(nopython=True)
def devRotateIoUEval(rbox1, rbox2, criterion=-1):
    """Return the overlap score of two rotated boxes.

    Each box is ``[center_x, center_y, x_extent, y_extent, angle]``.
    ``criterion`` selects the normalisation of the overlap area:

    * ``-1``: divided by the union area (IoU);
    * ``0``: divided by the area of ``rbox1``;
    * ``1``: divided by the area of ``rbox2``;
    * anything else: the raw overlap area.

    ``0.0`` is returned when either box has an extent below ``1e-8`` or when
    the selected denominator is smaller than ``1e-8`` in magnitude.
    """
    # Reject degenerate boxes before doing any geometry.
    if rbox1[2] < 1e-8 or rbox1[3] < 1e-8:
        return 0.0
    if rbox2[2] < 1e-8 or rbox2[3] < 1e-8:
        return 0.0

    area1 = rbox1[2] * rbox1[3]
    area2 = rbox2[2] * rbox2[3]
    overlap = inter(rbox1, rbox2)

    if criterion == -1:
        denominator = area1 + area2 - overlap
    elif criterion == 0:
        denominator = area1
    elif criterion == 1:
        denominator = area2
    else:
        return overlap

    # Guard the division against a vanishing denominator.
    if abs(denominator) < 1e-8:
        return 0.0
    return overlap / denominator
@numba.jit(nopython=True, parallel=True)
def rotate_iou_cpu_eval(boxes, query_boxes, criterion=-1):
    """Compute the pairwise rotated-box overlap matrix on the CPU.

    Entry ``[i, j]`` of the returned float32 array of shape
    ``(boxes.shape[0], query_boxes.shape[0])`` holds
    ``devRotateIoUEval(boxes[i], query_boxes[j], criterion)``.
    """
    num_boxes = boxes.shape[0]
    num_queries = query_boxes.shape[0]
    overlaps = np.zeros((num_boxes, num_queries), dtype=np.float32)

    # Each row is independent, so the outer loop is a numba parallel range.
    for row in numba.prange(num_boxes):
        for col in range(num_queries):
            overlaps[row, col] = devRotateIoUEval(boxes[row], query_boxes[col], criterion)

    return overlaps
# Drop-in replacement for the GPU entry point: same name and signature, so
# eval.py can import this module without any other change.
def rotate_iou_gpu_eval(boxes, query_boxes, criterion=-1, device_id=0):
    """Compute pairwise rotated-box overlaps on the CPU.

    Args:
        boxes: array-like of shape ``(N, 5)``; each row is
            ``[center_x, center_y, x_extent, y_extent, angle]``.
        query_boxes: array-like of shape ``(K, 5)`` with the same row layout.
        criterion: ``-1`` for IoU, ``0`` to normalise the overlap by the area
            of the ``boxes`` entry, ``1`` to normalise by the area of the
            ``query_boxes`` entry, any other value for the raw overlap area.
        device_id: accepted only for signature compatibility; ignored.

    Returns:
        An ``(N, K)`` array of overlap scores. It has the dtype of ``boxes``
        when that is a floating-point dtype. For any other dtype (e.g.
        integer boxes) float32 is returned, because casting fractional
        overlaps back to an integer dtype would truncate them to 0 or 1.
    """
    boxes = np.asarray(boxes)
    query_boxes = np.asarray(query_boxes)

    if np.issubdtype(boxes.dtype, np.floating):
        out_dtype = boxes.dtype
    else:
        out_dtype = np.dtype(np.float32)

    num_boxes = boxes.shape[0]
    num_queries = query_boxes.shape[0]
    # Nothing to compare: skip the JIT-compiled kernel entirely.
    if num_boxes == 0 or num_queries == 0:
        return np.zeros((num_boxes, num_queries), dtype=out_dtype)

    # The kernel works in float32, mirroring the GPU implementation.
    result = rotate_iou_cpu_eval(
        np.ascontiguousarray(boxes, dtype=np.float32),
        np.ascontiguousarray(query_boxes, dtype=np.float32),
        criterion,
    )
    return result.astype(out_dtype)
然后在eval.py里选择导入这个:

此时就可以import成功了:
![]()
然后我们再尝试运行test.py,就可以了:

成功运行!我们这回可以正常训练和评估了。其实这些错误都是因为5090版本太新了,而python相关的三方包还没更新到与之适配的版本。
1012

被折叠的 条评论
为什么被折叠?



