移除 Detect 头中的动态计算图，将动态部分拆分为独立的后处理模块（YOLOPostProcessor），并更新相关的推理代码示例

2025-12-30 17:10:01 +08:00
parent 9df330875d
commit 553a63f521
3 changed files with 203 additions and 621 deletions
--- a/inference.py
+++ b/inference.py
@@ -2,9 +2,8 @@ import torch
 import cv2
 import numpy as np
 import torchvision
-from yolo11_standalone import YOLO11
+from yolo11_standalone import YOLO11, YOLOPostProcessor

-# COCO 80类 类别名称
 CLASSES = [
    "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
    "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
@@ -17,37 +16,29 @@ CLASSES = [
    "hair drier", "toothbrush"
 ]

-# 生成随机颜色用于绘图
 COLORS = np.random.uniform(0, 255, size=(len(CLASSES), 3))

 def letterbox(im, new_shape=(640, 640), color=(114, 114, 114)):
-    """
-    将图像缩放并填充到指定大小 (保持纵横比)
-    """
-    shape = im.shape[:2]  # current shape [height, width]
+    shape = im.shape[:2]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)

-    # 计算缩放比例
    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])

-    # 计算padding
    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
-    dw, dh = dw / 2, dh / 2  # divide padding into 2 sides
+    dw, dh = dw / 2, dh / 2 

-    if shape[::-1] != new_unpad:  # resize
+    if shape[::-1] != new_unpad:
        im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
    
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    
-    # 添加边框
    im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)
    return im, r, (dw, dh)

 def xywh2xyxy(x):
-    """Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2]"""
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    y[..., 0] = x[..., 0] - x[..., 2] / 2  # top left x
    y[..., 1] = x[..., 1] - x[..., 3] / 2  # top left y
@@ -56,43 +47,27 @@ def xywh2xyxy(x):
    return y

 def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, max_det=300):
-    """
-    非极大值抑制 (NMS)
-    prediction: [Batch, 84, Anchors]
-    """
-    # 1. 转置: [Batch, 84, Anchors] -> [Batch, Anchors, 84]
    prediction = prediction.transpose(1, 2)
    
-    bs = prediction.shape[0]  # batch size
-    nc = prediction.shape[2] - 4  # number of classes
+    bs = prediction.shape[0]
+    nc = prediction.shape[2] - 4
    
-    # 修复: 使用 max(-1) 在最后一个维度(类别)上寻找最大置信度
-    # 之前的 max(1) 错误地在 Anchors 维度上操作了
-    xc = prediction[..., 4:].max(-1)[0] > conf_thres  # candidates
+    xc = prediction[..., 4:].max(-1)[0] > conf_thres

    output = [torch.zeros((0, 6), device=prediction.device)] * bs
    
-    for xi, x in enumerate(prediction):  # image index, image inference
-        x = x[xc[xi]]  # confidence filtering
-
+    for xi, x in enumerate(prediction):
+        x = x[xc[xi]]
        if not x.shape[0]:
            continue
-
-        # Box (center x, center y, width, height) to (x1, y1, x2, y2)
        box = xywh2xyxy(x[:, :4])
-
-        # Confidence and Class
        conf, j = x[:, 4:].max(1, keepdim=True)
        x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres]
-
-        # Check shape
        n = x.shape[0]
        if not n:
            continue
        elif n > max_det:
            x = x[x[:, 4].argsort(descending=True)[:max_det]]
-
-        # Batched NMS
        c = x[:, 5:6] * 7680  # classes
        boxes, scores = x[:, :4] + c, x[:, 4]
        i = torchvision.ops.nms(boxes, scores, iou_thres)
@@ -101,28 +76,21 @@ def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, max_det=300
    return output

 def main():
-    # 1. 初始化模型
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")
    
    model = YOLO11(nc=80, scale='s')
-    # 加载你之前转换好的纯净权重
    model.load_weights("yolo11s.pth") 
    model.to(device)
    model.eval()
-    # model.train()
+    post_std = YOLOPostProcessor(model.model[-1], use_segmentation=False)

-    # 2. 读取图片
-    img_path = "1.jpg" # 请替换为你本地的图片路径
-    
+    img_path = "1.jpg"
    img0 = cv2.imread(img_path)
    assert img0 is not None, f"Image Not Found {img_path}"

-    # 3. 预处理
-    # Letterbox resize
    img, ratio, (dw, dh) = letterbox(img0, new_shape=(640, 640))
    
-    # BGR to RGB, HWC to CHW
    img = img[:, :, ::-1].transpose(2, 0, 1)  
    img = np.ascontiguousarray(img)
    
@@ -132,25 +100,20 @@ def main():
    if img_tensor.ndim == 3:
        img_tensor = img_tensor.unsqueeze(0)

-    # 4. 推理
    print("开始推理...")
    with torch.no_grad():
        pred = model(img_tensor)

-    # 5. 后处理 (NMS)
+    pred = post_std(pred)
    pred = non_max_suppression(pred, conf_thres=0.25, iou_thres=0.45)

-    # 6. 绘制结果
-    det = pred[0] # 仅处理第一张图片
+    det = pred[0]
    
    if len(det):
-        # 将坐标映射回原图尺寸
-        # det[:, :4] 是 x1, y1, x2, y2
        det[:, [0, 2]] -= dw  # x padding
        det[:, [1, 3]] -= dh  # y padding
        det[:, :4] /= ratio
        
-        # 裁剪坐标防止越界
        det[:, 0].clamp_(0, img0.shape[1])
        det[:, 1].clamp_(0, img0.shape[0])
        det[:, 2].clamp_(0, img0.shape[1])
@@ -163,21 +126,17 @@ def main():
            label = f'{CLASSES[c]} {conf:.2f}'
            p1, p2 = (int(xyxy[0]), int(xyxy[1])), (int(xyxy[2]), int(xyxy[3]))
            
-            # 画框
            color = COLORS[c]
            cv2.rectangle(img0, p1, p2, color, 2, lineType=cv2.LINE_AA)
            
-            # 画标签背景
            t_size = cv2.getTextSize(label, 0, fontScale=0.5, thickness=1)[0]
            p2_label = p1[0] + t_size[0], p1[1] - t_size[1] - 3
            cv2.rectangle(img0, p1, p2_label, color, -1, cv2.LINE_AA)
            
-            # 画文字
            cv2.putText(img0, label, (p1[0], p1[1] - 2), 0, 0.5, [255, 255, 255], thickness=1, lineType=cv2.LINE_AA)
            
            print(f" - {label} at {p1}-{p2}")

-    # 7. 显示/保存结果
    cv2.imwrite("result.jpg", img0)
    print("结果已保存至 result.jpg")