import torch
import cv2
import numpy as np
import torchvision

from yolo11_standalone import YOLO11

# COCO 80-class names
CLASSES = [
    "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat",
    "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat",
    "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack",
    "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball",
    "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket",
    "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
    "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair",
    "couch", "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse",
    "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink",
    "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", "hair drier",
    "toothbrush"
]

# Random per-class colors for drawing
COLORS = np.random.uniform(0, 255, size=(len(CLASSES), 3))


def letterbox(im, new_shape=(640, 640), color=(114, 114, 114)):
    """Resize and pad an image to new_shape while keeping its aspect ratio."""
    shape = im.shape[:2]  # current shape [height, width]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)

    # Scale ratio (new / old)
    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])

    # Compute padding
    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
    dw, dh = dw / 2, dh / 2  # divide padding into 2 sides

    if shape[::-1] != new_unpad:  # resize
        im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    # Add constant-color border
    im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)
    return im, r, (dw, dh)


def xywh2xyxy(x):
    """Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2]."""
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    y[..., 0] = x[..., 0] - x[..., 2] / 2  # top left x
    y[..., 1] = x[..., 1] - x[..., 3] / 2  # top left y
    y[..., 2] = x[..., 0] + x[..., 2] / 2  # bottom right x
    y[..., 3] = x[..., 1] + x[..., 3] / 2  # bottom right y
    return y


def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, max_det=300):
    """
    Non-maximum suppression (NMS).
    prediction: [Batch, 84, Anchors]
    """
    # 1. Transpose: [Batch, 84, Anchors] -> [Batch, Anchors, 84]
    prediction = prediction.transpose(1, 2)

    bs = prediction.shape[0]      # batch size
    nc = prediction.shape[2] - 4  # number of classes

    # Fix: use max(-1) to take the maximum confidence over the last (class) dimension;
    # the previous max(1) wrongly reduced over the anchor dimension.
    xc = prediction[..., 4:].max(-1)[0] > conf_thres  # candidates

    output = [torch.zeros((0, 6), device=prediction.device)] * bs

    for xi, x in enumerate(prediction):  # image index, image inference
        x = x[xc[xi]]  # confidence filtering
        if not x.shape[0]:
            continue

        # Box (center x, center y, width, height) to (x1, y1, x2, y2)
        box = xywh2xyxy(x[:, :4])

        # Confidence and class
        conf, j = x[:, 4:].max(1, keepdim=True)
        x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres]

        # Check shape
        n = x.shape[0]
        if not n:
            continue
        elif n > max_det:
            x = x[x[:, 4].argsort(descending=True)[:max_det]]

        # Batched NMS: offset boxes by class index so boxes of different
        # classes never suppress each other
        c = x[:, 5:6] * 7680  # classes
        boxes, scores = x[:, :4] + c, x[:, 4]
        i = torchvision.ops.nms(boxes, scores, iou_thres)
        output[xi] = x[i]

    return output


def main():
    # 1. Initialize the model
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    model = YOLO11(nc=80, scale='s')
    # Load the clean weights converted earlier
    model.load_weights("yolo11s.pth")
    model.to(device)
    model.eval()

    # 2. Read the image
    img_path = "1.jpg"  # replace with your local image path
    img0 = cv2.imread(img_path)
    assert img0 is not None, f"Image Not Found {img_path}"

    # 3. Preprocess
    # Letterbox resize
    img, ratio, (dw, dh) = letterbox(img0, new_shape=(640, 640))
    # BGR to RGB, HWC to CHW
    img = img[:, :, ::-1].transpose(2, 0, 1)
    img = np.ascontiguousarray(img)

    img_tensor = torch.from_numpy(img).to(device)
    img_tensor = img_tensor.float()
    img_tensor /= 255.0  # 0 - 255 to 0.0 - 1.0
    if img_tensor.ndim == 3:
        img_tensor = img_tensor.unsqueeze(0)

    # 4. Inference
    print("Running inference...")
    with torch.no_grad():
        pred = model(img_tensor)

    # 5. Post-process (NMS)
    pred = non_max_suppression(pred, conf_thres=0.25, iou_thres=0.45)

    # 6. Draw results
    det = pred[0]  # only process the first image
    if len(det):
        # Map coordinates back to the original image size
        # det[:, :4] is x1, y1, x2, y2
        det[:, [0, 2]] -= dw  # x padding
        det[:, [1, 3]] -= dh  # y padding
        det[:, :4] /= ratio

        # Clip coordinates to the image boundaries
        det[:, 0].clamp_(0, img0.shape[1])
        det[:, 1].clamp_(0, img0.shape[0])
        det[:, 2].clamp_(0, img0.shape[1])
        det[:, 3].clamp_(0, img0.shape[0])

        print(f"Detected {len(det)} objects")

        for *xyxy, conf, cls in det:
            c = int(cls)
            label = f'{CLASSES[c]} {conf:.2f}'
            p1, p2 = (int(xyxy[0]), int(xyxy[1])), (int(xyxy[2]), int(xyxy[3]))

            # Draw the box (OpenCV expects a plain list/tuple color, not a numpy array)
            color = COLORS[c].tolist()
            cv2.rectangle(img0, p1, p2, color, 2, lineType=cv2.LINE_AA)

            # Draw the label background
            t_size = cv2.getTextSize(label, 0, fontScale=0.5, thickness=1)[0]
            p2_label = p1[0] + t_size[0], p1[1] - t_size[1] - 3
            cv2.rectangle(img0, p1, p2_label, color, -1, cv2.LINE_AA)

            # Draw the label text
            cv2.putText(img0, label, (p1[0], p1[1] - 2), 0, 0.5,
                        [255, 255, 255], thickness=1, lineType=cv2.LINE_AA)

            print(f" - {label} at {p1}-{p2}")

    # 7. Save the result
    cv2.imwrite("result.jpg", img0)
    print("Result saved to result.jpg")


def import_os_exists(path):
    import os
    return os.path.exists(path)


if __name__ == "__main__":
    main()