import torch
import cv2
import numpy as np
import torchvision

from yolo11_standalone import YOLO11, YOLOPostProcessor, YOLOPostProcessorNumpy

# COCO class names (80 classes), index-aligned with the model's class outputs.
CLASSES = [
    "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck",
    "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench",
    "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra",
    "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
    "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove",
    "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup",
    "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange",
    "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
    "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse",
    "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink",
    "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
    "hair drier", "toothbrush"
]

# One random BGR color per class for drawing.
COLORS = np.random.uniform(0, 255, size=(len(CLASSES), 3))


def letterbox(im, new_shape=(640, 640), color=(114, 114, 114)):
    """Resize to new_shape with unchanged aspect ratio, padding the borders with gray."""
    shape = im.shape[:2]  # current shape (height, width)
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)
    # Scale ratio (new / old) and the unpadded target size
    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
    dw, dh = dw / 2, dh / 2  # split padding between both sides
    if shape[::-1] != new_unpad:
        im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)
    return im, r, (dw, dh)


def xywh2xyxy(x):
    """Convert boxes from (cx, cy, w, h) to (x1, y1, x2, y2); accepts torch tensors or numpy arrays."""
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    y[..., 0] = x[..., 0] - x[..., 2] / 2  # top-left x
    y[..., 1] = x[..., 1] - x[..., 3] / 2  # top-left y
    y[..., 2] = x[..., 0] + x[..., 2] / 2  # bottom-right x
    y[..., 3] = x[..., 1] + x[..., 3] / 2  # bottom-right y
    return y


def non_max_suppression(prediction, conf_thres=0.01, iou_thres=0.45, max_det=300):
    """Torch NMS over decoded predictions of shape (bs, 4 + nc, num_boxes)."""
    prediction = prediction.transpose(1, 2)  # -> (bs, num_boxes, 4 + nc)
    bs = prediction.shape[0]
    nc = prediction.shape[2] - 4  # number of classes
    xc = prediction[..., 4:].max(-1)[0] > conf_thres  # candidates above confidence threshold

    output = [torch.zeros((0, 6), device=prediction.device)] * bs
    for xi, x in enumerate(prediction):
        x = x[xc[xi]]
        if not x.shape[0]:
            continue
        box = xywh2xyxy(x[:, :4])
        conf, j = x[:, 4:].max(1, keepdim=True)  # best class score and index per box
        x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres]
        n = x.shape[0]
        if not n:
            continue
        elif n > max_det:
            x = x[x[:, 4].argsort(descending=True)[:max_det]]
        # Offset boxes by class index so NMS is effectively applied per class
        c = x[:, 5:6] * 7680
        boxes, scores = x[:, :4] + c, x[:, 4]
        i = torchvision.ops.nms(boxes, scores, iou_thres)
        output[xi] = x[i]
    return output
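
# The helper below is an optional sketch and is not called anywhere in this script:
# a pure-NumPy reference NMS for environments where cv2.dnn.NMSBoxes is unavailable.
# It assumes boxes are already in (x1, y1, x2, y2) format with one score per box;
# the name nms_numpy_reference is illustrative and not part of yolo11_standalone.
def nms_numpy_reference(boxes_xyxy, scores, iou_thres=0.45):
    # Sort box indices by score, highest first
    order = scores.argsort()[::-1]
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        if order.size == 1:
            break
        # Intersection of the kept box with all remaining boxes
        x1 = np.maximum(boxes_xyxy[i, 0], boxes_xyxy[order[1:], 0])
        y1 = np.maximum(boxes_xyxy[i, 1], boxes_xyxy[order[1:], 1])
        x2 = np.minimum(boxes_xyxy[i, 2], boxes_xyxy[order[1:], 2])
        y2 = np.minimum(boxes_xyxy[i, 3], boxes_xyxy[order[1:], 3])
        inter = np.clip(x2 - x1, 0, None) * np.clip(y2 - y1, 0, None)
        area_i = (boxes_xyxy[i, 2] - boxes_xyxy[i, 0]) * (boxes_xyxy[i, 3] - boxes_xyxy[i, 1])
        area_r = (boxes_xyxy[order[1:], 2] - boxes_xyxy[order[1:], 0]) * \
                 (boxes_xyxy[order[1:], 3] - boxes_xyxy[order[1:], 1])
        iou = inter / (area_i + area_r - inter + 1e-7)
        # Drop boxes whose overlap with the kept box exceeds the threshold
        order = order[1:][iou <= iou_thres]
    return np.array(keep, dtype=np.int64)
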
def non_max_suppression_numpy(prediction, conf_thres=0.25, iou_thres=0.45, max_det=300):
    """NumPy/OpenCV NMS over decoded predictions of shape (bs, num_boxes, 4 + nc)."""
    bs = prediction.shape[0]
    output = [np.zeros((0, 6), dtype=np.float32)] * bs

    for xi, x in enumerate(prediction):
        bbox_xywh = x[:, :4]
        class_probs = x[:, 4:]
        class_ids = np.argmax(class_probs, axis=1)
        confidences = np.max(class_probs, axis=1)

        # Filter out low-confidence boxes
        mask = confidences > conf_thres
        bbox_xywh = bbox_xywh[mask]
        confidences = confidences[mask]
        class_ids = class_ids[mask]
        if len(confidences) == 0:
            continue

        # cv2.dnn.NMSBoxes expects boxes as (top-left x, top-left y, w, h)
        bbox_tlwh = np.copy(bbox_xywh)
        bbox_tlwh[:, 0] = bbox_xywh[:, 0] - bbox_xywh[:, 2] / 2
        bbox_tlwh[:, 1] = bbox_xywh[:, 1] - bbox_xywh[:, 3] / 2

        indices = cv2.dnn.NMSBoxes(
            bboxes=bbox_tlwh.tolist(),
            scores=confidences.tolist(),
            score_threshold=conf_thres,
            nms_threshold=iou_thres
        )
        if len(indices) > 0:
            indices = np.array(indices).flatten()
            if len(indices) > max_det:
                indices = indices[:max_det]

            final_boxes_xywh = bbox_xywh[indices]
            final_boxes_xyxy = xywh2xyxy(final_boxes_xywh)
            final_scores = confidences[indices]
            final_classes = class_ids[indices]

            # Assemble (x1, y1, x2, y2, score, class) per detection
            out_tensor = np.concatenate([
                final_boxes_xyxy,
                final_scores[:, None],
                final_classes[:, None]
            ], axis=1)
            output[xi] = out_tensor
    return output


def main():
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    # Build the model and load the converted FP32 weights from the QAT run
    model = YOLO11(nc=80, scale='s')
    model.load_weights("my_yolo_result_qat/best_fp32_converted.pth")
    model.to(device)
    model.eval()

    # Torch post-processor (used only by the commented-out reference path below)
    post_std = YOLOPostProcessor(model.model[-1], use_segmentation=False)

    img_path = "1.jpg"
    img0 = cv2.imread(img_path)
    assert img0 is not None, f"Image Not Found {img_path}"

    # Preprocess: letterbox to 640x640, BGR->RGB, HWC->CHW, normalize to [0, 1]
    img, ratio, (dw, dh) = letterbox(img0, new_shape=(640, 640))
    img = img[:, :, ::-1].transpose(2, 0, 1)
    img = np.ascontiguousarray(img)

    img_tensor = torch.from_numpy(img).to(device)
    img_tensor = img_tensor.float()
    img_tensor /= 255.0  # 0 - 255 to 0.0 - 1.0
    if img_tensor.ndim == 3:
        img_tensor = img_tensor.unsqueeze(0)

    print("Running inference...")
    with torch.no_grad():
        pred = model(img_tensor)

        # Torch post-processing path (kept for reference):
        # pred = post_std(pred)
        # pred = non_max_suppression(pred, conf_thres=0.25, iou_thres=0.45)
        # det = pred[0]

        # NumPy post-processing path: decode raw head outputs, then run NMS
        preds_raw_numpy = [p.cpu().numpy() for p in pred]
        post_numpy = YOLOPostProcessorNumpy(strides=[8, 16, 32], reg_max=16, use_segmentation=False)
        pred_numpy_decoded = post_numpy(preds_raw_numpy)
        pred_results = non_max_suppression_numpy(pred_numpy_decoded, conf_thres=0.25, iou_thres=0.45)
        det = pred_results[0]

    if len(det):
        # Map boxes from the letterboxed 640x640 frame back to the original image
        det[:, [0, 2]] -= dw  # x padding
        det[:, [1, 3]] -= dh  # y padding
        det[:, :4] /= ratio
        det[:, 0] = np.clip(det[:, 0], 0, img0.shape[1])
        det[:, 1] = np.clip(det[:, 1], 0, img0.shape[0])
        det[:, 2] = np.clip(det[:, 2], 0, img0.shape[1])
        det[:, 3] = np.clip(det[:, 3], 0, img0.shape[0])

        print(f"Detected {len(det)} objects")
        for *xyxy, conf, cls in det:
            c = int(cls)
            label = f'{CLASSES[c]} {conf:.2f}'
            p1, p2 = (int(xyxy[0]), int(xyxy[1])), (int(xyxy[2]), int(xyxy[3]))
            # Convert the numpy color row to a plain tuple of ints for cv2 drawing
            color = tuple(int(v) for v in COLORS[c])

            cv2.rectangle(img0, p1, p2, color, 2, lineType=cv2.LINE_AA)
            # Filled label background above the box, then the label text
            t_size = cv2.getTextSize(label, 0, fontScale=0.5, thickness=1)[0]
            p2_label = p1[0] + t_size[0], p1[1] - t_size[1] - 3
            cv2.rectangle(img0, p1, p2_label, color, -1, cv2.LINE_AA)
            cv2.putText(img0, label, (p1[0], p1[1] - 2), 0, 0.5,
                        [255, 255, 255], thickness=1, lineType=cv2.LINE_AA)
            print(f" - {label} at {p1}-{p2}")

    cv2.imwrite("result.jpg", img0)
    print("Result saved to result.jpg")


def import_os_exists(path):
    # Small path-existence helper (currently unused)
    import os
    return os.path.exists(path)


if __name__ == "__main__":
    main()