import torch import numpy as np import torchvision def xywh2xyxy(x): """Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right""" y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) y[..., 0] = x[..., 0] - x[..., 2] / 2 # top left x y[..., 1] = x[..., 1] - x[..., 3] / 2 # top left y y[..., 2] = x[..., 0] + x[..., 2] / 2 # bottom right x y[..., 3] = x[..., 1] + x[..., 3] / 2 # bottom right y return y def box_iou(box1, box2): """ Return intersection-over-union (Jaccard index) of boxes. Both sets of boxes are expected to be in (x1, y1, x2, y2) format. box1: [N, 4] box2: [M, 4] Returns: [N, M] """ def box_area(box): return (box[:, 2] - box[:, 0]) * (box[:, 3] - box[:, 1]) area1 = box_area(box1) area2 = box_area(box2) lt = torch.max(box1[:, None, :2], box2[:, :2]) # [N,M,2] rb = torch.min(box1[:, None, 2:], box2[:, 2:]) # [N,M,2] wh = (rb - lt).clamp(min=0) # [N,M,2] inter = wh[:, :, 0] * wh[:, :, 1] # [N,M] union = area1[:, None] + area2 - inter return inter / (union + 1e-6) def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, max_det=300): """ Non-Maximum Suppression (NMS) on inference results prediction: [Batch, 84, 8400] (for YOLOv8/11) """ # [Batch, 84, Anchors] -> [Batch, Anchors, 84] prediction = prediction.transpose(1, 2) bs = prediction.shape[0] nc = prediction.shape[2] - 4 xc = prediction[..., 4:].max(-1)[0] > conf_thres output = [torch.zeros((0, 6), device=prediction.device)] * bs for xi, x in enumerate(prediction): x = x[xc[xi]] if not x.shape[0]: continue # Box decoding box = xywh2xyxy(x[:, :4]) # Confidence and Class conf, j = x[:, 4:].max(1, keepdim=True) x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres] n = x.shape[0] if not n: continue elif n > max_det: x = x[x[:, 4].argsort(descending=True)[:max_det]] # Batched NMS c = x[:, 5:6] * 7680 boxes, scores = x[:, :4] + c, x[:, 4] i = torchvision.ops.nms(boxes, scores, iou_thres) output[xi] = x[i] return output def compute_ap(recall, precision): """ Compute the average precision, given the recall and precision curves """ # Append sentinel values to beginning and end mrec = np.concatenate(([0.0], recall, [1.0])) mpre = np.concatenate(([1.0], precision, [0.0])) # Compute the precision envelope mpre = np.flip(np.maximum.accumulate(np.flip(mpre))) # Integrate area under curve method = 'interp' # methods: 'continuous', 'interp' if method == 'interp': x = np.linspace(0, 1, 101) # 101-point interp (COCO) ap = np.trapz(np.interp(x, mrec, mpre), x) # integrate else: # 'continuous' i = np.where(mrec[1:] != mrec[:-1])[0] # points where x axis (recall) changes ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) # area under curve return ap, mpre, mrec def smooth(y, f=0.05): """Box filter of fraction f""" nf = round(len(y) * f * 2) // 2 + 1 # number of filter elements (must be odd) p = np.ones(nf // 2) # ones padding yp = np.concatenate((p * y[0], y, p * y[-1]), 0) # y padded return np.convolve(yp, np.ones(nf) / nf, mode='valid') # y-smoothed def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir='.', names=(), eps=1e-16): """ Compute the average precision, given the recall and precision curves. """ # Sort by objectness i = np.argsort(-conf) tp, conf, pred_cls = tp[i], conf[i], pred_cls[i] # Find unique classes unique_classes, nt = np.unique(target_cls, return_counts=True) nc = unique_classes.shape[0] # number of classes, number of detections # Create Precision-Recall curve and compute AP for each class px, py = np.linspace(0, 1, 1000), [] # for plotting ap, p, r = np.zeros((nc, tp.shape[1])), np.zeros((nc, 1000)), np.zeros((nc, 1000)) for ci, c in enumerate(unique_classes): i = pred_cls == c n_l = (target_cls == c).sum() # number of labels n_p = i.sum() # number of predictions if n_p == 0 or n_l == 0: continue # Accumulate FPs and TPs fpc = (1 - tp[i]).cumsum(0) tpc = tp[i].cumsum(0) # Recall recall = tpc / (n_l + eps) # recall curve r[ci] = np.interp(-px, -conf[i], recall[:, 0], left=0) # negative x, xp because xp decreases # Precision precision = tpc / (tpc + fpc) # precision curve p[ci] = np.interp(-px, -conf[i], precision[:, 0], left=1) # p at pr_score # AP from recall-precision curve for j in range(tp.shape[1]): ap[ci, j], mpre, mrec = compute_ap(recall[:, j], precision[:, j]) # Compute F1 (harmonic mean of precision and recall) f1 = 2 * p * r / (p + r + eps) i = smooth(f1.mean(0), 0.1).argmax() # max F1 index p, r, f1 = p[:, i], r[:, i], f1[:, i] tp = (r * nt).round().astype(int) fp = (tp / (p + eps) - tp).astype(int) return tp, fp, p, r, f1, ap, unique_classes.astype(int)