Source code for thelper.optim.eval

"""Evaluation classes/funcs module.

This module contains procedures used to evaluate models and prediction results on specific
tasks or datasets. These procedures may be used as part of metric classes (defined in
:mod:`thelper.optim.metrics`) or high-level debug/drawing utilities.
"""
from typing import Dict, List, Optional, Union  # noqa: F401

import numpy as np
import torch

import thelper


@thelper.concepts.detection
def compute_bbox_iou(bbox1, bbox2):
    # type: (thelper.tasks.detect.BoundingBox, thelper.tasks.detect.BoundingBox) -> float
    """Computes and returns the Intersection over Union (IoU) of two bounding boxes."""
    assert isinstance(bbox1, thelper.data.BoundingBox) and isinstance(bbox2, thelper.data.BoundingBox), \
        "unexpected input bounding box types"
    bbox1_marg = 1 if bbox1.include_margin else 0
    bbox2_marg = 1 if bbox2.include_margin else 0
    intersection_width = min(bbox1.right + bbox1_marg, bbox2.right + bbox2_marg) - max(bbox1.left, bbox2.left)
    intersection_height = min(bbox1.bottom + bbox1_marg, bbox2.bottom + bbox2_marg) - max(bbox1.top, bbox2.top)
    intersection_area = max(0, intersection_width) * max(0, intersection_height)
    return float(intersection_area / float(bbox1.area + bbox2.area - intersection_area))
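
# Illustrative sketch (hypothetical helper, not part of the library API): the same IoU
# arithmetic as ``compute_bbox_iou``, written out on plain (left, top, right, bottom)
# tuples so the formula is easy to follow. The coordinates in the usage note are made up;
# with the library, two ``thelper.data.BoundingBox`` instances would be passed to
# ``compute_bbox_iou`` instead.
def _bbox_iou_sketch(box1, box2):
    """Computes IoU for two (left, top, right, bottom) tuples (illustrative only)."""
    inter_w = min(box1[2], box2[2]) - max(box1[0], box2[0])
    inter_h = min(box1[3], box2[3]) - max(box1[1], box2[1])
    inter_area = max(0, inter_w) * max(0, inter_h)  # clamp to zero when boxes do not overlap
    area1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
    area2 = (box2[2] - box2[0]) * (box2[3] - box2[1])
    return float(inter_area) / float(area1 + area2 - inter_area)
# e.g. _bbox_iou_sketch((0, 0, 10, 10), (5, 5, 15, 15)) == 25 / 175 (~0.143)
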
@thelper.concepts.segmentation
def compute_mask_iou(mask1, mask2, class_indices=None, dontcare=None):
    # type: (np.ndarray, np.ndarray, Union[List[int], np.ndarray, torch.Tensor], Optional[int]) -> Dict[int, float]
    """Computes and returns a map of Intersection over Union (IoU) scores for two segmentation masks."""
    # untested as of 11/2019; needs utest!
    assert isinstance(mask1, np.ndarray) and isinstance(mask2, np.ndarray), "invalid mask type"
    assert mask1.shape == mask2.shape, "mismatched mask shapes"
    assert np.issubdtype(mask1.dtype, np.integer), "mask1 dtype should be integer"
    assert np.issubdtype(mask2.dtype, np.integer), "mask2 dtype should be integer"
    if class_indices is None or len(class_indices) == 0:
        # default to all class indices found in either mask (avoids the ambiguous
        # truth-value check that would fail for non-empty numpy arrays/tensors)
        class_indices = np.unique(np.stack([mask1, mask2]))
    assert isinstance(class_indices, (list, np.ndarray, torch.Tensor)), "invalid class indices array type"
    iou_dict = {}
    for class_idx in class_indices:
        if dontcare is not None:
            target_c = np.logical_and(mask2 == class_idx, mask1 != dontcare)
            pred_c = np.logical_and(mask1 == class_idx, mask2 != dontcare)
        else:
            target_c = mask2 == class_idx
            pred_c = mask1 == class_idx
        intersection = np.logical_and(pred_c, target_c).sum()
        union = np.logical_or(pred_c, target_c).sum()
        if float(union) != 0.0:
            iou_dict[class_idx] = (float(intersection) / float(union))
        else:
            iou_dict[class_idx] = 0.0
    return iou_dict
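
# Hypothetical usage sketch for ``compute_mask_iou`` (the mask values below are made up):
# the two 2x2 integer masks disagree on a single pixel, so each class gets a partial score.
def _mask_iou_example():
    pred = np.array([[0, 0], [1, 1]], dtype=np.int64)
    target = np.array([[0, 1], [1, 1]], dtype=np.int64)
    # class 0: intersection=1, union=2 -> 0.5; class 1: intersection=2, union=3 -> ~0.667
    return compute_mask_iou(pred, target)  # keys are the class indices found in the masks
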
@thelper.concepts.detection
def compute_pascalvoc_metrics(pred_bboxes, gt_bboxes, task, iou_threshold=0.5, method="all-points"):
    """Computes the metrics used by the VOC Pascal 2012 challenge.

    This function is inspired from the 'Object Detection Metrics' repository of Rafael Padilla.
    See https://github.com/rafaelpadilla/Object-Detection-Metrics for more information. The original
    code is distributed under the MIT License, Copyright (c) 2018 Rafael Padilla.

    Args:
        pred_bboxes: list of bbox predictions generated by the model under evaluation.
        gt_bboxes: list of groundtruth bounding boxes defined by the dataset.
        task: task definition object that holds a vector of all class names.
        iou_threshold: Intersection Over Union (IOU) threshold for true/false positive classification.
        method: the evaluation method to use; can be the latest & official PASCAL VOC toolkit
            approach ("all-points"), or the 11-point approach ("11-points") described in the original
            paper ("The PASCAL Visual Object Classes (VOC) Challenge").

    Returns:
        A dictionary containing evaluation information and metrics for each class. Each entry contains:

            - ``precision``: array with the precision values;
            - ``recall``: array with the recall values;
            - ``AP``: average precision;
            - ``interpolated precision``: interpolated precision values;
            - ``interpolated recall``: interpolated recall values;
            - ``total positives``: total number of ground truth positives;
            - ``total TP``: total number of True Positive detections;
            - ``total FP``: total number of False Positive detections.
    """
    assert isinstance(pred_bboxes, (list, np.ndarray)) and all([isinstance(b, thelper.data.BoundingBox) for b in pred_bboxes]), \
        "invalid predictions format (expected list of bounding box objects)"
    assert all([isinstance(bbox.confidence, float) and 0 <= bbox.confidence <= 1 for bbox in pred_bboxes]), \
        "predicted bounding boxes must be provided with confidence values in [0,1]"
    assert all([bbox.image_id is not None for bbox in pred_bboxes]), "predicted bbox image id must be defined"
    assert isinstance(gt_bboxes, (list, np.ndarray)) and all([isinstance(b, thelper.data.BoundingBox) for b in gt_bboxes]), \
        "invalid input groundtruth format (expected list of bounding box objects)"
    assert all([bbox.image_id is not None for bbox in gt_bboxes]), "gt bbox image id must be defined"
    assert isinstance(task, thelper.tasks.Detection) and task.class_names, "invalid task object (should be detection)"
    assert 0 < iou_threshold <= 1, "invalid intersection over union value (should be in ]0,1])"
    assert method in ["all-points", "11-points"], "invalid method (should be 'all-points' or '11-points')"
    image_ids = list(set([bbox.image_id for bbox in pred_bboxes]) | set([bbox.image_id for bbox in gt_bboxes]))
    image_ids = {k: idx for idx, k in enumerate(image_ids)}
    gt_used_flags = [[[[bbox, False] for bbox in gt_bboxes
                       if (((isinstance(bbox.class_id, int) and bbox.class_id == ci) or bbox.class_id == cn)
                           and bbox.image_id == iid)]
                      for iid in image_ids]
                     for ci, cn in enumerate(task.class_names)]
    ret = {}
    for class_idx, class_name in enumerate(task.class_names):
        if task.background is not None and class_name == "background":
            continue
        curr_pred_bboxes = [bbox for bbox in pred_bboxes
                            if (isinstance(bbox.class_id, int) and bbox.class_id == class_idx) or bbox.class_id == class_name]
        curr_pred_bboxes = sorted(curr_pred_bboxes, key=lambda bbox: bbox.confidence, reverse=True)
        true_positives = np.zeros(len(curr_pred_bboxes))
        false_positives = np.zeros(len(curr_pred_bboxes))
        for pred_bbox_idx, pred_bbox in enumerate(curr_pred_bboxes):
            curr_gt_bboxes = gt_used_flags[class_idx][image_ids[pred_bbox.image_id]]
            best_gt_bbox_idx, best_gt_bbox_iou = -1, float("-inf")
            for gt_bbox_idx, (gt_bbox, gt_bbox_flag) in enumerate(curr_gt_bboxes):
                iou = compute_bbox_iou(pred_bbox, gt_bbox)
                if iou > best_gt_bbox_iou:
                    best_gt_bbox_iou = iou
                    best_gt_bbox_idx = gt_bbox_idx
            if best_gt_bbox_iou >= iou_threshold:
                curr_best_gt_bbox_used_flag = curr_gt_bboxes[best_gt_bbox_idx][1]
                if not curr_best_gt_bbox_used_flag:
                    true_positives[pred_bbox_idx] = 1
                    # we can only use GT bboxes once, flag them as 'seen' after that
                    curr_gt_bboxes[best_gt_bbox_idx][1] = True
                else:
                    # if best GT bbox was already used, we discard this detection
                    # (note: we could do some combinatorial optim w/ hungarian method to solve ideally instead)
                    false_positives[pred_bbox_idx] = 1
            else:
                # if we fail to meet the minimum iou threshold, discard this detection
                false_positives[pred_bbox_idx] = 1
        true_positive_cumsum = np.cumsum(true_positives)
        npos = sum([len(bboxes) for bboxes in gt_used_flags[class_idx]])
        recall = true_positive_cumsum / npos
        precision = np.divide(true_positive_cumsum, (np.cumsum(false_positives) + true_positive_cumsum))
        avg_prec, mpre, mrec, _ = compute_average_precision(precision.tolist(), recall.tolist(), method)
        ret[class_name] = {
            "class_name": class_name,
            "iou_threshold": iou_threshold,
            "eval_method": method,
            "precision": precision,
            "recall": recall,
            "AP": avg_prec,
            "interpolated precision": mpre,
            "interpolated recall": mrec,
            "total positives": npos,
            "total TP": np.sum(true_positives),
            "total FP": np.sum(false_positives),
        }
    return ret
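
# Hypothetical sketch of the per-class bookkeeping performed above: given per-detection
# TP/FP flags (already sorted by decreasing confidence) and the ground-truth positive
# count, the cumulative sums yield the precision/recall curves that are handed to
# ``compute_average_precision``. The flag values and ``npos`` below are made up; with the
# library, ``compute_pascalvoc_metrics`` would be called directly with BoundingBox lists
# and a Detection task.
def _pascalvoc_curve_sketch():
    true_positives = np.array([1, 0, 1, 1, 0], dtype=np.float64)  # hypothetical flags
    false_positives = 1.0 - true_positives
    npos = 4  # hypothetical number of ground-truth boxes for this class
    tp_cumsum = np.cumsum(true_positives)
    recall = tp_cumsum / npos
    precision = tp_cumsum / (np.cumsum(false_positives) + tp_cumsum)
    return compute_average_precision(precision.tolist(), recall.tolist(), "all-points")
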
@thelper.concepts.detection
def compute_average_precision(precision, recall, method="all-points"):
    """Computes the average precision given an array of precision and recall values.

    This function is inspired from the 'Object Detection Metrics' repository of Rafael Padilla.
    See https://github.com/rafaelpadilla/Object-Detection-Metrics for more information. The original
    code is distributed under the MIT License, Copyright (c) 2018 Rafael Padilla.

    Args:
        precision: list of precision values for the evaluated predictions of a class.
        recall: list of recall values for the evaluated predictions of a class.
        method: the evaluation method to use; can be the latest & official PASCAL VOC toolkit
            approach ("all-points"), or the 11-point approach ("11-points") described in the original
            paper ("The PASCAL Visual Object Classes (VOC) Challenge").

    Returns:
        A 4-element tuple containing the average precision, rectified precision/recall arrays, and
        the indices used for the integral.
    """
    assert isinstance(precision, list) and all([0 <= p <= 1 for p in precision])
    assert isinstance(recall, list) and all([0 <= r <= 1 for r in recall])
    assert method in ["all-points", "11-points"]
    if method == "all-points":
        mprecision = [0, *precision, 0]  # pad with extrema
        # run backwards through precision values, eliminate ridges
        for idx in range(len(mprecision) - 1, 0, -1):
            mprecision[idx - 1] = max(mprecision[idx - 1], mprecision[idx])
        mrecall = [0, *recall, 1]  # pad with extrema
        # eliminate duplicates
        idxs = [idx + 1 for idx in range(len(mrecall) - 1) if mrecall[1:][idx] != mrecall[0:-1][idx]]
        avg_prec = 0
        # compute integral (AUC)
        for idx in idxs:
            avg_prec = avg_prec + np.sum((mrecall[idx] - mrecall[idx - 1]) * mprecision[idx])
        return avg_prec, mprecision[0:len(mprecision) - 1], mrecall[0:len(mprecision) - 1], idxs
    else:
        mprecision = [*precision]
        rho_interp, recall_val_id = [], []
        for r in np.linspace(0, 1, 11)[::-1]:
            ridxs = np.argwhere(np.asarray(recall) >= r)
            rho_interp.append(max(mprecision[ridxs.min():]) if ridxs.size != 0 else 0)
            recall_val_id.append(r)
        avg_prec = sum(rho_interp) / 11
        rvals = [recall_val_id[0], *recall_val_id, 0]
        pvals = [0, *rho_interp, 0]
        cc = []
        for i in range(len(rvals)):
            p = (rvals[i], pvals[i - 1])
            if p not in cc:
                cc.append(p)
            p = (rvals[i], pvals[i])
            if p not in cc:
                cc.append(p)
        return [avg_prec, [i[1] for i in cc], [i[0] for i in cc], None]
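
# Hypothetical usage sketch for ``compute_average_precision``: the precision/recall values
# below are made up, and the two supported evaluation methods generally return slightly
# different AP values for the same curve.
def _average_precision_example():
    precision = [1.0, 0.5, 0.6667, 0.75]
    recall = [0.25, 0.25, 0.5, 0.75]
    ap_all = compute_average_precision(precision, recall, method="all-points")[0]
    ap_11 = compute_average_precision(precision, recall, method="11-points")[0]
    return ap_all, ap_11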