"""Evaluation classes/funcs module.
This module contains procedures used to evaluate models and prediction results on specific
tasks or datasets. These procedures may be used as part of metric classes (defined in
:mod:`thelper.optim.metrics`) or high-level debug/drawing utilities.
"""
from typing import Dict, List, Optional, Union # noqa: F401
import numpy as np
import torch
import thelper


@thelper.concepts.detection
def compute_bbox_iou(bbox1, bbox2):
    # type: (thelper.tasks.detect.BoundingBox, thelper.tasks.detect.BoundingBox) -> float
    """Computes and returns the Intersection over Union (IoU) of two bounding boxes."""
    assert isinstance(bbox1, thelper.data.BoundingBox) and isinstance(bbox2, thelper.data.BoundingBox), \
        "unexpected input bounding box types"
    bbox1_marg = 1 if bbox1.include_margin else 0
    bbox2_marg = 1 if bbox2.include_margin else 0
    intersection_width = min(bbox1.right + bbox1_marg, bbox2.right + bbox2_marg) - max(bbox1.left, bbox2.left)
    intersection_height = min(bbox1.bottom + bbox1_marg, bbox2.bottom + bbox2_marg) - max(bbox1.top, bbox2.top)
    intersection_area = max(0, intersection_width) * max(0, intersection_height)
    return float(intersection_area / float(bbox1.area + bbox2.area - intersection_area))
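

# Illustrative sketch (hypothetical helper, not part of the thelper API): the same IoU
# computation as above, expressed over plain ``(left, top, right, bottom)`` tuples so the
# arithmetic can be checked by hand without constructing ``thelper.data.BoundingBox``
# objects; the ``include_margin`` handling is omitted here.
def _plain_bbox_iou(box1, box2):
    """Computes the IoU of two ``(left, top, right, bottom)`` coordinate tuples."""
    inter_width = min(box1[2], box2[2]) - max(box1[0], box2[0])
    inter_height = min(box1[3], box2[3]) - max(box1[1], box2[1])
    inter_area = max(0, inter_width) * max(0, inter_height)
    area1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
    area2 = (box2[2] - box2[0]) * (box2[3] - box2[1])
    return float(inter_area) / float(area1 + area2 - inter_area)
# e.g. _plain_bbox_iou((0, 0, 10, 10), (5, 0, 15, 10)) == 50.0 / 150.0 (~0.333)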


@thelper.concepts.segmentation
def compute_mask_iou(mask1, mask2, class_indices=None, dontcare=None):
    # type: (np.ndarray, np.ndarray, Optional[Union[List[int], np.ndarray, torch.Tensor]], Optional[int]) -> Dict[int, float]
    """Computes and returns a map of Intersection over Union (IoU) scores for two segmentation masks."""
    # untested as of 11/2019; needs utest!
    assert isinstance(mask1, np.ndarray) and isinstance(mask2, np.ndarray), "invalid mask type"
    assert mask1.shape == mask2.shape, "mismatched mask shapes"
    assert np.issubdtype(mask1.dtype, np.integer), "mask1 dtype should be integer"
    assert np.issubdtype(mask2.dtype, np.integer), "mask2 dtype should be integer"
    if class_indices is None or len(class_indices) == 0:
        class_indices = np.unique(np.stack([mask1, mask2]))
    assert isinstance(class_indices, (list, np.ndarray, torch.Tensor)), "invalid class indices array type"
    iou_dict = {}
    for class_idx in class_indices:
        if dontcare is not None:
            target_c = np.logical_and(mask2 == class_idx, mask1 != dontcare)
            pred_c = np.logical_and(mask1 == class_idx, mask2 != dontcare)
        else:
            target_c = mask2 == class_idx
            pred_c = mask1 == class_idx
        intersection = np.logical_and(pred_c, target_c).sum()
        union = np.logical_or(pred_c, target_c).sum()
        if float(union) != 0.0:
            iou_dict[class_idx] = (float(intersection) / float(union))
        else:
            iou_dict[class_idx] = 0.0
    return iou_dict
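

# Illustrative usage sketch (hypothetical helper, not part of thelper): builds two tiny
# integer masks and feeds them to ``compute_mask_iou`` above; the expected per-class
# scores are worked out in the comments.
def _demo_compute_mask_iou():
    """Returns the per-class IoU map for a pair of toy 1x4 segmentation masks."""
    pred_mask = np.array([[0, 0, 1, 1]], dtype=np.int64)
    gt_mask = np.array([[0, 1, 1, 1]], dtype=np.int64)
    # class 0: intersection=1, union=2 -> 0.5; class 1: intersection=2, union=3 -> ~0.667
    return compute_mask_iou(pred_mask, gt_mask)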


@thelper.concepts.detection
def compute_pascalvoc_metrics(pred_bboxes, gt_bboxes, task, iou_threshold=0.5, method="all-points"):
    """Computes the metrics used by the PASCAL VOC 2012 challenge.

    This function is inspired by the 'Object Detection Metrics' repository of Rafael Padilla.
    See https://github.com/rafaelpadilla/Object-Detection-Metrics for more information.
    The original code is distributed under the MIT License, Copyright (c) 2018 Rafael Padilla.

    Args:
        pred_bboxes: list of bbox predictions generated by the model under evaluation.
        gt_bboxes: list of groundtruth bounding boxes defined by the dataset.
        task: task definition object that holds a vector of all class names.
        iou_threshold: Intersection over Union (IoU) threshold for true/false positive classification.
        method: the evaluation method to use; can be the latest & official PASCAL VOC toolkit
            approach ("all-points"), or the 11-point approach ("11-points") described in the original
            paper ("The PASCAL Visual Object Classes (VOC) Challenge").

    Returns:
        A dictionary containing evaluation information and metrics for each class. Each entry contains:

            - ``precision``: array with the precision values;
            - ``recall``: array with the recall values;
            - ``AP``: average precision;
            - ``interpolated precision``: interpolated precision values;
            - ``interpolated recall``: interpolated recall values;
            - ``total positives``: total number of ground truth positives;
            - ``total TP``: total number of True Positive detections;
            - ``total FP``: total number of False Positive detections.
    """
    assert isinstance(pred_bboxes, (list, np.ndarray)) and all([isinstance(b, thelper.data.BoundingBox) for b in pred_bboxes]), \
        "invalid predictions format (expected list of bounding box objects)"
    assert all([isinstance(bbox.confidence, float) and 0 <= bbox.confidence <= 1 for bbox in pred_bboxes]), \
        "predicted bounding boxes must be provided with confidence values in [0,1]"
    assert all([bbox.image_id is not None for bbox in pred_bboxes]), "predicted bbox image id must be defined"
    assert isinstance(gt_bboxes, (list, np.ndarray)) and all([isinstance(b, thelper.data.BoundingBox) for b in gt_bboxes]), \
        "invalid input groundtruth format (expected list of bounding box objects)"
    assert all([bbox.image_id is not None for bbox in gt_bboxes]), "gt bbox image id must be defined"
    assert isinstance(task, thelper.tasks.Detection) and task.class_names, "invalid task object (should be detection)"
    assert 0 < iou_threshold <= 1, "invalid intersection over union value (should be in ]0,1])"
    assert method in ["all-points", "11-points"], "invalid method (should be 'all-points' or '11-points')"
    image_ids = list(set([bbox.image_id for bbox in pred_bboxes]) | set([bbox.image_id for bbox in gt_bboxes]))
    image_ids = {k: idx for idx, k in enumerate(image_ids)}
    gt_used_flags = [[[[bbox, False] for bbox in gt_bboxes
                       if (((isinstance(bbox.class_id, int) and bbox.class_id == ci) or bbox.class_id == cn) and
                           bbox.image_id == iid)] for iid in image_ids] for ci, cn in enumerate(task.class_names)]
    ret = {}
    for class_idx, class_name in enumerate(task.class_names):
        if task.background is not None and class_name == "background":
            continue
        curr_pred_bboxes = [bbox for bbox in pred_bboxes if (isinstance(bbox.class_id, int) and bbox.class_id == class_idx) or
                            bbox.class_id == class_name]
        curr_pred_bboxes = sorted(curr_pred_bboxes, key=lambda bbox: bbox.confidence, reverse=True)
        true_positives = np.zeros(len(curr_pred_bboxes))
        false_positives = np.zeros(len(curr_pred_bboxes))
        for pred_bbox_idx, pred_bbox in enumerate(curr_pred_bboxes):
            curr_gt_bboxes = gt_used_flags[class_idx][image_ids[pred_bbox.image_id]]
            best_gt_bbox_idx, best_gt_bbox_iou = -1, float("-inf")
            for gt_bbox_idx, (gt_bbox, gt_bbox_flag) in enumerate(curr_gt_bboxes):
                iou = compute_bbox_iou(pred_bbox, gt_bbox)
                if iou > best_gt_bbox_iou:
                    best_gt_bbox_iou = iou
                    best_gt_bbox_idx = gt_bbox_idx
            if best_gt_bbox_iou >= iou_threshold:
                curr_best_gt_bbox_used_flag = curr_gt_bboxes[best_gt_bbox_idx][1]
                if not curr_best_gt_bbox_used_flag:
                    true_positives[pred_bbox_idx] = 1
                    # we can only use GT bboxes once, flag them as 'seen' after that
                    curr_gt_bboxes[best_gt_bbox_idx][1] = True
                else:
                    # if best GT bbox was already used, we discard this detection
                    # (note: we could do some combinatorial optim w/ hungarian method to solve ideally instead)
                    false_positives[pred_bbox_idx] = 1
            else:
                # if we fail to meet the minimum iou threshold, discard this detection
                false_positives[pred_bbox_idx] = 1
        true_positive_cumsum = np.cumsum(true_positives)
        npos = sum([len(bboxes) for bboxes in gt_used_flags[class_idx]])
        recall = true_positive_cumsum / npos
        precision = np.divide(true_positive_cumsum, (np.cumsum(false_positives) + true_positive_cumsum))
        avg_prec, mpre, mrec, _ = compute_average_precision(precision.tolist(), recall.tolist(), method)
        ret[class_name] = {
            "class_name": class_name,
            "iou_threshold": iou_threshold,
            "eval_method": method,
            "precision": precision,
            "recall": recall,
            "AP": avg_prec,
            "interpolated precision": mpre,
            "interpolated recall": mrec,
            "total positives": npos,
            "total TP": np.sum(true_positives),
            "total FP": np.sum(false_positives)
        }
    return ret
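

# Illustrative sketch (hypothetical values, not part of thelper): how the per-class TP/FP
# flags gathered above turn into monotonic recall and running precision before being passed
# to ``compute_average_precision``. Assume 3 groundtruth boxes and 4 detections sorted by
# descending confidence, flagged TP, FP, TP, FP by the greedy IoU matching.
def _demo_precision_recall_accumulation():
    """Returns the (precision, recall) arrays for a toy set of flagged detections."""
    true_positives = np.array([1., 0., 1., 0.])
    false_positives = np.array([0., 1., 0., 1.])
    npos = 3  # total number of groundtruth boxes for the class
    tp_cumsum = np.cumsum(true_positives)  # [1, 1, 2, 2]
    recall = tp_cumsum / npos  # [0.33, 0.33, 0.67, 0.67]
    precision = tp_cumsum / (np.cumsum(false_positives) + tp_cumsum)  # [1.0, 0.5, 0.67, 0.5]
    return precision, recall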


@thelper.concepts.detection
def compute_average_precision(precision, recall, method="all-points"):
    """Computes the average precision given an array of precision and recall values.

    This function is inspired by the 'Object Detection Metrics' repository of Rafael Padilla.
    See https://github.com/rafaelpadilla/Object-Detection-Metrics for more information.
    The original code is distributed under the MIT License, Copyright (c) 2018 Rafael Padilla.

    Args:
        precision: list of precision values for the evaluated predictions of a class.
        recall: list of recall values for the evaluated predictions of a class.
        method: the evaluation method to use; can be the latest & official PASCAL VOC toolkit
            approach ("all-points"), or the 11-point approach ("11-points") described in the original
            paper ("The PASCAL Visual Object Classes (VOC) Challenge").

    Returns:
        A 4-element tuple containing the average precision, the rectified precision/recall arrays, and
        the indices used for the integral (``None`` when using the 11-point method).
    """
    assert isinstance(precision, list) and all([0 <= p <= 1 for p in precision])
    assert isinstance(recall, list) and all([0 <= r <= 1 for r in recall])
    assert method in ["all-points", "11-points"]
    if method == "all-points":
        mprecision = [0, *precision, 0]  # pad with extrema
        # run backwards through precision values, eliminate ridges
        for idx in range(len(mprecision) - 1, 0, -1):
            mprecision[idx - 1] = max(mprecision[idx - 1], mprecision[idx])
        mrecall = [0, *recall, 1]  # pad with extrema
        # eliminate duplicates
        idxs = [idx + 1 for idx in range(len(mrecall) - 1) if mrecall[idx + 1] != mrecall[idx]]
        avg_prec = 0
        # compute integral (AUC)
        for idx in idxs:
            avg_prec = avg_prec + np.sum((mrecall[idx] - mrecall[idx - 1]) * mprecision[idx])
        return avg_prec, mprecision[0:len(mprecision) - 1], mrecall[0:len(mprecision) - 1], idxs
    else:
        mprecision = [*precision]
        rho_interp, recall_val_id = [], []
        for r in np.linspace(0, 1, 11)[::-1]:
            ridxs = np.argwhere(np.asarray(recall) >= r)
            rho_interp.append(max(mprecision[ridxs.min():]) if ridxs.size != 0 else 0)
            recall_val_id.append(r)
        avg_prec = sum(rho_interp) / 11
        rvals = [recall_val_id[0], *recall_val_id, 0]
        pvals = [0, *rho_interp, 0]
        cc = []
        for i in range(len(rvals)):
            p = (rvals[i], pvals[i - 1])
            if p not in cc:
                cc.append(p)
            p = (rvals[i], pvals[i])
            if p not in cc:
                cc.append(p)
        return avg_prec, [i[1] for i in cc], [i[0] for i in cc], None
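

# Illustrative usage sketch (hypothetical values, not part of thelper): two detections for a
# class (one TP followed by one FP) against two groundtruth boxes yield precision [1.0, 0.5]
# and recall [0.5, 0.5]; with the "all-points" method the interpolated curve integrates to
# an average precision of 0.5.
def _demo_compute_average_precision():
    """Returns the 4-element AP tuple for a toy precision/recall curve."""
    precision = [1.0, 0.5]
    recall = [0.5, 0.5]
    avg_prec, interp_prec, interp_rec, idxs = compute_average_precision(precision, recall, method="all-points")
    return avg_prec, interp_prec, interp_rec, idxs  # avg_prec == 0.5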