Source code for alpaca.utils.ue_metrics

import numpy as np
from math import log2
from scipy.stats import percentileofscore
import torch


# Names exported by a star-import of this module.
# NOTE(review): classification_metric is defined further down in this file but is
# not listed here — confirm whether that omission is intentional.
__all__ = ["get_uq_metrics", "uq_accuracy", "dcg", "ndcg", "uq_ndcg", "uq_ll"]


def get_uq_metrics(estimations, errors, acc_percentile=0.1, **kwargs):
    """
    Compute the three uncertainty-quality metrics in one call.

    Args:
        estimations: uncertainty estimates, one per sample.
        errors: prediction errors, one per sample.
        acc_percentile: fraction of samples passed to ``uq_accuracy``.
        **kwargs: forwarded unchanged to ``uq_ndcg`` (e.g. ``bins``).

    Returns:
        Tuple ``(accuracy, ndcg, log_likelihood)`` as produced by
        ``uq_accuracy``, ``uq_ndcg`` and ``uq_ll`` respectively.
    """
    return (
        uq_accuracy(estimations, errors, acc_percentile),
        uq_ndcg(errors, estimations, **kwargs),
        uq_ll(errors, estimations),
    )


def uq_accuracy(uq, errors, percentile=0.1):
    """
    Overlap between the samples with the largest uncertainty and largest error.

    The ``k`` samples with the highest ``uq`` and the ``k`` samples with the
    highest ``errors`` are selected, where ``k`` is ``percentile`` times the
    number of samples (truncated, but never less than 1).

    Args:
        uq: uncertainty estimates; flattened before use.
        errors: prediction errors; flattened before use.
        percentile: fraction of the samples to treat as "worst".

    Returns:
        Share of the top-``k`` uncertainty samples that are also among the
        top-``k`` error samples, as a float in ``[0, 1]``. Empty input
        yields ``0.0``.
    """
    uq = np.ravel(uq)
    errors = np.ravel(errors)

    # A k of 0 would make ``[-0:]`` select *every* sample and then divide by
    # zero, so always compare at least the single worst sample.
    k = max(1, int(uq.size * percentile))

    worst_uq = np.argsort(uq)[-k:]
    worst_error = np.argsort(errors)[-k:]
    return len(set(worst_uq.tolist()) & set(worst_error.tolist())) / k


def dcg(relevances, scores, k):
    """
    Discounted cumulative gain, a ranking-quality metric.

    Items are ordered by descending ``scores``; the relevance of the item at
    zero-based position ``p`` contributes ``relevance / log2(p + 2)``.
    For UQ, relevance corresponds to the error and the score to the
    uncertainty estimate.

    Args:
        relevances: relevance of every item; flattened before use.
        scores: ranking score of every item; flattened before use.
        k: number of top-ranked items to accumulate.

    Returns:
        The DCG over the ``k`` highest-scored items (``0`` when ``k`` is 0).
    """
    relevances = np.ravel(relevances)
    scores = np.ravel(scores)

    # Indices of the items from highest to lowest score.
    ranking = np.argsort(scores)[::-1]
    return sum(
        relevances[item] / log2(position + 2)
        for position, item in enumerate(ranking[:k])
    )


def ndcg(relevances, scores):
    """
    Normalized DCG: the actual DCG divided by the ideal DCG.

    The cut-off ``k`` is the number of items with non-zero relevance. The
    ideal DCG is obtained by ranking the items by their own relevance.

    Args:
        relevances: relevance of every item (any array-like; flattened).
        scores: ranking score of every item.

    Returns:
        ``dcg(relevances, scores, k) / dcg(relevances, relevances, k)``,
        or ``0.0`` when the ideal DCG is zero (e.g. no relevant items).
    """
    relevances = np.ravel(relevances)
    # count_nonzero works for lists and N-d input alike and yields a plain int.
    k = int(np.count_nonzero(relevances))

    ideal = dcg(relevances, relevances, k)
    if ideal == 0:
        # Nothing relevant to rank: avoid a 0/0 division.
        return 0.0
    return dcg(relevances, scores, k) / ideal


def uq_ndcg(errors, uq, bins=None):
    """
    NDCG of the uncertainty ranking against binned error percentiles.

    In UQ we care most about the top errors, so each error is replaced by a
    relevance level derived from its percentile rank among all errors: the
    level is the number of ``bins`` edges that the percentile reaches. With
    the default edges, errors below the 80th percentile get relevance 0 and
    those at or above the 99th get relevance 3.

    Args:
        errors: prediction errors; flattened before use.
        uq: uncertainty estimates used as ranking scores.
        bins: increasing percentile edges; defaults to ``[80, 95, 99]``.

    Returns:
        The value of ``ndcg`` for the binned errors ranked by ``uq``.
    """
    if bins is None:
        bins = [80, 95, 99]

    # Flatten so column vectors behave like 1-d input, as in the other metrics.
    errors = np.ravel(errors)
    errors_percentiles = [percentileofscore(errors, error) for error in errors]
    errors_digitized = np.digitize(errors_percentiles, bins)

    return ndcg(errors_digitized, uq)


def uq_ll(errors, uq):
    """
    Mean log-likelihood-style score of the errors given the uncertainties.

    Computes ``-mean(log(uq**2) / 2 + errors**2 / (2 * uq**2))`` over the
    flattened inputs, which is the mean log-density of ``errors`` under
    zero-mean Gaussians with standard deviations ``uq``, up to an additive
    constant. A small epsilon (1e-10) is added to ``uq**2`` to keep the
    logarithm and the division finite.
    """
    variance = np.square(np.ravel(uq)) + 1e-10
    residuals = np.ravel(errors)

    log_term = np.log(variance) / 2
    fit_term = np.square(residuals) / 2 / variance
    return -np.mean(log_term + fit_term)


def classification_metric(uncertainties, correct_predictions):
    """
    Accuracy as a function of the fraction of most-certain samples kept.

    Samples are ordered by ascending uncertainty. For each fraction produced
    by ``np.arange(0.5, 1.01, 0.01)``, the accuracy is the sum of
    ``correct_predictions`` over that leading share of samples divided by
    the number of samples in it.

    Args:
        uncertainties: one uncertainty value per sample.
        correct_predictions: per-sample correctness indicators; ``torch.sum``
            is applied to it, so a torch tensor is expected.

    Returns:
        A NumPy array with two rows: the fractions and the matching
        accuracies.
    """
    accumulation = []
    idx = np.argsort(uncertainties)
    for fraction in np.arange(0.5, 1.01, 0.01):
        part_size = int(fraction * len(idx))
        part = correct_predictions[idx][:part_size]
        accuracy = torch.true_divide(torch.sum(part), len(part))
        # Store a plain Python float so building the NumPy array below does
        # not rely on implicit tensor-to-array conversion.
        accumulation.append([fraction, accuracy.item()])
    return np.array(accumulation).T
Source code for alpaca.utils.ue_metrics

alpaca

Navigation

Related Topics