# -*- coding: utf-8 -*-
"""
Metrics used to evaluate anomaly detection performance.
"""
# Author: Yingtong Dou <ytongdou@gmail.com>, Kay Liu <zliu234@uic.edu>
# License: BSD 2 clause
import numpy as np
from sklearn.metrics import roc_auc_score, average_precision_score, ndcg_score
def eval_roc_auc(labels, pred):
    """
    Compute the ROC-AUC score of outlier scores against binary labels.

    Parameters
    ----------
    labels : numpy.ndarray
        Ground-truth labels in shape of ``(N, )``, where 1 represents
        outliers and 0 represents normal instances.
    pred : numpy.ndarray
        Outlier scores in shape of ``(N, )``.

    Returns
    -------
    roc_auc : float
        Area under the ROC curve, as returned by
        ``sklearn.metrics.roc_auc_score``.
    """
    # Outlier detection is scored as a binary classification problem, so
    # the raw outlier scores are handed to scikit-learn unchanged.
    return roc_auc_score(labels, pred)
def eval_recall_at_k(labels, pred, k):
    """
    Recall score for top k instances with the highest outlier scores.

    Parameters
    ----------
    labels : numpy.ndarray
        Labels in shape of ``(N, )``, where 1 represents outliers,
        0 represents normal instances.
    pred : numpy.ndarray
        Outlier scores in shape of ``(N, )``.
    k : int
        The number of instances to evaluate. Values larger than ``N``
        select every instance.

    Returns
    -------
    recall_at_k : float
        Fraction of all outliers that appear among the ``k`` instances
        with the highest outlier scores. When ``labels`` contains no
        outliers the ratio is ``0 / 0`` and NumPy yields ``nan``.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1.
    """
    labels = np.asarray(labels)
    pred = np.asarray(pred)

    if k < 1:
        raise ValueError("k must be a positive integer, got {!r}".format(k))

    n_samples = len(pred)
    # Clamp so that any k >= N selects all instances. Without the clamp,
    # N < k <= 2N only worked through negative-index wraparound of ``kth``
    # and k > 2N raised an out-of-bounds error.
    n_top = min(k, n_samples)
    split = n_samples - n_top

    # argpartition places the n_top largest scores in the tail without
    # paying for a full sort.
    top_idx = np.argpartition(pred, split)[split:]

    # Vectorized sums instead of the builtin ``sum``, which would iterate
    # the arrays element by element in Python.
    recall_at_k = labels[top_idx].sum() / labels.sum()
    return recall_at_k
def eval_precision_at_k(labels, pred, k):
    """
    Precision score for top k instances with the highest outlier scores.

    Parameters
    ----------
    labels : numpy.ndarray
        Labels in shape of ``(N, )``, where 1 represents outliers,
        0 represents normal instances.
    pred : numpy.ndarray
        Outlier scores in shape of ``(N, )``.
    k : int
        The number of instances to evaluate. Values larger than ``N``
        select every instance; the denominator is still ``k``.

    Returns
    -------
    precision_at_k : float
        Number of outliers among the ``k`` instances with the highest
        outlier scores, divided by ``k``.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1.
    """
    labels = np.asarray(labels)
    pred = np.asarray(pred)

    if k < 1:
        raise ValueError("k must be a positive integer, got {!r}".format(k))

    n_samples = len(pred)
    # Clamp so that any k >= N selects all instances. Without the clamp,
    # N < k <= 2N only worked through negative-index wraparound of ``kth``
    # and k > 2N raised an out-of-bounds error.
    n_top = min(k, n_samples)
    split = n_samples - n_top

    # argpartition places the n_top largest scores in the tail without
    # paying for a full sort.
    top_idx = np.argpartition(pred, split)[split:]

    # Vectorized sum instead of the builtin ``sum``; the denominator stays
    # the requested k so existing results for k > N are unchanged.
    precision_at_k = labels[top_idx].sum() / k
    return precision_at_k
def eval_average_precision(labels, pred):
    """
    Compute the average precision of outlier scores against binary labels.

    Parameters
    ----------
    labels : numpy.ndarray
        Ground-truth labels in shape of ``(N, )``, where 1 represents
        outliers and 0 represents normal instances.
    pred : numpy.ndarray
        Outlier scores in shape of ``(N, )``.

    Returns
    -------
    ap : float
        Average precision score, as returned by
        ``sklearn.metrics.average_precision_score``.
    """
    # Outlier detection is scored as a binary classification problem, so
    # the raw outlier scores are handed to scikit-learn unchanged.
    return average_precision_score(labels, pred)
def eval_ndcg(labels, pred):
    """
    Compute the normalized discounted cumulative gain of an outlier ranking.

    Parameters
    ----------
    labels : numpy.ndarray
        Ground-truth labels in shape of ``(N, )``, where 1 represents
        outliers and 0 represents normal instances.
    pred : numpy.ndarray
        Outlier scores in shape of ``(N, )``.

    Returns
    -------
    ndcg : float
        NDCG score, as returned by ``sklearn.metrics.ndcg_score``.
    """
    relevance = np.array(labels)
    scores = np.array(pred)

    # Boolean labels are cast to integers before being used as relevance
    # values.
    if relevance.dtype == bool:
        relevance = relevance.astype(int)

    # Each input is wrapped in a list so the whole label/score vector is
    # passed to scikit-learn as one ranking.
    return ndcg_score(y_true=[relevance], y_score=[scores])