diff --git a/pycm/pycm_distance.py b/pycm/pycm_distance.py
new file mode 100644
index 00000000..14433515
--- /dev/null
+++ b/pycm/pycm_distance.py
@@ -0,0 +1,140 @@
+# -*- coding: utf-8 -*-
+"""Distance/Similarity functions."""
+from __future__ import division
+from enum import Enum
+import math
+
+
+class DistanceType(Enum):
+    """
+    Distance metric type class.
+
+    >>> pycm.DistanceType.AMPLE
+    """
+
+    AMPLE = "AMPLE"
+    Anderberg = "Anderberg"
+    AndresMarzoDelta = "AndresMarzoDelta"
+    BaroniUrbaniBuserI = "BaroniUrbaniBuserI"
+    BaroniUrbaniBuserII = "BaroniUrbaniBuserII"
+
+
+def AMPLE_calc(TP, FP, FN, TN):
+    """
+    Calculate AMPLE.
+
+    :param TP: true positive
+    :type TP: int
+    :param TN: true negative
+    :type TN: int
+    :param FP: false positive
+    :type FP: int
+    :param FN: false negative
+    :type FN: int
+    :return: AMPLE as float ("None" if it cannot be calculated)
+    """
+    try:
+        part1 = TP / (TP + FP)
+        part2 = FN / (FN + TN)
+        return abs(part1 - part2)
+    except Exception:
+        return "None"
+
+
+def Anderberg_calc(TP, FP, FN, TN):
+    """
+    Calculate Anderberg's D.
+
+    :param TP: true positive
+    :type TP: int
+    :param TN: true negative
+    :type TN: int
+    :param FP: false positive
+    :type FP: int
+    :param FN: false negative
+    :type FN: int
+    :return: Anderberg's D as float ("None" if it cannot be calculated)
+    """
+    try:
+        part1 = max(TP, FP) + max(FN, TN) + max(TP, FN) + max(FP, TN)
+        # Largest marginal total along each axis of the 2x2 table.
+        part2 = max(TP + FP, FN + TN) + max(TP + FN, FP + TN)
+        n = TP + FP + FN + TN
+        return (part1 - part2) / (2 * n)
+    except Exception:
+        return "None"
+
+
+def AndresMarzoDelta_calc(TP, FP, FN, TN):
+    """
+    Calculate Andres & Marzo's Delta.
+
+    :param TP: true positive
+    :type TP: int
+    :param TN: true negative
+    :type TN: int
+    :param FP: false positive
+    :type FP: int
+    :param FN: false negative
+    :type FN: int
+    :return: Andres & Marzo's Delta as float ("None" if it cannot be calculated)
+    """
+    try:
+        part1 = TP + TN - 2 * math.sqrt(FP * FN)
+        n = TP + FP + FN + TN
+        return part1 / n
+    except Exception:
+        return "None"
+
+
+def BaroniUrbaniBuserI_calc(TP, FP, FN, TN):
+    """
+    Calculate Baroni-Urbani & Buser I.
+
+    :param TP: true positive
+    :type TP: int
+    :param TN: true negative
+    :type TN: int
+    :param FP: false positive
+    :type FP: int
+    :param FN: false negative
+    :type FN: int
+    :return: Baroni-Urbani & Buser I as float ("None" if it cannot be calculated)
+    """
+    try:
+        part1 = math.sqrt(TP * TN) + TP
+        part2 = part1 + FP + FN
+        return part1 / part2
+    except Exception:
+        return "None"
+
+
+def BaroniUrbaniBuserII_calc(TP, FP, FN, TN):
+    """
+    Calculate Baroni-Urbani & Buser II.
+
+    :param TP: true positive
+    :type TP: int
+    :param TN: true negative
+    :type TN: int
+    :param FP: false positive
+    :type FP: int
+    :param FN: false negative
+    :type FN: int
+    :return: Baroni-Urbani & Buser II as float ("None" if it cannot be calculated)
+    """
+    try:
+        part1 = math.sqrt(TP * TN) + TP - FP - FN
+        part2 = math.sqrt(TP * TN) + TP + FP + FN
+        return part1 / part2
+    except Exception:
+        return "None"
+
+
+DISTANCE_MAPPER = {
+    DistanceType.AMPLE: AMPLE_calc,
+    DistanceType.Anderberg: Anderberg_calc,
+    DistanceType.AndresMarzoDelta: AndresMarzoDelta_calc,
+    DistanceType.BaroniUrbaniBuserI: BaroniUrbaniBuserI_calc,
+    DistanceType.BaroniUrbaniBuserII: BaroniUrbaniBuserII_calc,
+}
diff --git a/pycm/pycm_obj.py b/pycm/pycm_obj.py
index e2072e2f..6cd1f62c 100644
--- a/pycm/pycm_obj.py
+++ b/pycm/pycm_obj.py
@@ -6,6 +6,7 @@
 from .pycm_handler import __obj_assign_handler__, __obj_file_handler__, __obj_matrix_handler__, __obj_vector_handler__, __obj_array_handler__
 from .pycm_class_func import F_calc, IBA_calc, TI_calc, NB_calc, sensitivity_index_calc
 from .pycm_overall_func import weighted_kappa_calc, weighted_alpha_calc, alpha2_calc, brier_score_calc
+from .pycm_distance import DistanceType, DISTANCE_MAPPER
 from .pycm_output import *
 from .pycm_util import *
 from .pycm_param import *
@@ -591,6 +592,23 @@ def NB(self, w=1):
         except Exception:
             return {}
 
+    def distance(self, metric):
+        """
+        Calculate distance/similarity for all classes.
+
+        :param metric: metric
+        :type metric: DistanceType
+        :return: result as dict (one distance/similarity value per class)
+        """
+        if not isinstance(metric, DistanceType):
+            raise pycmMatrixError(DISTANCE_METRIC_TYPE_ERROR)
+        distance_func = DISTANCE_MAPPER[metric]
+        distance_dict = {}
+        for i in self.classes:
+            distance_dict[i] = distance_func(
+                TP=self.TP[i], FP=self.FP[i], FN=self.FN[i], TN=self.TN[i])
+        return distance_dict
+
     def CI(
             self,
             param,
diff --git a/pycm/pycm_param.py b/pycm/pycm_param.py
index f22c99a2..d94bae7f 100644
--- a/pycm/pycm_param.py
+++ b/pycm/pycm_param.py
@@ -117,6 +117,8 @@
 
 CURVE_NONE_WARNING = "The curve axes contain non-numerical value(s)."
 
+DISTANCE_METRIC_TYPE_ERROR = "The metric type must be DistanceType"
+
 CLASS_NUMBER_THRESHOLD = 10
 
 BALANCE_RATIO_THRESHOLD = 3