From daf394657967209cfa7934707f4861def39ab040 Mon Sep 17 00:00:00 2001 From: John Hawkins Date: Tue, 13 Apr 2021 21:34:17 +1000 Subject: [PATCH] Modifying test simulations --- minvime/estimator_classification.py | 8 ++--- scripts/run_churn_simulation.py | 54 +++++++++++++++++++++++++++++ scripts/run_simulation_one.py | 12 +++---- 3 files changed, 64 insertions(+), 10 deletions(-) create mode 100755 scripts/run_churn_simulation.py diff --git a/minvime/estimator_classification.py b/minvime/estimator_classification.py index cac503e..693a31f 100755 --- a/minvime/estimator_classification.py +++ b/minvime/estimator_classification.py @@ -73,9 +73,9 @@ def estimate_binary_model_requirements(tp, fp, tn, fn, cases, baserate, minroi=0 current_min_roi = roi tprs = y combinations = combinations + 1 - print("Tested ", combinations, " different AUC plots") - print("Number of Exponents", len(beta_range)) - print("Number of Alpha Weights", len(alpha_range)) + #print("Tested ", combinations, " different AUC plots") + #print("Number of Exponents", len(beta_range)) + #print("Number of Alpha Weights", len(alpha_range)) return min_auc, min_precision, min_recall, np.array(fprates), tprs ###################################################################### @@ -91,7 +91,7 @@ def generate_roc_auc(fprates, alpha, beta): def calculate_peak_roi(fprates, tprates, tp, fp, tn, fn, num_pos, num_neg): """ Calculate the maximal ROI for a given ROC curve (defined by vectors of FPR and TPR) """ - roi = -999999 + roi = -99999999999999 result_precision = 0.0 result_recall = 0.0 for index in range(len(fprates)): diff --git a/scripts/run_churn_simulation.py b/scripts/run_churn_simulation.py new file mode 100755 index 0000000..4084376 --- /dev/null +++ b/scripts/run_churn_simulation.py @@ -0,0 +1,54 @@ +""" +Churn Simulation + +We explore properties of a minimum viable model for churn problems with different criteria. + +In a churn situation every False Negative involves the cost of losing a customer. 
+The cost of a False Positive is the cost of an unnecessary intervention. + +A True Positive means that you have a chance of mitigating the churn. +Therefore the impact of a model making a True Positive is a reduction in the expected +loss of a Churn. + +For example, if churn involves a $10,000 loss per customer, and the +probability of an intervention working is 20%, then a True Positive reduces the expected +loss from $10,000 to $8,000; that is, the model delivers $2,000 worth of value. + +This script will produce a table of model performance criteria for problems of +varying definitions in terms of the costs of both churn and intervention. +""" + +import sys +import pandas as pd + +sys.path.append('../minvime') +import estimator_classification as esti + +fns = [-10000,-8000,-6000,-4000,-2000,-1000] +fps = [-500,-400,-300,-200,-100] +success_rate = 0.2 + +tn = 0 +roi = 500000 +cases = 1000000 +baserate = 0.005 + +rez = pd.DataFrame() + +for fn in fns: + for fp in fps: + # A true positive is a reduction in expected loss (after considering intervention cost) + tp = fn - (fn * success_rate) + fp + current_churn_loss = cases * baserate * fn + # To deliver ROI we need to lift ROI from the current churn losses. 
+ minroi = current_churn_loss + roi + auc, prec, recall, fprs, tprs = esti.estimate_binary_model_requirements( + tp=tp, fp=fp, tn=tn, fn=fn, cases=cases, + baserate=baserate, minroi=minroi + ) + rez = rez.append({'Total Loss':current_churn_loss, 'Loss p Cust':fn, 'Intvn Cost':fp, 'auc':auc, 'precision':prec, 'recall':recall}, ignore_index=True) + +print("Churn Model Requirements") +print("Criteria | Min ROI $%i | Cases %i | Base Churn Rate %f " % (roi,cases,baserate) ) +print(rez) + diff --git a/scripts/run_simulation_one.py b/scripts/run_simulation_one.py index 06f8c98..f811883 100755 --- a/scripts/run_simulation_one.py +++ b/scripts/run_simulation_one.py @@ -4,8 +4,8 @@ sys.path.append('../minvime') import estimator_classification as esti # The file ../minvime/estimator_classification.py -tps = [20000,10000,8000,6000,4000,2000,1000,500] -fps = [-1000,-900,-800,-600,-500,-400,-200,-100] +tps = [20000,10000,8000,6000,4000,2000,1000] +fps = [-900,-800,-600,-500,-400,-200,-100] tn = 0 fn = 0 @@ -17,12 +17,12 @@ for tp in tps: for fp in fps: - auc, prec, recall = esti.estimate_binary_model_requirements( + auc, prec, recall, fprs, tprs = esti.estimate_binary_model_requirements( tp=tp, fp=fp, tn=tn, fn=fn, cases=cases, baserate=baserate, minroi=minroi ) - print({'tp':tp, 'fp':fp, 'auc':auc, 'precision':prec, 'recall':recall}) - rez = rez.append({'tp':tp, 'fp':fp, 'auc':auc, 'precision':prec, 'recall':recall}, ignore_index=True) - + rez = rez.append({'TP Benefit':tp, 'FP Cost':fp, 'AUC':auc, 'precision':prec, 'recall':recall}, ignore_index=True) +print("Model Requirements") +print("Criteria | Min ROI $%i | Cases %i | Base Churn Rate %f " % (minroi,cases,baserate) ) print(rez)