"""
ASCII scatter plots for terminal output.

Adapted from the GitHub gist https://gist.github.com/fransua/6165813.
It was modified to work with Python 3, to provide neater formatting of the
tick labels and to fix extreme values being occasionally omitted.

Note: the plot title was removed because callers print their own titles
before calling into this module.
"""
from math import ceil, exp, log

# Characters reserved to the left of the plot area for the y tick labels.
_GUTTER = 7
# Number of plot columns between two consecutive x ticks.
_X_TICK_EVERY = 10
# Number of plot rows between two consecutive y tick labels.
_Y_TICK_EVERY = 5


def _trim_zeros(text):
    """Strip trailing zeros of a decimal string, keeping one digit after '.'.

    Strings without a decimal point are returned untouched so that integer
    labels such as ``'210000'`` are never shortened.
    """
    if '.' not in text:
        return text
    text = text.rstrip('0')
    return text + '0' if text.endswith('.') else text


def _decimals(values, budget):
    """Return how many decimals fit in ``budget`` characters for ``values``."""
    widest = max(len('{0:.0f}'.format(v)) for v in values)
    # Never negative: a negative precision is an invalid format spec.
    return max(0, budget - widest)


def _bin_index(value, origin, step, last):
    """Return the index of the bin ``(centre - step/2, centre + step/2]``
    containing ``value``, where bin ``i`` is centred on ``origin + i * step``.

    The result is clamped to ``[0, last]`` so floating point noise can never
    push an extreme value outside the grid.
    """
    index = int(ceil((value - origin) / step - 0.5))
    return min(max(index, 0), last)


def ascii_plot(ydata, xdata=None, logscale=False, pch='o',
               xlabel='X', ylabel='Y', width=72, height=50):
    """
    Render a scatter plot of ``ydata`` as a multi-line string.

    :param ydata: sequence of values to be plotted
    :param None xdata: x coordinates corresponding to ydata. If None (or
       empty) the points are numbered from 1 to the length of ydata.
    :param False logscale: display data with a logarithmic Y axis; values
       that have no logarithm (<= 0) are left out of the plot
    :param 'o' pch: string used to draw a point (whatever + = - * etc...)
    :param 'X' xlabel: label for the X axis
    :param 'Y' ylabel: label for the Y axis
    :param 72 width: width of the plot area in characters
    :param 50 height: height of the plot area in characters
    :returns: string corresponding to the plot; when there is nothing to
       plot, the Y label followed by a ``(no data)`` line
    :raises ValueError: if width or height is smaller than 1, or if xdata is
       shorter than ydata
    """
    if width < 1 or height < 1:
        raise ValueError("width and height must be at least 1")
    ydata = list(ydata)
    xdata = list(xdata) if xdata is not None else []
    if not xdata:
        xdata = list(range(1, len(ydata) + 1))
    if len(xdata) < len(ydata):
        raise ValueError("xdata must provide an x coordinate for every y value")

    if logscale:
        transform, inverse = log, exp
    else:
        transform = inverse = lambda value: value

    points = []
    for x, y in zip(xdata, ydata):
        try:
            points.append((x, transform(y)))
        except ValueError:
            # log() of a non-positive number: the point cannot be placed on
            # the axis, and keeping it as -inf would make the range infinite.
            continue
    if not points:
        return ylabel + '\n' + ' ' * _GUTTER + '(no data)\n'

    # Vertical range, with 5% headroom above the largest value. abs() keeps
    # the headroom *above* the maximum when the maximum is negative.
    transformed = [y for _, y in points]
    ymin = min(transformed)
    ytop = max(transformed)
    ytop += abs(ytop) * 0.05
    if ytop <= ymin:
        # Flat data at zero: use a unit range instead of a zero-height bin.
        ytop = ymin + 1.0
    row_step = (ytop - ymin) / height

    # Horizontal range; a single x position also needs a non-zero bin width.
    xmin, xmax = min(xdata), max(xdata)
    if xmax <= xmin:
        xmax = xmin + 1.0
    col_step = (xmax - xmin) / width

    # Bin every point once instead of scanning all points for every cell.
    occupied = {
        (_bin_index(y, ymin, row_step, height),
         _bin_index(x, xmin, col_step, width))
        for x, y in points
    }

    # Rows are drawn top to bottom; "top_offset" counts rows from the top and
    # the value bin it shows is ``height - top_offset``.
    labelled = [t for t in range(height + 1) if t and t % _Y_TICK_EVERY == 0]
    y_labels = {t: inverse(ymin + (height - t) * row_step) for t in labelled}
    top_label = inverse(ytop)
    y_decimals = _decimals([top_label] + list(y_labels.values()), _GUTTER - 1)

    def y_tick(value):
        # Short numbers are pushed right; stripped zeros are padded back as
        # spaces so the '+' tick mark always lands in the same column.
        raw = '{0:.{1}f}'.format(value, y_decimals)
        text = _trim_zeros(raw)
        lead = max(0, _GUTTER - len(raw))
        return ' ' * lead + text + ' ' * (len(raw) - len(text)) + '+'

    lines = [ylabel, y_tick(top_label)]
    plain_gutter = ' ' * _GUTTER + '|'
    for top_offset in range(height + 1):
        row = height - top_offset
        gutter = y_tick(y_labels[top_offset]) if top_offset in y_labels else plain_gutter
        cells = ''.join(pch if (row, col) in occupied else ' '
                        for col in range(width + 1))
        lines.append(gutter + cells)

    # NOTE(review): the corner label is fixed text, it is not derived from
    # the data range.
    corner = ' 1/inf ' if logscale else '     0 '
    lines.append(corner + ''.join('+' if col % _X_TICK_EVERY == 0 else '-'
                                  for col in range(width + 1)))

    # One label per tick; the last column is labelled when it carries a tick.
    tick_values = [xmin + col * col_step
                   for col in range(0, width + 1, _X_TICK_EVERY)]
    x_decimals = _decimals(tick_values, _GUTTER)
    tick_labels = ''.join(
        _trim_zeros('{0:.{1}f}'.format(value, x_decimals)).ljust(_X_TICK_EVERY)
        for value in tick_values)
    lines.append(' ' * _GUTTER + tick_labels)
    lines.append(' ' * _GUTTER + '{0:^{1}}'.format(xlabel, width))
    return '\n'.join(lines) + '\n'


def arange(beg, end, step):
    """Return evenly spaced values ``beg, beg + step, ...`` covering the
    distance between ``beg`` and ``end`` (a float-friendly ``range``).

    The number of values is ``int(abs(beg - end) / step + 0.5)``; ``step``
    must be non-zero.
    """
    return [beg + i * step for i in range(int(abs(beg - end) / step + .5))]
##########################################################################################
def filler(current, max_len, content=" "):
    """
    Return the padding needed to extend a field of `current` characters
    to `max_len` characters.

    The padding is `content` repeated once per missing character. A field
    that is already `max_len` or longer needs no padding and yields "".
    """
    return content * max(0, max_len - current)

##########################################################################################
def print_header(header, width=80):
    """
    Print `header` followed by underscores so the line is `width` characters
    wide. A header that is already wider than `width` is printed unchanged.
    """
    # Reuse filler() so headers and table columns share one padding rule.
    print(header + filler(len(header), width, "_"))
##########################################################################################
def task_plot(project, experiment, property, metric):
    """
    Plot one aspect of an experiment as an ASCII chart on standard output.

    `property` selects what is plotted:
      * "execution"  - execution time of each recorded run of `experiment`
      * "hyperparam" - history of the hyperparameter `metric` (not implemented yet)
      * "result"     - history of the result `metric` (not implemented yet)

    Any other value prints an error and exits with status 1, like the
    unrecognised sub-command path of `task_list`.

    NOTE: `property` shadows the builtin of the same name; it is kept because
    it is part of this function's signature.
    """
    if property == "execution":
        print()
        print_header(f"__Experiment_[{experiment}]_execution_time_")
        values = project.get_execution_times(experiment)
        if values:
            print(ascii_plot(values, xlabel='Iteration', ylabel='Seconds', width=70, height=12))
        else:
            # Nothing recorded for this experiment: there is no range to plot.
            print(" NONE")
        print()
    elif property == "hyperparam":
        print()
        print_header(f"__Experiment_[{experiment}]_hyperparameter_[{metric}]_")
        # TODO: plot the recorded values of the hyperparameter per execution.
        print(" TODO")
        print()
    elif property == "result":
        print()
        print_header(f"__Experiment_[{experiment}]_result_[{metric}]_")
        # TODO: plot the recorded values of the result metric per execution.
        print(" TODO")
        print()
    else:
        print()
        print(f"\nUnrecognized Experiment Property [{property}] -- Valid Options [execution,hyperparam,result]")
        print()
        # Signal the failure to the calling shell.
        raise SystemExit(1)
# Timestamp layouts accepted for the "start"/"end" fields of an execution.
# NOTE(review): the variant without a fractional part is accepted on the
# assumption that a timestamp with zero microseconds may have been written
# without one (as str(datetime) does) -- confirm against start_experiment.
_TIMESTAMP_FORMATS = ('%Y-%m-%d %H:%M:%S.%f', '%Y-%m-%d %H:%M:%S')


def _parse_timestamp(text):
    """Parse an execution timestamp, with or without fractional seconds."""
    for layout in _TIMESTAMP_FORMATS:
        try:
            return datetime.strptime(text, layout)
        except ValueError:
            continue
    raise ValueError(f"Unrecognised execution timestamp: {text!r}")


def get_experiment_execution_stats(self, name):
    """
    Given an experiment name
    Return the execution statistics as a tuple
    (number of recorded executions, mean execution time in seconds).

    An experiment without recorded executions yields (0, 0).
    """
    if name not in self.executions:
        return 0, 0
    return len(self.executions[name]), self.get_mean_execution_time(name)


def get_mean_execution_time(self, name):
    """
    Return the mean duration in seconds of the completed executions of the
    experiment `name`, or 0 when there are none.
    """
    durations = self.get_execution_times(name)
    if not durations:
        return 0
    return sum(durations) / len(durations)


def get_execution_times(self, name):
    """
    Return the duration in seconds of every completed execution of the
    experiment `name`, in stored order.

    Executions lacking a "start" or an "end" timestamp (for example a run
    that never reached end_experiment) are skipped. An unknown experiment
    yields an empty list.
    """
    if name not in self.executions:
        return []
    durations = []
    for record in self.executions[name].values():
        started, ended = record.get("start"), record.get("end")
        if not started or not ended:
            continue
        elapsed = _parse_timestamp(ended) - _parse_timestamp(started)
        # total_seconds() keeps whole days and fractions of a second, both
        # of which timedelta.seconds silently drops.
        durations.append(elapsed.total_seconds())
    return durations
#################################################################
def test_experiment_executions():
    """
    Run the same experiment twice (about 2s and 6s) and check that the
    execution statistics report two runs with a mean time of about 4s.
    """
    testdir = "temp_test_dir_xyz"
    os.mkdir(testdir)
    os.chdir(testdir)
    try:
        project = proj.init("default", "test execution", "execution test")
        exec_id = project.start_experiment("Initial Exp", "experiments/exp_one.py", params={})
        time.sleep(2)
        project.end_experiment("Initial Exp", exec_id, hyperparams={})
        exec_id = project.start_experiment("Initial Exp", "experiments/exp_one.py", params={})
        time.sleep(6)
        project.end_experiment("Initial Exp", exec_id, hyperparams={})
        execs, mean_time = project.get_experiment_execution_stats("Initial Exp")
        assert execs == 2
        # The second positional argument of approx() is the *relative*
        # tolerance; a one second window needs the absolute tolerance.
        assert mean_time == pytest.approx(4, abs=1.0)
    finally:
        # Always restore the working directory and remove the scratch
        # project, so a failing assertion cannot break later tests.
        os.chdir("../")
        shutil.rmtree(testdir)