# Source code for projit.cli

import argparse
import pandas as pd
import numpy as np
import sys
import os

# -*- coding: utf-8 -*-
  
"""
   projit.cli: Command line interface for projit.
   This file provides argument parsing and execution via the entry point main()
"""

from .utils import locate_projit_config
from .config import config_folder
from .utils import initialise_project
from .utils import get_properties
from .utils import write_properties
from .projit import load as projit_load
from .projit import init as projit_init
from .ascii_plot import ascii_plot
from .latex_table import print_latex

from projit import __version__

# Module-level handle to the loaded project. main() checks it in its
# ``finally`` clause and calls release_lock() on it when it is not None.
project = None

##################################################################################

[docs]
def task_init(name, template=''):
    """
    CLI Internal Task Function: Initialise a project from the command line.

    The project is created with a blank description; users are expected to
    fill it in afterwards (the error message below points at
    `projit update`).

    :param name: The name of the project
    :type name: String, required

    :param template: The name of the template to use when initialising.
        A leading ``template=`` prefix is stripped, so both ``mytemplate``
        and ``template=mytemplate`` are accepted. A bare ``template=`` is
        treated as "no template".
    :type template: String, optional

    :return: None
    :rtype: None
    """
    if locate_projit_config() != "":
        print("ERROR: Projit Project already exists. Run `projit update` to change details.")
        # sys.exit rather than the interactive-only ``exit`` helper, which is
        # not guaranteed to exist (e.g. under ``python -S``).
        sys.exit(1)

    prefix = "template="
    if template.startswith(prefix):
        template = template[len(prefix):]

    # The description starts empty; the returned project object is not needed.
    projit_init(template, name, "")



##################################################################################

[docs]
def task_update(project):
    """
    CLI Internal Task Function: Update a project from the command line

    Shows the current name and description, prompts on the terminal for
    replacements, and stores the result on the project. An empty answer
    keeps the existing value.

    :param project: The projit project object
    :type project: Projit, required

    :return: None
    :rtype: None
    """
    print("Current Project Name: ", project.name)
    print("Enter an alternative project name (or press enter to keep)")
    new_name = input(">")

    print("Current Description: ", project.desc)
    print("Enter an alternative description (or press enter to keep)")
    new_desc = input(">")

    # Blank input means "leave unchanged".
    project.update_name_description(
        project.name if new_name == "" else new_name,
        project.desc if new_desc == "" else new_desc,
    )


##################################################################################

[docs]
def task_status(project):
    """
    CLI Internal Task Function: Print a short summary of the project.

    The summary shows the project name and description followed by the
    number of datasets, experiments and executions it holds.

    :param project: The projit project object
    :type project: Projit, required

    :return: None
    :rtype: None
    """
    summary = (
        ("  Project: %s", project.name),
        ("  Description: %s", project.desc),
        ("  Datasets: %i", len(project.datasets)),
        ("  Experiments: %i", len(project.experiments)),
        ("  Executions: %i", len(project.executions)),
    )
    print("")
    for template, value in summary:
        print(template % value)
    print("")


##################################################################################

[docs]
def filler(current, max_len, content=" "):
    """
    Internal function: build the padding needed to extend content of length
    `current` out to `max_len`.

    :param current: The length of the current content
    :type current: Int, required

    :param max_len: The target string length
    :type max_len: Int, required

    :param content: The character to pad with (default ' ')
    :type content: Char, optional

    :return: The padding string; empty when `current` already reaches
        or exceeds `max_len`
    :rtype: String
    """
    missing = max_len - current
    return content * missing


##################################################################################

[docs]
def print_header(header):
    """
    Print a section header padded on the right with underscores to a total
    width of 90 characters. Longer headers are printed unchanged.

    :param header: The header text
    :type header: String, required

    :return: None
    :rtype: None
    """
    print(header.ljust(90, "_"))


##################################################################################

[docs]
def task_compare(project, datasets, metric, format, precision):
    """
    CLI Internal Task Function: Compare results across multiple datasets.

    Loads the results for each dataset, keeps only the ``experiment`` column
    and the requested metric, and joins the per-dataset columns on
    ``experiment``. The join is an inner merge, so only experiments with
    results on every listed dataset appear in the output.

    A dataset whose results lack the metric contributes a NaN column and a
    warning is printed before the table.

    :param project: The projit project object
    :type project: Projit, required

    :param datasets: The list of datasets to compare
    :type datasets: list(String), required

    :param metric: The metric to use for comparison
    :type metric: String, required

    :param format: The output format (markdown|latex|default)
    :type format: String, required

    :param precision: The precision for results in the table
    :type precision: Int, required

    :return: None
    :rtype: None

    Exits with status 1 when `datasets` is empty.
    """
    title = "Compare Results"
    warnings = []
    results = None
    for dataset in datasets:
        rez = project.get_results(dataset)
        if metric not in rez.columns:
            # assign() returns a new frame, so the frame handed out by the
            # project is not modified when the missing column is synthesised.
            rez = rez.assign(**{metric: np.nan})
            warnings.append(f"Metric '{metric}' not present for dataset '{dataset}'")
        rez = rez.loc[:, ['experiment', metric]]
        rez.columns = ['experiment', dataset]
        if results is None:
            results = rez
        else:
            results = pd.merge(results, rez, on="experiment")

    if results is None:
        # Nothing was loaded: fail with a clear message instead of an
        # AttributeError on None further down.
        print("ERROR: No datasets were provided to compare.")
        sys.exit(1)

    if warnings:
        print("*** WARNINGS ***")
        print("\n".join(warnings) + "\n")

    results = results.round(precision)

    if format == 'markdown':
        print_results_markdown(title, results)
    elif format == 'latex':
        print_results_latex(title, results)
    else:
        print(" ___" + title + "__________________________________[ %s ]___" % metric)
        # Widen the display only while printing so the process-wide pandas
        # options are left as they were.
        with pd.option_context('display.expand_frame_repr', False,
                               'display.max_columns', 999):
            print(results)




[docs]
def extract_max_tags_lengths(project, asset, tags):
    """
    CLI Internal Function: determine the column width needed for each of a
    set of tags on one asset type in the project.

    For every requested tag the result is the longest value stored for that
    tag across the assets of the given type, but never less than the length
    of the tag name itself. When the asset type has no entry in
    ``project.tags`` at all, every width is 0.

    :param project: The projit project object
    :type project: Projit, required

    :param asset: The asset type
    :type asset: String, required

    :param tags: The tags to search for
    :type tags: list(String), required

    :return: List of tag lengths, one per requested tag
    :rtype: list(Int)
    """
    if asset not in project.tags:
        return [0 for _ in tags]

    tagset = project.tags[asset]
    widths = []
    for tag in tags:
        # default=0 covers an asset type with no tagged assets (or none
        # carrying this tag), where a bare max() would raise ValueError.
        longest_value = max(
            (len(entry[tag]) for entry in tagset.values() if tag in entry),
            default=0,
        )
        widths.append(max(longest_value, len(tag)))
    return widths



################################################################################

[docs]
def task_list(subcmd, project, dataset, format, precision, tags):
    """
    CLI Internal Task Function: List content of a project from the command line

    :param subcmd: What to list (datasets|experiments|results)
    :type subcmd: String, required

    :param project: The projit project object
    :type project: Projit, required

    :param dataset: Restrict results to this dataset ('' for all results);
        only used by the ``results`` sub-command
    :type dataset: String, required

    :param format: The output format for results (markdown|latex|default)
    :type format: String, required

    :param precision: The precision for results in the table
    :type precision: Int, required

    :param tags: Tags to show as extra columns when listing datasets or
        experiments; any empty value means "no tag columns"
    :type tags: list(String), required

    :return: None
    :rtype: None

    Exits with status 1 when `subcmd` is not recognised.
    """
    if not tags:
        tags = []

    print()
    if subcmd == "datasets":
        print_header("__Datasets")
        if len(project.datasets) > 0:
            tag_widths = extract_max_tags_lengths(project, "dataset", tags) if tags else []
            long_key = max(len(k) for k in project.datasets)
            print_header(
                "__Name" + filler(len("Name"), long_key + 3, "_")
                + _tag_columns(tags, tag_widths, "_") + "Path_________"
            )
            for ds in project.datasets:
                tag_output = ""
                if tags:
                    tag_vals = project.get_tags("dataset", ds, tags)
                    tag_output = _tag_columns(tag_vals, tag_widths, " ")
                print("  ", ds, filler(len(ds), long_key + 3), tag_output,
                      project.datasets[ds], sep="")
        else:
            print(" NONE")
        print("")
    elif subcmd == "experiments":
        print_header("__Experiments")
        if len(project.experiments) > 0:
            tag_widths = extract_max_tags_lengths(project, "experiment", tags) if tags else []
            # Each experiment entry is indexed as (name, path).
            long_key = max(len(exp[0]) for exp in project.experiments)
            print_header(
                "__Name__" + filler(len("Name__"), long_key + 3, "_")
                + _tag_columns(tags, tag_widths, "_")
                + "Runs__" + "MeanRunTime___" + "Path______"
            )
            for exp in project.experiments:
                tag_output = ""
                if tags:
                    tag_vals = project.get_tags("experiment", exp[0], tags)
                    tag_output = _tag_columns(tag_vals, tag_widths, " ")
                execs, mean_time = project.get_experiment_execution_stats(exp[0])
                print("  ", exp[0], filler(len(exp[0]), long_key + 3), tag_output,
                      filler(len(str(execs)), 4), execs, "  ",
                      _format_mean_runtime(mean_time),
                      exp[1], sep="")
        else:
            print(" NONE")
        print("")
    elif subcmd == "results":
        title = "Results"
        if dataset == "":
            rez = project.get_results()
        else:
            rez = project.get_results(dataset)
            title += " on [%s]" % dataset

        rez = rez.round(precision)

        if format == 'markdown':
            print_results_markdown(title, rez)
        elif format == 'latex':
            print_results_latex(title, rez)
        else:
            print_header(f"__Results__[{dataset}]")
            pd.set_option('expand_frame_repr', False)
            pd.set_option('display.max_columns', 999)
            print(rez)
            print()
    else:
        print(" ERROR: List received an unrecognised sub-command: %s" % subcmd)
        # sys.exit rather than the interactive-only ``exit`` helper.
        sys.exit(1)


def _tag_columns(labels, widths, pad):
    """Join labels into one string, padding each to its width + 3 with `pad`."""
    return "".join(
        label + filler(len(label), width + 3, pad)
        for label, width in zip(labels, widths)
    )


def _format_mean_runtime(mean_time):
    """Render a duration in seconds as the text `` HHh MMm SSs  ``; zero units are blank."""
    total_mins, secs = divmod(mean_time, 60)
    # Always split minutes into hours so that exactly 60 minutes reads as
    # "1h" rather than "60m".
    hours, mins = divmod(total_mins, 60)

    def unit(value, suffix):
        # Each unit is blanked only when it is itself zero; in particular
        # 1-9 seconds are still shown when the minutes are zero.
        value = int(value)
        if value == 0:
            return "   "
        return f"{value:>2}{suffix}"

    return f" {unit(hours, 'h')} {unit(mins, 'm')} {unit(secs, 's')}  "




###############################################################################

[docs]
def task_render(project, path):
    """
    CLI Internal Task Function: Render the project to the given path.

    This simply delegates to the project's own ``render`` method.

    :param project: The projit project object
    :type project: Projit, required

    :param path: The rendering path
    :type path: String, required

    :return: None
    :rtype: None
    """
    project.render(path)



###############################################################################


[docs]
def print_results_latex(title, df):
    """
    Print a results table in LaTeX format.

    Kept as a single central wrapper around print_latex so that the LaTeX
    output can be changed in one place later.

    :param title: The table title
    :type title: String, required

    :param df: The dataframe to print out
    :type df: DataFrame, required

    :return: None
    :rtype: None
    """
    # Note the argument order: print_latex takes the frame first.
    print_latex(df, title)


###############################################################################


[docs]
def print_results_markdown(title, df):
    """
    Print a results table in Markdown format.

    The ``experiment`` column is always printed first and left-aligned; all
    other columns follow in frame order, right-aligned. Numeric cells are
    rounded to 2 decimal places for display; cells that cannot be rounded
    (e.g. strings) are printed as-is.

    Rows are read by position, so the frame does not need a 0..n-1 index,
    and an empty frame prints just the title and header rows.

    :param title: The table title
    :type title: String, required

    :param df: The dataframe to print out; must contain an 'experiment' column
    :type df: DataFrame, required

    :return: None
    :rtype: None
    """
    def cell_text(value):
        # round() raises TypeError for non-numeric values; show those verbatim.
        try:
            return str(round(value, 2))
        except TypeError:
            return str(value)

    names = [str(n) for n in df["experiment"]]
    longest_name = max((len(n) for n in names), default=0)
    # The name column is at least 12 wide, which fits the 'experiment' label.
    name_width = longest_name + 2 if longest_name > 10 else 12

    value_cols = [c for c in df.columns if c != "experiment"]
    value_labels = [str(c) for c in value_cols]
    # Pre-render every cell once; reused for both width calculation and output.
    value_cells = [[cell_text(v) for v in df[c]] for c in value_cols]

    value_widths = []
    for label, cells in zip(value_labels, value_cells):
        base = 8 if len(label) < 6 else len(label) + 2
        longest_cell = max((len(text) for text in cells), default=0)
        value_widths.append(max(base, longest_cell + 2))

    # Label the header in the same order the rows are printed (experiment
    # first), regardless of where that column sits in the frame.
    labels = ["experiment"] + value_labels
    widths = [name_width] + value_widths

    print("\n%s\n%s" % (title, "-" * len(title)))
    header = "".join(
        "| %s%s " % (label, " " * (width - len(label) - 2))
        for label, width in zip(labels, widths)
    )
    under = "".join("| %s:" % ("-" * (width - 2)) for width in widths)
    print(header + "|")
    print(under + "|")

    for row, name in enumerate(names):
        line = "| %s%s " % (name, " " * (name_width - len(name) - 2))
        for cells, width in zip(value_cells, value_widths):
            text = cells[row]
            line += "| %s%s " % (" " * (width - len(text) - 2), text)
        print(line + "|")
    print()


###############################################################################

[docs]
def task_add(project, asset, name, path):
    """
    CLI Internal Task Function: Add an element to a project from the
    command line.

    :param project: The projit project object
    :type project: Projit, required

    :param asset: The asset type to add (dataset|experiment)
    :type asset: String, required

    :param name: The name to register the asset under
    :type name: String, required

    :param path: The path of the asset
    :type path: String, required

    :return: None
    :rtype: None

    Exits with status 1 when `asset` is not a recognised type.
    """
    if asset == "dataset":
        project.add_dataset(name, path)
    elif asset == "experiment":
        project.add_experiment(name, path)
    else:
        print("ERROR: Request to add unrecognised asset type: %s" % asset)
        # sys.exit rather than the interactive-only ``exit`` helper, which is
        # not guaranteed to exist (e.g. under ``python -S``).
        sys.exit(1)


################################################################################

[docs]
def task_tag(project, asset, name, values):
    """
    CLI Internal Task Function: Add tags to an asset in the project from
    the command line.

    :param project: The projit project object
    :type project: Projit, required

    :param asset: The asset type to tag
    :type asset: String, required

    :param name: The name of the asset to tag
    :type name: String, required

    :param values: Comma separated ``key=value`` pairs, e.g.
        ``source=kaggle,stage=raw``. Only the first ``=`` in each pair
        separates key from value, so values may themselves contain ``=``.
    :type values: String, required

    :return: None
    :rtype: None

    Exits with status 1 when a pair has no ``=`` or when the project does
    not validate the asset.
    """
    tags = {}
    for pair in values.split(","):
        key, separator, value = pair.partition("=")
        if not separator:
            # Report a malformed pair clearly instead of failing with an
            # IndexError while unpacking it.
            print(f"ERROR: Invalid tag '{pair}' - tags must be given as key=value pairs separated by commas")
            sys.exit(1)
        tags[key] = value

    if project.validate_asset(asset, name):
        project.add_tags(asset, name, tags)
    else:
        print(f"ERROR: Invalid request to tag asset {name} of type {asset} - please check available assets")
        sys.exit(1)



###############################################################################

[docs]
def task_rm(project, asset, name):
    """
    CLI Internal Task Function: Remove elements from a project from the
    command line, after asking for confirmation on the terminal.

    :param project: The projit project object
    :type project: Projit, required

    :param asset: The asset type to remove (dataset|experiment)
    :type asset: String, required

    :param name: The name of the asset to remove. The prompt treats '.' as
        "all assets of this type"; the name is passed unchanged to the
        project's remove method.
    :type name: String, required

    :return: None
    :rtype: None

    Exits with status 1 when `asset` is not a recognised type.
    """
    if asset not in ("dataset", "experiment"):
        print("ERROR: Request to remove unrecognised asset type: %s" % asset)
        # sys.exit rather than the interactive-only ``exit`` helper.
        sys.exit(1)

    if name == ".":
        print(f"Remove all {asset}s. Please confirm (y/n)")
    else:
        print(f"Remove {asset} named {name}. Please confirm (y/n)")
    response = input(">")

    # Accept the usual spellings of "yes" so that 'Y' or ' y' is not
    # silently treated as a cancellation.
    if response.strip().lower() in ("y", "yes"):
        if asset == "dataset":
            project.rm_dataset(name)
        else:
            project.rm_experiment(name)
    else:
        print(f"** Remove command for {asset} named {name} cancelled ** ")


###############################################################################

[docs]
def task_plot(project, experiment, property, metric):
    """
    CLI Internal Task Function: Plot a property of an experiment as an
    ASCII chart on the terminal.

    Only the ``execution`` property is plotted at present; ``hyperparam``
    and ``result`` print a header followed by a TODO placeholder.

    :param project: The projit project object
    :type project: Projit, required

    :param experiment: The name of the experiment to plot
    :type experiment: String, required

    :param property: The property to plot (execution|hyperparam|result)
    :type property: String, required

    :param metric: The hyperparameter or result name; only shown in the
        header for ``hyperparam`` and ``result``
    :type metric: String, required

    :return: None
    :rtype: None
    """
    if property not in ("execution", "hyperparam", "result"):
        print()
        print(f"\nUnrecognized Experiment Property [{property}] -- Valid Options [execution,hyperparam,result]")
        print()
        return

    print()
    if property == "execution":
        print_header(f"__Experiment_[{experiment}]_execution_time_")
        timings = project.get_execution_times(experiment)
        chart = ascii_plot(timings, xlabel='Iteration', ylabel='Seconds',  width=70, height=12)
        print(chart)
    elif property == "hyperparam":
        print_header(f"__Experiment_[{experiment}]_hyperparameter_[{metric}]_")
        print("  TODO")
    else:
        print_header(f"__Experiment_[{experiment}]_result_[{metric}]_")
        print("  TODO")
    print()



#################################################################################

[docs]
def print_usage(prog):
    """
    Print the command line usage instructions, including common examples.

    :param prog: The program name to show in the usage text
    :type prog: String, required

    :return: None
    :rtype: None
    """
    print(" USAGE ")
    print(" ", prog, "[OPTIONS] <COMMAND> [<ASSET>] [<PARAMS>*]")
    print("   <COMMAND>     - CORE TASK TO PERFORM: [init | update | rm | status | add | tag | list | plot | compare | render]")
    print("   <ASSET>       - (OPTIONAL) Dependant on COMMAND: [dataset | experiment | results]")
    print("   <PARAMS>      - (OPTIONAL) Dependant on COMMAND: Usually names and paths")
    print("   [OPTIONS]")
    print("      -v, --version          - Print version")
    print("      -h, --help             - Get command help")
    print("      -u, --usage            - Print detailed usage instructions with examples")
    print("      -m, --markdown         - Use markdown format when printing results")
    print("      -l, --latex            - Use LaTeX format when printing results")
    print("      -p, --precision <N>    - Set the numerical precision to <N>")
    print("")
    print("   COMMON USAGE PATTERNS")
    print("   ", prog, "init 'Project name'                     # Initialise project")
    print("   ", prog, "status                                  # View project status")
    print("   ", prog, "add dataset train data/train.csv        # Register training data")
    print("   ", prog, "add dataset test data/test.csv          # Register testing data")
    print("   ", prog, "add experiment explore explore.ipynb    # Register an experiment script")
    print("   ", prog, "tag dataset train source=kaggle         # Add the tag source=kaggle to the dataset 'train'")
    print("   ", prog, "list datasets                           # List the available datasets")
    print("   ", prog, "list experiments                        # List the registered experiments")
    print("   ", prog, "list results                            # List the registered results ")
    print("   ", prog, "list results test                       # List the registered results on dataset 'test' ")
    print("   ", prog, "plot initial execution                  # Plot the execution times for the experiment named 'initial'")
    print("   ", prog, "plot initial hyperparam alpha           # Plot the change in hyperparam 'alpha' for the experiment named 'initial'")
    print("   ", prog, "plot initial result MSE                 # Plot the change in result 'MSE' for the experiment named 'initial'")
    print("   ", prog, "render path_to_output.pdf               # Render a PDF document summarising the project")
    print("   ", prog, "-m list results test                    # List results on 'test' data in Markdown format")
    print("   ", prog, "rm experiment explore                   # Remove the experiment explore (requires confirmation)")
    print("   ", prog, "rm experiment .                         # Remove all experiments (requires confirmation)")
    print("   ", prog, "compare dataone,datatwo MAE             # Compare results over datasets using metric MAE")
    print("")



###############################################################################

[docs]
def main():
    """
    Entry point for the projit command line interface.

    Runs cli_main(). Any Exception it raises is reported on standard output
    and the process then exits with status 1, so that callers and shell
    scripts can detect the failure. Whatever happens, if the module-level
    ``project`` has been set, its lock is released before returning.

    :return: None
    :rtype: None
    """
    try:
        cli_main()
    except Exception as e:
        print("*** Projit CLI Error ***")
        print(e)
        # Signal failure to the caller; the ``finally`` clause below still
        # runs before the interpreter exits.
        sys.exit(1)
    finally:
        if project is not None:
            project.release_lock()



###############################################################################

[docs]
def cli_main():
    """
    Parse the command line arguments and dispatch to the matching task.

    ``--version`` and ``--usage`` are handled first, then ``init``. Every
    other command requires an existing projit project, which is located
    and loaded before the task function runs. Calling with no command
    prints the usage text and exits with status 1.

    The loaded project is stored in the module-level ``project`` variable
    so that main() can release its lock when the command finishes.

    :return: None
    :rtype: None
    """
    # Without this declaration the assignment below would create a local and
    # main() would never see the loaded project.
    # NOTE(review): this makes main() call project.release_lock() after every
    # command - confirm release_lock() is safe when no lock is held.
    global project

    parser = argparse.ArgumentParser()
    parser.add_argument('-v', '--version', help='Print Version', action='store_true')
    parser.add_argument('-m', '--markdown', help='Use markdown for output', action='store_true')
    parser.add_argument('-l', '--latex', help='Use LaTeX for output - overrides markdown', action='store_true')
    parser.add_argument('-u', '--usage', help='Print detailed usage instructions with examples', action='store_true')
    parser.add_argument('-p', '--precision', help='Define numerical precision', type=int, default=3)

    subparsers = parser.add_subparsers(dest="cmd")

    init_parser = subparsers.add_parser('init')
    init_parser.add_argument('name')
    init_parser.add_argument('template', nargs='?', default="")

    subparsers.add_parser('update')

    add_parser = subparsers.add_parser('add')
    add_parser.add_argument('asset')
    add_parser.add_argument('name')
    add_parser.add_argument('path')

    tag_parser = subparsers.add_parser('tag')
    tag_parser.add_argument('asset')
    tag_parser.add_argument('name')
    tag_parser.add_argument('values')

    list_parser = subparsers.add_parser('list')
    list_parser.add_argument('subcmd')
    list_parser.add_argument('dataset', nargs='?', default="")
    list_parser.add_argument('--tags', nargs='+', default=[])

    plot_parser = subparsers.add_parser('plot')
    plot_parser.add_argument('experiment')
    plot_parser.add_argument('property')
    plot_parser.add_argument('metric', nargs='?', default="")

    rm_parser = subparsers.add_parser('rm')
    rm_parser.add_argument('asset')
    rm_parser.add_argument('name')

    comp_parser = subparsers.add_parser('compare')
    comp_parser.add_argument('datasets')
    comp_parser.add_argument('metric')

    ren_parser = subparsers.add_parser('render')
    ren_parser.add_argument('path')

    subparsers.add_parser('status')

    args = parser.parse_args()

    # Informational requests succeed, so they exit with status 0.
    if args.version:
        print(" Version:", __version__)
        sys.exit(0)

    if args.usage:
        print_usage("projit")
        sys.exit(0)

    # No command at all is a usage error.
    if args.cmd is None:
        print_usage("projit")
        sys.exit(1)

    if args.cmd == "init":
        # task_init exits with status 1 itself if a project already exists.
        task_init(args.name, args.template)
        sys.exit(0)

    # From this point on all commands require that we are inside a valid
    # projit project.
    config_path = locate_projit_config()
    if config_path == "":
        print(" ERROR: This is not a projit project.")
        print("        Please initialise the project first.")
        print(" > projit init <PROJECT NAME>")
        sys.exit(1)

    project = projit_load(config_path)

    # LaTeX takes priority over markdown when both flags are given.
    if args.latex:
        out_format = 'latex'
    elif args.markdown:
        out_format = 'markdown'
    else:
        out_format = 'simple'

    if args.cmd == 'list':
        task_list(args.subcmd, project, args.dataset, out_format, args.precision, args.tags)
    elif args.cmd == 'compare':
        datasets = args.datasets.split(",")
        task_compare(project, datasets, args.metric, out_format, args.precision)
    elif args.cmd == 'add':
        task_add(project, args.asset, args.name, args.path)
    elif args.cmd == 'tag':
        task_tag(project, args.asset, args.name, args.values)
    elif args.cmd == 'rm':
        task_rm(project, args.asset, args.name)
    elif args.cmd == 'plot':
        task_plot(project, args.experiment, args.property, args.metric)
    elif args.cmd == 'update':
        task_update(project)
    elif args.cmd == 'status':
        task_status(project)
    elif args.cmd == 'render':
        task_render(project, args.path)



#################################################################################
# Run the CLI when this module is executed directly rather than imported.
if __name__ == '__main__':
    main()