Source code for rom.evaluate_helpers

# -*- coding: utf-8 -*-
"""
.. moduleauthor:: Nicholas Long (nicholas.l.long@colorado.edu, nicholas.lee.long@gmail.com)
"""
import os

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Module-level side effect: as soon as this module is imported, seaborn is
# configured with the "ticks" style and color codes enabled.
sns.set(style="ticks", color_codes=True)


def evaluate_process_cv_results(cv_result_file, response, output_dir):
    """Plot summary figures for a cross-validation results CSV.

    Nothing is done (and ``None`` is returned) when ``cv_result_file`` does
    not exist. Otherwise the CSV is loaded, a fixed set of bookkeeping columns
    is removed, missing values are replaced with 0, and three figures are
    saved into ``output_dir``:

    * ``fig_cv_<response>_pairplot.png`` -- pair plot of the remaining columns
    * ``fig_cv_<response>_time_v_score_hex.png`` -- hexbin joint plot of
      ``mean_fit_time`` against ``mean_test_score``
    * ``fig_cv_<response>_time_v_score.png`` -- scatter plot of the same two
      columns on a "whitegrid" background

    :param cv_result_file: path of the cross-validation results CSV. It must
        contain the columns ``response``, ``downsample``, ``mean_score_time``,
        ``rank_test_score``, ``mean_train_score``, ``mean_fit_time`` and
        ``mean_test_score``; a missing one raises ``KeyError``.
    :param response: label embedded in the output file names.
    :param output_dir: directory the figures are saved into.
    """
    if not os.path.exists(cv_result_file):
        return

    # Load the cv results
    print("Reading CV results file: %s" % cv_result_file)
    df = pd.read_csv(cv_result_file)

    # Columns are dropped by keyword: pandas 2.0 no longer accepts the axis as
    # a positional argument to DataFrame.drop.
    df = df.drop(
        columns=[
            'response',
            'downsample',
            'mean_score_time',
            'rank_test_score',
            'mean_train_score',
        ]
    )

    # Fill in the max_depth that has NA when it was set to auto
    df = df.fillna(0)

    # Drop whatever column is first once the named columns are gone
    # (presumably the index column written alongside the results -- confirm).
    df = df.drop(columns=df.columns[[0]])

    pair_grid = sns.pairplot(df)
    pair_grid.savefig('%s/fig_cv_%s_pairplot.png' % (output_dir, response))
    plt.clf()

    # Hexbin joint plot. jointplot builds its own figure, so no figure is
    # created beforehand; x and y are passed by keyword because seaborn 0.12+
    # rejects positional data arguments.
    joint_grid = sns.jointplot(
        x=df['mean_fit_time'], y=df['mean_test_score'], kind="hex"
    )
    joint_grid.set_axis_labels('Mean Fit Time (seconds)', 'Mean Test Score (fraction)')
    joint_grid.savefig('%s/fig_cv_%s_time_v_score_hex.png' % (output_dir, response))
    plt.clf()

    # Scatter plot of the same data -- whitegrid background
    with plt.rc_context(dict(sns.axes_style("whitegrid"))):
        fig, ax = plt.subplots(figsize=(6.5, 6.5))
        sns.scatterplot(x=df['mean_fit_time'], y=df['mean_test_score'], ax=ax)
        ax.set_xlabel('Mean Fit Time (seconds)')
        ax.set_ylabel('Mean Test Score (fraction)')
        fig.savefig('%s/fig_cv_%s_time_v_score.png' % (output_dir, response))
        plt.clf()
def evaluate_process_model_results(model_results_file, output_dir):
    """Save a horizontal bar chart of model build and cross-validation times.

    Returns immediately when ``model_results_file`` does not exist. Otherwise
    the CSV is read, narrowed to the ``'best'`` model type when that value is
    present, and the ``time_to_build`` / ``time_to_cv`` columns are plotted
    per ``name`` and saved as ``fig_time_to_build.png`` in ``output_dir``.

    :param model_results_file: path of the model results CSV; the code reads
        its ``model_type``, ``name``, ``time_to_build`` and ``time_to_cv``
        columns.
    :param output_dir: directory the figure is saved into.
    """
    if not os.path.exists(model_results_file):
        return

    results = pd.read_csv(model_results_file)

    # Prefer the rows marked 'best' when any exist; otherwise keep every row.
    if 'best' in results.model_type.unique():
        results = results[results.model_type == 'best']

    # When no name falls outside the heating/cooling ETS pair, keep only the
    # heating rows and relabel them with the shared name.
    ets_pair = ['ETSHeatingOutletTemperature', 'ETSCoolingOutletTemperature']
    if not any(label not in ets_pair for label in results.name.unique()):
        results = results[results.name != 'ETSCoolingOutletTemperature']
        results.loc[results.name == 'ETSHeatingOutletTemperature', 'name'] = 'ETSOutletTemperature'

    # Reshape to long form: one row per (name, timing column) pair.
    timing_columns = results[['name', 'time_to_build', 'time_to_cv']]
    long_form = pd.melt(
        timing_columns,
        id_vars='name',
        var_name='model',
        value_name='time',
    )

    figure = plt.figure(figsize=(8, 3), dpi=100)
    # No ax is passed, so barplot draws on the axes of the figure just created.
    axes = sns.barplot(x='time', y='name', hue='model', data=long_form, ci=None)
    axes.set_xlabel('Time (seconds)')
    axes.set_ylabel('')
    plt.tight_layout()
    figure.savefig('%s/fig_time_to_build.png' % output_dir)
    figure.clf()
    plt.clf()
def evaluate_process_all_model_results(data, validation_dir):
    """Write the combined model results and a Pearson summary to CSV.

    Two files are written into ``validation_dir``, both without the index:

    * ``all_model_results.csv`` -- ``data`` exactly as passed in
    * ``pcc_model_results.csv`` -- the ``name``, ``model_method`` and
      ``pearson`` columns of the rows whose ``model_type`` is ``'best'``,
      sorted by ``name`` and then ``model_method``

    :param data: DataFrame with at least the ``name``, ``model_type``,
        ``model_method`` and ``pearson`` columns.
    :param validation_dir: directory the CSV files are written into.
    """
    # Build the sorted 'best' subset first so that a missing column raises
    # before anything is written to disk.
    is_best = data['model_type'] == 'best'
    best_rows = data[is_best]
    best_rows = best_rows.sort_values(by=['name', 'model_method'])

    data.to_csv('%s/all_model_results.csv' % validation_dir, index=False)

    pearson_summary = best_rows[['name', 'model_method', 'pearson']]
    pearson_summary.to_csv('%s/pcc_model_results.csv' % validation_dir, index=False)