Source code for rom.generators.linear_model

# -*- coding: utf-8 -*-
"""
.. moduleauthor:: Nicholas Long (nicholas.l.long@colorado.edu, nicholas.lee.long@gmail.com)
"""
import time
import zipfile

from ..shared import pickle_file, save_dict_to_csv, zipdir
from sklearn.linear_model import LinearRegression

from .model_generator_base import ModelGeneratorBase


class LinearModel(ModelGeneratorBase):
    """
    Model generator that fits one ordinary least-squares ``LinearRegression``
    per response variable and persists the fitted models and their results.
    """

    def __init__(self, analysis_id, random_seed=None, **kwargs):
        """
        Forward all construction arguments unchanged to ``ModelGeneratorBase``.

        :param analysis_id: Identifier of the analysis, passed to the base class.
        :param random_seed: Optional seed, passed to the base class.
        :param kwargs: Additional keyword arguments, passed to the base class.
        """
        super().__init__(analysis_id, random_seed, **kwargs)

    def evaluate(self, model, model_name, model_type, x_data, y_data, downsample,
                 build_time, cv_time, covariates=None, scaler=None):
        """
        Evaluate the performance of the linear model based on known x_data and y_data.
        If the model was scaled, then the test data will already be scaled.

        Delegates the evaluation to the base class, then produces the ANOVA
        plots for the predicted versus known responses.

        :param model: Fitted model to evaluate.
        :param model_name: Name of the model; also passed to ``anova_plots``.
        :param model_type: Model type label forwarded to the base class.
        :param x_data: Input data used for the prediction.
        :param y_data: Known response values to compare the prediction against.
        :param downsample: Downsample setting forwarded to the base class.
        :param build_time: Time spent fitting the model, forwarded to the base class.
        :param cv_time: Cross-validation time, forwarded to the base class.
        :param covariates: Optional covariates forwarded to the base class.
        :param scaler: Optional scaler forwarded to the base class.
        :return: The performance object returned by the base class evaluation.
        """
        yhat, performance = super().evaluate(
            model, model_name, model_type, x_data, y_data, downsample,
            build_time, cv_time, covariates, scaler
        )

        self.anova_plots(y_data, yhat, model_name)

        return performance

    def build(self, metamodel, **kwargs):
        """
        Fit, pickle and evaluate a linear regression for every available response.

        After all responses are processed, the collected results are written to
        ``model_results.csv`` (only when there are results), the pickled models
        are zipped into ``models.zip`` and the dataset is saved as ``data.csv``.

        :param metamodel: Metamodel definition; supplies the response names and
            is passed to the base class ``build`` and to the data split.
        :param kwargs: Additional keyword arguments forwarded to the base class ``build``.
        """
        super().build(metamodel, **kwargs)

        # The scaler returned by the split is not needed by this generator.
        train_x, test_x, train_y, test_y, validate_xy, _scaler = self.train_test_validate_split(
            self.dataset, metamodel, downsample=self.downsample
        )

        # save the validate dataframe to be used later to validate the accuracy of the models
        self.save_dataframe(validate_xy, "%s/lm_validation" % self.validation_dir)

        for response in metamodel.available_response_names(self.model_type):
            print("Fitting Linear Model for %s" % response)
            trained_model = LinearRegression()

            # Time only the fit itself so build_time reflects the training cost.
            start = time.time()
            trained_model.fit(train_x, train_y[response])
            build_time = time.time() - start

            pickle_file(trained_model, '%s/%s' % (self.models_dir, response))

            # No cross-validation is run for the linear model, hence cv_time is 0.
            self.model_results.append(
                self.evaluate(
                    trained_model, response, 'best', test_x, test_y[response],
                    self.downsample, build_time, 0
                )
            )

        if self.model_results:
            save_dict_to_csv(self.model_results, '%s/model_results.csv' % self.base_dir)

        # zip up the models; the context manager guarantees the archive is
        # closed (and its central directory written) even if zipdir raises
        with zipfile.ZipFile(
            '%s/models.zip' % self.models_dir, 'w', zipfile.ZIP_DEFLATED, allowZip64=True
        ) as zipf:
            zipdir(self.models_dir, zipf, '.pkl')

        # save the data that was used in the models for future processing and analysis
        self.dataset.to_csv('%s/data.csv' % self.data_dir)