#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#-----------------------------------------------------------------------------
# Copyright (c) 2023, IBM Corp.
# All rights reserved.
#
# Distributed under the terms of the BSD Simplified License.
#
# The full license is in the LICENSE file, distributed with this software.
#-----------------------------------------------------------------------------
"""
This module contains a class that is the base for all regression algorithms.
"""
from typing import Dict
from nzpyida.frame import IdaDataFrame
from nzpyida.base import IdaDataBase
from nzpyida.analytics.utils import map_to_props, materialize_df, make_temp_table_name, q
from nzpyida.analytics.predictive.predictive_modeling import PredictiveModeling
[docs]
class Regression(PredictiveModeling):
"""
Base class for regression algorithms.
"""
def __init__(self, idadb: IdaDataBase, model_name: str):
"""
Creates the regressor class.
Parameters
----------
idada : IdaDataBase
database connector
model_name : str
model name - if it exists in the database, it will be used, otherwise
it must be trained using fit() function before prediction or scoring is called.
"""
super().__init__(idadb, model_name)
self.score_proc = 'MSE'
self.id_column_in_output = idadb.to_def_case('ID')
[docs]
def predict(self, in_df: IdaDataFrame, out_table: str=None,
id_column: str=None) -> IdaDataFrame:
"""
Makes predictions based on this model. The model must exist.
Parameters
----------
in_df : IdaDataFrame
the input data frame to predict
out_table : str, optional
the output table where the predictions will be stored
id_column : str, optional
the input table column identifying a unique instance id
Default: id column used to build the model
Returns
-------
IdaDataFrame
the data frame containing row identifiers and predicted target values
"""
params = {
'id': q(id_column)
}
return self._predict(in_df=in_df, params=params, out_table=out_table)
[docs]
def score(self, in_df: IdaDataFrame, target_column: str,
id_column: str=None) -> float:
"""
Scores the model. The model must exist.
Parameters
----------
in_df : IdaDataFrame
the input data frame for scoring
target_column : str
the input table column representing the class
id_column : str
the input table column identifying a unique instance id - if skipped,
the input data frame indexer must be set and will be used as an instance id
Returns
-------
float
the model score
"""
params = {
'id': q(id_column)
}
return self._score(in_df=in_df, predict_params=params, target_column=target_column)
[docs]
def score_all(self, in_df: IdaDataFrame, target_column: str,
id_column: str=None) -> Dict[str, float]:
"""
Scores the model using MSE, MAE, RSE and RAE. The model must exist.
Parameters
----------
in_df : IdaDataFrame
the input data frame for scoring
target_column : str
the input table column representing the class
id_column : str, optional
the input table column identifying a unique instance id - if skipped,
the input data frame indexer must be set and will be used as an instance id
Returns
-------
dict
the model scores in a dictionary with MSE, MAE, RSE and RAE as keys
"""
if not isinstance(in_df, IdaDataFrame):
raise TypeError("Argument in_df should be an IdaDataFrame")
if not id_column:
if in_df.indexer:
id_column = q(in_df.indexer)
else:
raise TypeError('Missing id column - either use id_column attribute or set '
'indexer column in the input data frame')
out_table = make_temp_table_name()
pred_view_needs_delete, true_view_needs_delete = False, False
try:
pred_df = self.predict(in_df=in_df, out_table=out_table, id_column=id_column)
pred_view, pred_view_needs_delete = materialize_df(pred_df)
true_view, true_view_needs_delete = materialize_df(in_df)
params = map_to_props({
'pred_table': pred_view,
'true_table': true_view,
'pred_id': q(id_column) if self.id_column_in_output is None
else q(self.id_column_in_output),
'true_id': q(id_column),
'pred_column': target_column if self.target_column_in_output is None
else q(self.target_column_in_output),
'true_column': q(target_column)
})
res1 = pred_df.ida_query(f'call NZA..MSE(\'{params}\')')
res2 = pred_df.ida_query(f'call NZA..MAE(\'{params}\')')
res3 = pred_df.ida_query(f'call NZA..RSE(\'{params}\')')
res4 = pred_df.ida_query(f'call NZA..RAE(\'{params}\')')
res_dict = {
"MSE": res1[0],
"MAE": res2[0],
"RSE": res3[0],
"RAE": res4[0]
}
return res_dict
finally:
self.idadb.drop_table(out_table)
if pred_view_needs_delete:
self.idadb.drop_view(pred_view)
if true_view_needs_delete:
self.idadb.drop_view(true_view)