#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#-----------------------------------------------------------------------------
# Copyright (c) 2023, IBM Corp.
# All rights reserved.
#
# Distributed under the terms of the BSD Simplified License.
#
# The full license is in the LICENSE file, distributed with this software.
#-----------------------------------------------------------------------------
"""
This module contains a class that is the base for all predictive algorithms.
"""
from nzpyida.frame import IdaDataFrame
from nzpyida.base import IdaDataBase
from nzpyida.analytics.utils import map_to_props, materialize_df, make_temp_table_name
from nzpyida.analytics.utils import call_proc_df_in_out, q
from nzpyida.analytics.model_manager import ModelManager
[docs]
class PredictiveModeling:
"""
Generic class for predictive modeling algorithms.
"""
def __init__(self, idadb: IdaDataBase, model_name: str):
"""
Creates the predictive modeling class.
Parameters
----------
idada : IdaDataBase
database connector
model_name : str
model name - if it exists in the database, it will be used, otherwise
it must be trained using fit() function before prediction or scoring is called.
"""
self.idadb = idadb
self.model_name = model_name
self.fit_proc = ''
self.predict_proc = ''
self.score_proc = ''
self.score_inv = False
self.target_column_in_output = None
self.id_column_in_output = None
self.has_print_proc = False
def _fit(self, in_df: IdaDataFrame, params:dict, needs_id=True):
"""
Trains the model.
Parameters
----------
in_df : IdaDataFrame
the input data frame
params : dict
the dictionary of attributes used to build the model
"""
if not isinstance(in_df, IdaDataFrame):
raise TypeError("Argument in_df should be an IdaDataFrame")
if not params.get('id', None) and needs_id:
if in_df.indexer:
params['id'] = q(in_df.indexer)
else:
raise TypeError('Missing id column - either use id_column attribute or set '
'indexer column in the input data frame')
ModelManager(self.idadb).drop_model(self.model_name)
temp_view_name, need_delete = materialize_df(in_df)
params['model'] = self.model_name
params['intable'] = temp_view_name
params_s = map_to_props(params)
try:
self.idadb.ida_query(f'call NZA..{self.fit_proc}(\'{params_s}\')')
finally:
if need_delete:
self.idadb.drop_view(temp_view_name)
def _predict(self, in_df: IdaDataFrame, params:dict, out_table: str=None) -> IdaDataFrame:
"""
Makes predictions based on the model. The model must exist.
Parameters
----------
in_df : IdaDataFrame
the input data frame
params : dict
the dictionary of attributes used for making predictions
out_table : str, optional
the output table where the predictions will be stored
Returns
-------
IdaDataFrame
the data frame containing row identifiers and predicted target values
"""
if not isinstance(in_df, IdaDataFrame):
raise TypeError("Argument in_df should be an IdaDataFrame")
if not ModelManager(self.idadb).model_exists(self.model_name):
raise KeyError("Model name not found in Model Manager, "
"use 'fit' function to train the model first")
params['model'] = self.model_name
return call_proc_df_in_out(proc=self.predict_proc, in_df=in_df, params=params,
out_table=out_table)[0]
def _score(self, in_df: IdaDataFrame, predict_params:dict, target_column: str) -> float:
"""
Scores the model. The model must exist.
Parameters
----------
in_df : IdaDataFrame
the input data frame
predict_params : dict
the dictionary of attributes used for making predictions
target_column : str
the input table column representing the class
Returns
-------
float
the model score
"""
if not isinstance(in_df, IdaDataFrame):
raise TypeError("Argument in_df should be an IdaDataFrame")
if not ModelManager(self.idadb).model_exists(self.model_name):
raise KeyError("Model name not found in Model Manager, "
"use 'fit' function to train the model first")
if not predict_params.get('id', None):
if in_df.indexer:
predict_params['id'] = q(in_df.indexer)
else:
raise TypeError('Missing id column - either use id_column attribute or set '
'indexer column in the input data frame')
out_table = make_temp_table_name()
pred_view_needs_delete, true_view_needs_delete = False, False
try:
pred_df = self._predict(in_df=in_df, params=predict_params, out_table=out_table)
pred_view, pred_view_needs_delete = materialize_df(pred_df)
true_view, true_view_needs_delete = materialize_df(in_df)
id_column = predict_params.get('id')
params = map_to_props({
'pred_table': pred_view,
'true_table': true_view,
'pred_id': q(id_column) if self.id_column_in_output is None
else q(self.id_column_in_output),
'true_id': q(id_column),
'pred_column': q(target_column) if self.target_column_in_output is None
else q(self.target_column_in_output),
'true_column': q(target_column)
})
res = self.idadb.ida_query(f'call NZA..{self.score_proc}(\'{params}\')')
return 1-res[0] if self.score_inv else res[0]
finally:
if self.idadb.exists_table_or_view(out_table):
self.idadb.drop_table(out_table)
if pred_view_needs_delete and self.idadb.exists_table_or_view(pred_view):
self.idadb.drop_view(pred_view)
if true_view_needs_delete and self.idadb.exists_table_or_view(true_view):
self.idadb.drop_view(true_view)
[docs]
def describe(self) -> str:
"""
Returns model description.
Returns
-------
str
model description
"""
if self.has_print_proc:
params = map_to_props({'model': self.model_name})
return self.idadb.ida_query(f'call NZA..PRINT_MODEL(\'{params}\')')[0]
return ''