Source code for nzpyida.analytics.predictive.predictive_modeling

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#-----------------------------------------------------------------------------
# Copyright (c) 2023, IBM Corp.
# All rights reserved.
#
# Distributed under the terms of the BSD Simplified License.
#
# The full license is in the LICENSE file, distributed with this software.
#-----------------------------------------------------------------------------
"""
This module contains a class that is the base for all predictive algorithms.
"""
from nzpyida.frame import IdaDataFrame
from nzpyida.base import IdaDataBase
from nzpyida.analytics.utils import map_to_props, materialize_df, make_temp_table_name
from nzpyida.analytics.utils import call_proc_df_in_out, q
from nzpyida.analytics.model_manager import ModelManager


class PredictiveModeling:
    """
    Generic class for predictive modeling algorithms.
    """

    def __init__(self, idadb: IdaDataBase, model_name: str):
        """
        Creates the predictive modeling class.

        Parameters
        ----------
        idadb : IdaDataBase
            database connector

        model_name : str
            model name - if it exists in the database, it will be used,
            otherwise it must be trained using fit() function before
            prediction or scoring is called.
        """
        self.idadb = idadb
        self.model_name = model_name
        # Name of the NZA stored procedure called by _fit().
        self.fit_proc = ''
        # Procedure name handed to call_proc_df_in_out() by _predict().
        self.predict_proc = ''
        # Name of the NZA stored procedure called by _score().
        self.score_proc = ''
        # When True, _score() returns 1 - <procedure result>.
        self.score_inv = False
        # When not None, used by _score() as the predicted-value column name
        # of the prediction output instead of the target column name.
        self.target_column_in_output = None
        # When not None, used by _score() as the id column name of the
        # prediction output instead of the input id column name.
        self.id_column_in_output = None
        # When True, describe() calls NZA..PRINT_MODEL for this model.
        self.has_print_proc = False

    def _fit(self, in_df: IdaDataFrame, params: dict, needs_id=True):
        """
        Trains the model.

        A model with the same name is dropped through the model manager
        before the fit procedure is called.

        Parameters
        ----------
        in_df : IdaDataFrame
            the input data frame

        params : dict
            the dictionary of attributes used to build the model;
            the dictionary passed by the caller is not modified

        needs_id : bool, optional
            if True (default) and params has no 'id' value, the indexer
            of the input data frame is used as the id column

        Raises
        ------
        TypeError
            if in_df is not an IdaDataFrame, or if an id column is needed
            but neither given in params nor set as the data frame indexer
        """
        if not isinstance(in_df, IdaDataFrame):
            raise TypeError("Argument in_df should be an IdaDataFrame")

        # Work on a shallow copy so the 'id', 'model' and 'intable' entries
        # added below do not leak into the caller's dictionary.
        params = dict(params)

        if not params.get('id', None) and needs_id:
            if in_df.indexer:
                params['id'] = q(in_df.indexer)
            else:
                raise TypeError('Missing id column - either use id_column attribute or set '
                                'indexer column in the input data frame')

        ModelManager(self.idadb).drop_model(self.model_name)

        temp_view_name, need_delete = materialize_df(in_df)

        params['model'] = self.model_name
        params['intable'] = temp_view_name

        params_s = map_to_props(params)

        try:
            self.idadb.ida_query(f"call NZA..{self.fit_proc}('{params_s}')")
        finally:
            # Drop the view only when materialize_df() reported it must be deleted.
            if need_delete:
                self.idadb.drop_view(temp_view_name)

    def _predict(self, in_df: IdaDataFrame, params: dict, out_table: str = None) -> IdaDataFrame:
        """
        Makes predictions based on the model. The model must exist.

        Parameters
        ----------
        in_df : IdaDataFrame
            the input data frame

        params : dict
            the dictionary of attributes used for making predictions;
            the dictionary passed by the caller is not modified

        out_table : str, optional
            the output table where the predictions will be stored

        Returns
        -------
        IdaDataFrame
            the data frame containing row identifiers and predicted target values

        Raises
        ------
        TypeError
            if in_df is not an IdaDataFrame
        KeyError
            if the model is not found by the model manager
        """
        if not isinstance(in_df, IdaDataFrame):
            raise TypeError("Argument in_df should be an IdaDataFrame")

        if not ModelManager(self.idadb).model_exists(self.model_name):
            raise KeyError("Model name not found in Model Manager, "
                           "use 'fit' function to train the model first")

        # Shallow copy: keep the caller's dictionary free of the 'model' entry.
        params = dict(params)
        params['model'] = self.model_name

        # Only the first element of the helper's result is returned.
        return call_proc_df_in_out(proc=self.predict_proc, in_df=in_df, params=params,
                                   out_table=out_table)[0]

    def _score(self, in_df: IdaDataFrame, predict_params: dict, target_column: str) -> float:
        """
        Scores the model. The model must exist.

        Predictions are written to a temporary table, compared with the
        input data by the score procedure, and the temporary objects are
        dropped afterwards.

        Parameters
        ----------
        in_df : IdaDataFrame
            the input data frame

        predict_params : dict
            the dictionary of attributes used for making predictions;
            the dictionary passed by the caller is not modified

        target_column : str
            the input table column representing the class

        Returns
        -------
        float
            the model score

        Raises
        ------
        TypeError
            if in_df is not an IdaDataFrame, or if no id column is given
            in predict_params and the data frame has no indexer
        KeyError
            if the model is not found by the model manager
        """
        if not isinstance(in_df, IdaDataFrame):
            raise TypeError("Argument in_df should be an IdaDataFrame")

        if not ModelManager(self.idadb).model_exists(self.model_name):
            raise KeyError("Model name not found in Model Manager, "
                           "use 'fit' function to train the model first")

        # Shallow copy so the 'id' default set below stays local to this call.
        predict_params = dict(predict_params)

        if not predict_params.get('id', None):
            if in_df.indexer:
                predict_params['id'] = q(in_df.indexer)
            else:
                raise TypeError('Missing id column - either use id_column attribute or set '
                                'indexer column in the input data frame')

        out_table = make_temp_table_name()

        # Flags start as False so the cleanup below never touches a view
        # name that was not assigned because an earlier step failed.
        pred_view_needs_delete, true_view_needs_delete = False, False
        try:
            pred_df = self._predict(in_df=in_df, params=predict_params, out_table=out_table)

            pred_view, pred_view_needs_delete = materialize_df(pred_df)
            true_view, true_view_needs_delete = materialize_df(in_df)

            id_column = predict_params.get('id')

            # Column names of the prediction output may differ from the
            # input ones; the *_in_output attributes override them when set.
            if self.id_column_in_output is None:
                pred_id = q(id_column)
            else:
                pred_id = q(self.id_column_in_output)

            if self.target_column_in_output is None:
                pred_column = q(target_column)
            else:
                pred_column = q(self.target_column_in_output)

            params = map_to_props({
                'pred_table': pred_view,
                'true_table': true_view,
                'pred_id': pred_id,
                'true_id': q(id_column),
                'pred_column': pred_column,
                'true_column': q(target_column)
            })

            res = self.idadb.ida_query(f"call NZA..{self.score_proc}('{params}')")
            return 1 - res[0] if self.score_inv else res[0]
        finally:
            if self.idadb.exists_table_or_view(out_table):
                self.idadb.drop_table(out_table)
            if pred_view_needs_delete and self.idadb.exists_table_or_view(pred_view):
                self.idadb.drop_view(pred_view)
            if true_view_needs_delete and self.idadb.exists_table_or_view(true_view):
                self.idadb.drop_view(true_view)

    def describe(self) -> str:
        """
        Returns model description.

        Returns
        -------
        str
            model description, or an empty string when has_print_proc
            is False
        """
        if self.has_print_proc:
            params = map_to_props({'model': self.model_name})
            return self.idadb.ida_query(f"call NZA..PRINT_MODEL('{params}')")[0]
        return ''