#!/usr/bin/env python
# -*- coding: utf-8 -*-
#-----------------------------------------------------------------------------
# Copyright (c) 2023, IBM Corp.
# All rights reserved.
#
# Distributed under the terms of the BSD Simplified License.
#
# The full license is in the LICENSE file, distributed with this software.
#-----------------------------------------------------------------------------
"""
IdaDataFrameGroupBy
"""
import pandas as pd
import nzpyida
[docs]
class IdaDataFrameGroupBy(object):
"""
Class representing groupby object, that is object created from IdaDataFrame.
It implements Pandas-like object for grouping.
Examples
---------
>>> ida_iris.groupby("CLASS")
<nzpyida.groupby.IdaDataFrameGroupBy at 0x11c288e10>
>>> df.groupby("CLASS").mean().head()
AVG_SEPAL_LENGTH AVG_SEPAL_WIDTH AVG_PETAL_LENGTH AVG_PETAL_WIDTH CLASS
0 5.006 3.418 1.464 0.244 Iris-setosa
1 5.936 2.770 4.260 1.326 Iris-versicolor
2 6.588 2.974 5.552 2.026 Iris-virginica
>>> mean_sepal_length = ida_iris[["SEPAL_LENGTH", "CLASS"]].groupby("CLASS").mean()
>>> mean_sepal_length.head()
AVG_SEPAL_LENGTH CLASS
0 5.006 Iris-setosa
1 5.936 Iris-versicolor
2 6.588 Iris-virginica
>>> iris_groupby = ida_iris[(ida_iris["SEPAL_LENGTH"]>4) & (ida_iris["SEPAL_WIDTH"]<3)][
>>> ["SEPAL_WIDTH", "SEPAL_LENGTH", "CLASS"]].groupby("CLASS")
>>> iris_groupby
<nzpyida.groupby.IdaDataFrameGroupBy object at 0x119863190>
>>> iris_groupby.count().head()
COUNT_SEPAL_WIDTH COUNT_SEPAL_LENGTH CLASS
0 2 2 Iris-setosa
1 21 21 Iris-virginica
2 34 34 Iris-versicolor
"""
def __init__(self, in_df, columns_to_aggregate, by_column) -> None:
"""
Parameters
-----------
in_df : IdaDataFrame
DataFrame is to be grouped
columns_to_aggregate : List[str]
List of columns that should be run by with aggregate functions
by_column : str
Column to group the in_df by
"""
self.in_df = in_df
self.columns_to_aggregate = columns_to_aggregate
self.by_column = by_column
self.name = in_df.internal_state.get_state()
[docs]
def count(self):
"""
Aggregates all columns with COUNT method.
Counts all non null values in column with respect to grouped classes
Returns
-------
IdaDataFrame
Object with grouped data
"""
self.aggregation_method = "COUNT"
return self._execute_query()
[docs]
def sum(self):
"""
Aggregates all columns with SUM method.
Sums all the values in column with respect to grouped classes
Returns
-------
IdaDataFrame
Object with grouped data
"""
self.aggregation_method = "SUM"
return self._execute_query()
[docs]
def min(self):
"""
Aggregates all columns with MIN method.
Selects the minimum value from column with respect to grouped classes
Returns
-------
IdaDataFrame
Object with grouped data
"""
self.aggregation_method = "MIN"
return self._execute_query()
[docs]
def max(self):
"""
Aggregates all columns with MAX method.
Selects the maximum value from column with respect to grouped classes
Returns
-------
IdaDataFrame
Object with grouped data
"""
self.aggregation_method = "MAX"
return self._execute_query()
[docs]
def mean(self):
"""
Aggregates all columns with AVG method.
Calculates mean of values in column with respect to grouped classes
Returns
-------
IdaDataFrame
Object with grouped data
"""
self.aggregation_method = "AVG"
return self._execute_query()
def _execute_query(self):
select_string = f'{self.aggregation_method}(' + \
f'), {self.aggregation_method}('.join(self.columns_to_aggregate) + \
')'
for col in self.columns_to_aggregate:
select_string = select_string.replace(
f'{self.aggregation_method}({col})',
f'{self.aggregation_method}(\"{col}\") AS \"{self.aggregation_method}_{col}\"')
groupby_string = f'GROUP BY \"{self.by_column}\"'
query = 'SELECT ' + select_string + f', \"{self.by_column}\"' + \
f' FROM ({self.name}) AS TEMP_GB ' + groupby_string
idadf=nzpyida.IdaGeoDataFrame(self.in_df._idadb, self.in_df.tablename)
idadf.internal_state._views.append(query)
return idadf