Quickstart

Training a model

Use an estimator by initialising it, fitting it to the training data, and predicting on new data:

from sklearn.datasets import load_iris
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from hbbrain.numerical_data.incremental_learner.onln_gfmm import OnlineGFMM
# Load dataset
X, y = load_iris(return_X_y=True)
# Split data into training and testing sets BEFORE scaling, so that the
# scaler never sees the testing samples (avoids leaking test statistics
# into the preprocessing step)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
# Normalise features into the range of [0, 1] because hyperbox-based models only work in a unit range.
# The min/max values are learned from the training set only and then reused for the testing set.
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
# NOTE: testing values beyond the training min/max are mapped slightly
# outside [0, 1]; this matches what the scaler does inside a Pipeline
X_test = scaler.transform(X_test)
# Training a model
clf = OnlineGFMM(theta=0.1).fit(X_train, y_train)
# Make prediction
y_pred = clf.predict(X_test)
acc = accuracy_score(y_test, y_pred)
print(f'Accuracy = {acc * 100: .2f}%')

In an sklearn Pipeline

Hyperbox-based estimators can also be used as a step in an sklearn Pipeline:

from sklearn.datasets import load_iris
from sklearn.preprocessing import MinMaxScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from hbbrain.numerical_data.incremental_learner.onln_gfmm import OnlineGFMM

# Fetch the iris features and labels
X, y = load_iris(return_X_y=True)
# Hold out 30% of the samples for testing (fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)
# Hyperbox-based classifier used as the final pipeline step
onln_gfmm_clf = OnlineGFMM(theta=0.1)
# Chain the scaler and the classifier: the scaler is fitted on the
# training data only and then applied to whatever is passed afterwards
steps = [
    ('scaler', MinMaxScaler()),
    ('onln_gfmm', onln_gfmm_clf),
]
pipe = Pipeline(steps=steps)
# Fit the whole pipeline, then evaluate it on the held-out samples
acc = pipe.fit(X_train, y_train).score(X_test, y_test)
print(f'Testing accuracy = {acc * 100: .2f}%')