Continuous learning from a trained hyperbox-based model

This example shows how a trained hyperbox-based model can continue learning from new data, i.e. its continuous learning ability.

[1]:
%matplotlib notebook
[2]:
import os
import warnings
warnings.filterwarnings('ignore')
import pandas as pd
import numpy as np

Load training and testing datasets

In this example, we create two training datasets. One dataset is used to train a model, which is then stored to disk. The other dataset is used to continue training the stored model after it has been reloaded.

[3]:
from sklearn.model_selection import StratifiedKFold
[4]:
# Get the path to this Jupyter notebook file
this_notebook_dir = os.path.dirname(os.path.abspath("__file__"))
this_notebook_dir
[4]:
'C:\\hyperbox-brain\\examples\\other_learning_ability_gfmm'
[5]:
# Get the home folder of the hyperbox-brain toolbox
from pathlib import Path
project_dir = Path(this_notebook_dir).parent.parent
project_dir
[5]:
WindowsPath('C:/hyperbox-brain')
[6]:
# Create the paths to the training and testing files
training_file = os.path.join(project_dir, Path("dataset/syn_num_train.csv"))
testing_file = os.path.join(project_dir, Path("dataset/syn_num_test.csv"))
[7]:
# Create training and testing data sets
df_train = pd.read_csv(training_file, header=None)
df_test = pd.read_csv(testing_file, header=None)

Xy_train = df_train.to_numpy()
Xy_test = df_test.to_numpy()

Xtr = Xy_train[:, :-1]
ytr = Xy_train[:, -1]

Xtest = Xy_test[:, :-1]
ytest = Xy_test[:, -1]
[8]:
# Split the existing training set into two stratified training sets
skf = StratifiedKFold(n_splits=2)
# With n_splits=2, each iteration yields one half of the data as the first fold
# and the other half as the second fold; the halves produced by the last
# iteration are kept as the two training sets used below.
for train1_index, train2_index in skf.split(Xtr, ytr):
    X_tr1, X_tr2 = Xtr[train1_index], Xtr[train2_index]
    y_tr1, y_tr2 = ytr[train1_index], ytr[train2_index]
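
For reference, the same two stratified halves could also be obtained with a single call to scikit-learn's train_test_split. This is only a sketch of an alternative and is not used by the cells below:

from sklearn.model_selection import train_test_split

# Split Xtr/ytr into two stratified halves: one to train the initial model,
# the other to continue training it after the stored model is reloaded.
X_tr1, X_tr2, y_tr1, y_tr2 = train_test_split(
    Xtr, ytr, test_size=0.5, stratify=ytr, random_state=0
)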

This example shows how to continue training a deployed model without retraining it from scratch, using an incremental learning algorithm and an agglomerative learning algorithm for demonstration.
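
The overall pattern demonstrated in both sections is sketched below. It uses the OnlineGFMM class and the store_model/load_model helpers imported in the following cells; the file name is only illustrative:

from hbbrain.numerical_data.incremental_learner.onln_gfmm import OnlineGFMM
from hbbrain.utils.model_storage import store_model, load_model

# 1. Train an initial model on the first training set and store it.
clf = OnlineGFMM(theta=0.1, theta_min=0.1, gamma=1)
clf.fit(X_tr1, y_tr1)
store_model(clf, "deployed_model.dummy")

# 2. Later, reload the stored model and continue training it on new data;
#    fitting resumes from the existing hyperboxes rather than starting over.
deployed_clf = load_model("deployed_model.dummy")
deployed_clf.fit(X_tr2, y_tr2)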

1. Training a General fuzzy min-max neural network model using an incremental learning algorithm

[9]:
from hbbrain.numerical_data.incremental_learner.onln_gfmm import OnlineGFMM
[10]:
# Initializing parameters
theta = 0.1
theta_min = 0.1
gamma = 1
is_draw = False
[11]:
onln_gfmm_clf = OnlineGFMM(theta=theta, theta_min=theta_min, gamma=gamma, is_draw=is_draw)
onln_gfmm_clf.fit(X_tr1, y_tr1)
[11]:
OnlineGFMM(C=array([1, 2, 1, 1, 1, 2, 1, 2, 2, 1, 2, 2, 2, 2, 1, 1, 2, 2, 1, 1, 2, 1,
       1, 2, 2, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2]),
           V=array([[0.42413   , 0.53516   ],
       [0.70577   , 0.39146   ],
       [0.82785   , 0.78025   ],
       [0.6416    , 0.4824875 ],
       [0.48794   , 0.672     ],
       [0.26651   , 0.18424   ],
       [0.32527   , 0.60393   ],
       [0.19944   , 0.03      ],
       [0.29343   , 0.28975   ],
       [0.63683   , 0.6936    ],
       [0.32782   , 0.55997   ],
       [0.03      , 0.47757   ],
       [0.54181   , 0.43986   ],
       [0.73368   , 0.2170...
       [0.49255   , 0.43164   ],
       [0.81877   , 0.51294   ],
       [0.4713    , 0.77996   ],
       [0.25281   , 0.41059   ],
       [0.25858   , 0.30637   ],
       [0.3378    , 0.451095  ],
       [0.68354   , 0.41653   ],
       [0.34544   , 0.85954   ],
       [0.59061   , 0.62471   ],
       [0.68628   , 0.65662   ],
       [0.14324   , 0.47785   ],
       [0.41517   , 0.51424   ],
       [0.83355   , 0.5761    ],
       [0.15331   , 0.13567   ],
       [0.25929   , 0.81558   ],
       [0.815     , 0.35526   ],
       [0.67625   , 0.80457   ],
       [0.37033   , 0.26124   ],
       [0.67914   , 0.48785   ]]),
           theta=0.1, theta_min=0.1)

Display decision boundaries among classes if input data are 2-dimensional

[12]:
onln_gfmm_clf.draw_hyperbox_and_boundary("The trained GFMM classifier and its decision boundaries")
[13]:
print("Number of hyperboxes = ", onln_gfmm_clf.get_n_hyperboxes())
Number of hyperboxes =  39

Make prediction

[14]:
from sklearn.metrics import accuracy_score
[15]:
y_pred = onln_gfmm_clf.predict(Xtest)
acc = accuracy_score(ytest, y_pred)
print(f'Accuracy = {acc * 100: .2f}%')
Accuracy =  86.40%

Store the trained model

[16]:
from hbbrain.utils.model_storage import store_model
[17]:
store_model(onln_gfmm_clf, "store_cont_model.dummy")

Reload the trained model and make prediction

[18]:
from hbbrain.utils.model_storage import load_model
[19]:
trained_onln_gfmm_clf = load_model("store_cont_model.dummy")
[20]:
y_pred_trained_model = trained_onln_gfmm_clf.predict(Xtest)
acc = accuracy_score(ytest, y_pred_trained_model)
print(f'Accuracy = {acc * 100: .2f}%')
Accuracy =  86.40%

Continue to train the deployed model using another training set

[21]:
trained_onln_gfmm_clf.fit(X_tr2, y_tr2)
[21]:
OnlineGFMM(C=array([1, 2, 1, 1, 1, 2, 1, 2, 2, 1, 2, 2, 2, 2, 1, 1, 2, 2, 1, 1, 2, 1,
       1, 2, 2, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 1, 1,
       2, 2, 1, 2, 2, 2, 2, 2, 1]),
           V=array([[0.42413   , 0.53516   ],
       [0.70577   , 0.397105  ],
       [0.82785   , 0.78025   ],
       [0.66038   , 0.51128   ],
       [0.48794   , 0.672     ],
       [0.26651   , 0.18424   ],
       [0.32289   , 0.60194   ],
       [0.19944   , 0.03      ],
       [0.29343   , 0.28975   ],
       [0.63683   , 0.6936    ],
       [0.32906   , 0.55512   ],
       [0.03      , 0.47757   ],
       [0.54...
       [0.25929   , 0.81558   ],
       [0.815     , 0.397095  ],
       [0.67906   , 0.83605   ],
       [0.37033   , 0.26124   ],
       [0.66037   , 0.57837   ],
       [0.52197   , 0.91371   ],
       [0.52621   , 0.66846   ],
       [0.80583   , 0.43242   ],
       [0.79935   , 0.7757    ],
       [0.35813   , 0.58772   ],
       [0.79516   , 0.32629   ],
       [0.70743   , 0.50325   ],
       [0.36057   , 0.71561   ],
       [0.72496   , 0.38674   ],
       [0.28822   , 0.62174   ],
       [0.14737   , 0.28498   ],
       [0.56487   , 0.17003   ],
       [0.68469   , 0.2221    ],
       [0.55763   , 0.43813   ]]),
           theta=0.1, theta_min=0.1)

Display decision boundaries among classes if input data are 2-dimensional

[22]:
trained_onln_gfmm_clf.draw_hyperbox_and_boundary("The trained GFMM classifier and its decision boundaries")
[23]:
print("Number of hyperboxes = ", trained_onln_gfmm_clf.get_n_hyperboxes())
Number of hyperboxes =  53

Make prediction

[24]:
y_pred = trained_onln_gfmm_clf.predict(Xtest)
acc = accuracy_score(ytest, y_pred)
print(f'Accuracy = {acc * 100: .2f}%')
Accuracy =  84.50%

2. Training a General fuzzy min-max neural network model using an agglomerative learning algorithm

[26]:
from hbbrain.numerical_data.batch_learner.agglo_gfmm import AgglomerativeLearningGFMM
[27]:
# Initialise parameters
theta = 0.2
gamma = 1
min_simil = 0
simil_measure = 'long'
is_draw = False
[28]:
agglo_sm_clf = AgglomerativeLearningGFMM(theta=theta, gamma=gamma, min_simil=min_simil, simil_measure=simil_measure, is_draw=is_draw)
agglo_sm_clf.fit(X_tr1, y_tr1)
[28]:
AgglomerativeLearningGFMM(min_simil=0, simil_measure='long', theta=0.2)

Display decision boundaries among classes if input data are 2-dimensional

[29]:
agglo_sm_clf.draw_hyperbox_and_boundary("The trained GFMM classifier and its decision boundaries")
[30]:
print("Number of hyperboxes = ", agglo_sm_clf.get_n_hyperboxes())
Number of hyperboxes =  30

Make prediction

[31]:
y_pred_agglosm = agglo_sm_clf.predict(Xtest)
acc = accuracy_score(ytest, y_pred_agglosm)
print(f'Accuracy = {acc * 100: .2f}%')
Accuracy =  84.90%

Store the trained model

[32]:
store_model(agglo_sm_clf, "store_agglosm_model.dummy")

Reload the trained model and make prediction

[33]:
trained_agglo_sm_clf = load_model("store_agglosm_model.dummy")
[34]:
y_pred_trained_agglosm_model = trained_agglo_sm_clf.predict(Xtest)
acc = accuracy_score(ytest, y_pred_trained_agglosm_model)
print(f'Accuracy = {acc * 100: .2f}%')
Accuracy =  84.90%

Continue to train the deployed model using another training set

[35]:
trained_agglo_sm_clf.fit(X_tr2, y_tr2)
[35]:
AgglomerativeLearningGFMM(min_simil=0, simil_measure='long', theta=0.2)

Display decision boundaries among classes if input data are 2-dimensional

[36]:
trained_agglo_sm_clf.draw_hyperbox_and_boundary("The trained GFMM classifier and its decision boundaries")
[37]:
print("Number of hyperboxes = ", trained_agglo_sm_clf.get_n_hyperboxes())
Number of hyperboxes =  51

Make prediction

[38]:
y_pred_agglosm = trained_agglo_sm_clf.predict(Xtest)
acc = accuracy_score(ytest, y_pred_agglosm)
print(f'Accuracy = {acc * 100: .2f}%')
Accuracy =  87.90%