

Ensemble (Voting and Bagging)

A machine learning ensemble is a method of finding the optimal answer using multiple machine learning models.

  • Train multiple models on the data and combine (for example, average) the predictions of all models

Types of ensemble techniques

  • Voting: derive the final result by a vote across the predictions of different models

  • Bagging: derive the result from duplicated (bootstrap) samples of the data

  • Boosting: reweight samples while compensating for the errors of previous models

  • Stacking: a meta-model makes a final prediction from the predictions of multiple base models (see the sketch below)
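The voting and bagging variants are worked through in code below. Stacking is only mentioned here, so the following is a minimal sketch using scikit-learn's StackingRegressor; the choice of base models and meta-model is an illustrative assumption, not part of the original post.

# Minimal stacking sketch (illustrative): base models produce predictions,
# and a meta-model (final_estimator) is trained on those predictions.
from sklearn.ensemble import StackingRegressor, RandomForestRegressor
from sklearn.linear_model import Ridge, Lasso, LinearRegression

stack = StackingRegressor(
    estimators=[('ridge', Ridge()), ('lasso', Lasso()),
                ('rf', RandomForestRegressor())],
    final_estimator=LinearRegression(),
)
# stack.fit(x_train, y_train); stack.predict(x_test)  # same interface as any regressor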

References

import pandas as pd
import numpy as np
from IPython.display import Image

np.set_printoptions(suppress=True)
from sklearn.datasets import load_boston  # note: load_boston was removed in scikit-learn 1.2, so an older version is needed to run this cell
data = load_boston()
df = pd.DataFrame(data['data'], columns = data['feature_names'])
df['MEDV'] = data['target']
df.head()
CRIM ZN INDUS CHAS NOX RM AGE DIS RAD TAX PTRATIO B LSTAT MEDV
0 0.00632 18.0 2.31 0.0 0.538 6.575 65.2 4.0900 1.0 296.0 15.3 396.90 4.98 24.0
1 0.02731 0.0 7.07 0.0 0.469 6.421 78.9 4.9671 2.0 242.0 17.8 396.90 9.14 21.6
2 0.02729 0.0 7.07 0.0 0.469 7.185 61.1 4.9671 2.0 242.0 17.8 392.83 4.03 34.7
3 0.03237 0.0 2.18 0.0 0.458 6.998 45.8 6.0622 3.0 222.0 18.7 394.63 2.94 33.4
4 0.06905 0.0 2.18 0.0 0.458 7.147 54.2 6.0622 3.0 222.0 18.7 396.90 5.33 36.2
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(df.drop('MEDV', axis=1), df['MEDV'])
x_train.shape, x_test.shape
((379, 13), (127, 13))
x_train.head()
CRIM ZN INDUS CHAS NOX RM AGE DIS RAD TAX PTRATIO B LSTAT
479 14.33370 0.0 18.10 0.0 0.614 6.229 88.0 1.9512 24.0 666.0 20.2 383.32 13.11
370 6.53876 0.0 18.10 1.0 0.631 7.016 97.5 1.2024 24.0 666.0 20.2 392.05 2.96
43 0.15936 0.0 6.91 0.0 0.448 6.211 6.5 5.7209 3.0 233.0 17.9 394.46 7.44
173 0.09178 0.0 4.05 0.0 0.510 6.416 84.1 2.6463 5.0 296.0 16.6 395.50 9.04
197 0.04666 80.0 1.52 0.0 0.404 7.107 36.6 7.3090 2.0 329.0 12.6 354.31 8.61
y_train.head()
479    21.4
370    50.0
43     24.7
173    23.6
197    30.3
Name: MEDV, dtype: float64
from sklearn.metrics import mean_absolute_error, mean_squared_error
import matplotlib.pyplot as plt
import seaborn as sns

my_predictions = {}

colors = ['r', 'c', 'm', 'y', 'k', 'khaki', 'teal', 'orchid', 'sandybrown',
          'greenyellow', 'dodgerblue', 'deepskyblue', 'rosybrown', 'firebrick',
          'deeppink', 'crimson', 'salmon', 'darkred', 'olivedrab', 'olive', 
          'forestgreen', 'royalblue', 'indigo', 'navy', 'mediumpurple', 'chocolate',
          'gold', 'darkorange', 'seagreen', 'turquoise', 'steelblue', 'slategray', 
          'peru', 'midnightblue', 'slateblue', 'dimgray', 'cadetblue', 'tomato'
         ]

def plot_predictions(name_, pred, actual):
    df = pd.DataFrame({'prediction': pred, 'actual': actual})
    df = df.sort_values(by='actual').reset_index(drop=True)

    plt.figure(figsize=(12, 9))
    plt.scatter(df.index, df['prediction'], marker='x', color='r')
    plt.scatter(df.index, df['actual'], alpha=0.7, marker='o', color='black')
    plt.title(name_, fontsize=15)
    plt.legend(['prediction', 'actual'], fontsize=12)
    plt.show()

def mse_eval(name_, pred, actual):
    global my_predictions
    global colors

    plot_predictions(name_, pred, actual)

    mse = mean_squared_error(pred, actual)
    my_predictions[name_] = mse

    y_value = sorted(my_predictions.items(), key=lambda x: x[1], reverse=True)
    
    df = pd.DataFrame(y_value, columns=['model', 'mse'])
    print(df)
    min_ = df['mse'].min() - 10
    max_ = df['mse'].max() + 10
    
    length = len(df)
    
    plt.figure(figsize=(10, length))
    ax = plt.subplot()
    ax.set_yticks(np.arange(len(df)))
    ax.set_yticklabels(df['model'], fontsize=15)
    bars = ax.barh(np.arange(len(df)), df['mse'])
    
    for i, v in enumerate(df['mse']):
        idx = np.random.choice(len(colors))
        bars[i].set_color(colors[idx])
        ax.text(v + 2, i, str(round(v, 3)), color='k', fontsize=15, fontweight='bold')
        
    plt.title('MSE Error', fontsize=18)
    plt.xlim(min_, max_)
    
    plt.show()

def remove_model(name_):
    global my_predictions
    try:
        del my_predictions[name_]
    except KeyError:
        return False
    return True

def plot_coef(columns, coef):
    coef_df = pd.DataFrame(list(zip(columns, coef)))
    coef_df.columns=['feature', 'coef']
    coef_df = coef_df.sort_values('coef', ascending=False).reset_index(drop=True)
    
    fig, ax = plt.subplots(figsize=(9, 7))
    ax.barh(np.arange(len(coef_df)), coef_df['coef'])
    idx = np.arange(len(coef_df))
    ax.set_yticks(idx)
    ax.set_yticklabels(coef_df['feature'])
    fig.tight_layout()
    plt.show()

Single regression prediction models

from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.linear_model import ElasticNet
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures
linear_reg = LinearRegression(n_jobs = -1)
linear_reg.fit(x_train, y_train)
pred = linear_reg.predict(x_test)
mse_eval('LinearRegression', pred, y_test)

              model        mse
0  LinearRegression  22.641613

ridge = Ridge(alpha = 1)
ridge.fit(x_train, y_train)
pred = ridge.predict(x_test)
mse_eval('Ridge(alpha=1)', pred, y_test)

              model        mse
0  LinearRegression  22.641613
1    Ridge(alpha=1)  21.869433

lasso = Lasso(alpha = 0.01)
lasso.fit(x_train, y_train)
pred = lasso.predict(x_test)
mse_eval('Lasso(alpha=0.01)', pred, y_test)

               model        mse
0   LinearRegression  22.641613
1  Lasso(alpha=0.01)  22.279112
2     Ridge(alpha=1)  21.869433

elasticnet = ElasticNet(alpha = 0.5, l1_ratio = 0.8)
elasticnet.fit(x_train, y_train)
pred = elasticnet.predict(x_test)
mse_eval('ElasticNet(l1_ratio=0.8)', pred, y_test)    

                      model        mse
0  ElasticNet(l1_ratio=0.8)  23.074116
1          LinearRegression  22.641613
2         Lasso(alpha=0.01)  22.279112
3            Ridge(alpha=1)  21.869433

elasticnet_pipeline = make_pipeline(
    StandardScaler(),
    ElasticNet(alpha = 0.1, l1_ratio = 0.2)
)
elasticnet_pred = elasticnet_pipeline.fit(x_train, y_train).predict(x_test)
mse_eval('Standard ElasticNet', elasticnet_pred, y_test)

                      model        mse
0  ElasticNet(l1_ratio=0.8)  23.074116
1          LinearRegression  22.641613
2         Lasso(alpha=0.01)  22.279112
3       Standard ElasticNet  22.058335
4            Ridge(alpha=1)  21.869433

poly_pipeline = make_pipeline(
    PolynomialFeatures(degree = 2, include_bias = False),
    StandardScaler(),
    ElasticNet(alpha = 0.1, l1_ratio = 0.2)
)
poly_pred = poly_pipeline.fit(x_train, y_train).predict(x_test)
mse_eval('Poly ElasticNet', poly_pred, y_test)
C:\Users\boyka\anaconda3\lib\site-packages\sklearn\linear_model\_coordinate_descent.py:530: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 70.54427247266358, tolerance: 3.188612944591029
  model = cd_fast.enet_coordinate_descent(

                      model        mse
0  ElasticNet(l1_ratio=0.8)  23.074116
1          LinearRegression  22.641613
2         Lasso(alpha=0.01)  22.279112
3       Standard ElasticNet  22.058335
4            Ridge(alpha=1)  21.869433
5           Poly ElasticNet  17.086267

Ensemble

scikit-learn documentation

Voting - Regression

Voting literally means deciding the result by a vote. Voting is similar to bagging in that both aggregate the results of multiple models, but there are major differences:

  • Voting uses a combination of different algorithm models

  • Bagging uses different sample combinations within the same algorithm

from sklearn.ensemble import VotingRegressor, VotingClassifier

The models must be passed as a list of (name, estimator) tuples.

single_models = [
    ('linear_reg', linear_reg),
    ('ridge', ridge),
    ('lasso', lasso),
    ('elasticnet_pipeline', elasticnet_pipeline),
    ('poly_pipeline', poly_pipeline)
]
voting_regressor = VotingRegressor(single_models, n_jobs=-1)
voting_regressor.fit(x_train, y_train)
VotingRegressor(estimators=[('linear_reg', LinearRegression(n_jobs=-1)),
                            ('ridge', Ridge(alpha=1)),
                            ('lasso', Lasso(alpha=0.01)),
                            ('elasticnet_pipeline',
                             Pipeline(steps=[('standardscaler',
                                              StandardScaler()),
                                             ('elasticnet',
                                              ElasticNet(alpha=0.1,
                                                         l1_ratio=0.2))])),
                            ('poly_pipeline',
                             Pipeline(steps=[('polynomialfeatures',
                                              PolynomialFeatures(include_bias=False)),
                                             ('standardscaler',
                                              StandardScaler()),
                                             ('elasticnet',
                                              ElasticNet(alpha=0.1,
                                                         l1_ratio=0.2))]))],
                n_jobs=-1)
voting_pred = voting_regressor.predict(x_test)
mse_eval('Voting Ensemble', voting_pred, y_test)

                      model        mse
0  ElasticNet(l1_ratio=0.8)  23.074116
1          LinearRegression  22.641613
2         Lasso(alpha=0.01)  22.279112
3       Standard ElasticNet  22.058335
4            Ridge(alpha=1)  21.869433
5           Voting Ensemble  20.521588
6           Poly ElasticNet  17.086267

Voting - Classification

Reference

When building a classification model, the voting ensemble has one important parameter:

voting = {‘hard’, ‘soft’}

When set to hard

Let’s take binary classification as an example, where each model predicts class 0 or class 1.

In the hard voting method, the class that receives the majority of the individual predictions is chosen as the result.

For example, if the predicted values were 1, 0, 0, 1, 1, then class 1 received 3 votes and class 0 received 2 votes, so hard voting predicts 1 as the final value.
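A minimal sketch of that count with numpy (the prediction array simply encodes the example above):

import numpy as np

# hard voting: the majority class among the individual predictions wins
preds = np.array([1, 0, 0, 1, 1])      # predictions from 5 classifiers
print(np.bincount(preds).argmax())     # -> 1 (3 votes vs. 2)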

When set to soft

The soft voting method averages the predicted probability of each class and then selects the class with the highest average probability.

For example, if the probabilities predicted for class 0 were (0.4, 0.9, 0.9, 0.4, 0.4) and the probabilities for class 1 were (0.6, 0.1, 0.1, 0.6, 0.6),

  • the final probability of class 0 is (0.4 + 0.9 + 0.9 + 0.4 + 0.4) / 5 = 0.6,

  • the final probability of class 1 is (0.6 + 0.1 + 0.1 + 0.6 + 0.6) / 5 = 0.4.

Class 0 has the higher average probability, so soft voting selects class 0, a different result from the hard voting example above, where class 1 won the vote.
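A minimal sketch of the same calculation (the probability matrix simply encodes the example above):

import numpy as np

# soft voting: average each class's probability across classifiers, then take the argmax
proba = np.array([[0.4, 0.6],   # each row: [P(class 0), P(class 1)] from one classifier
                  [0.9, 0.1],
                  [0.9, 0.1],
                  [0.4, 0.6],
                  [0.4, 0.6]])
avg = proba.mean(axis=0)        # -> [0.6, 0.4]
print(avg.argmax())             # -> 0, unlike the hard voting result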

from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression, RidgeClassifier
models = [
    ('Logi', LogisticRegression()),
    ('Ridge', RidgeClassifier())
]
vc = VotingClassifier(models, voting='hard')
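vc is only defined above. As a minimal usage sketch, it can be fit and scored like any other classifier; the breast cancer dataset here is an assumption for illustration only and is not part of the original post.

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

# illustrative data; any binary classification dataset works the same way
xc, yc = load_breast_cancer(return_X_y=True)
xc_train, xc_test, yc_train, yc_test = train_test_split(xc, yc, random_state=42)

vc.fit(xc_train, yc_train)            # hard voting: majority vote of 'Logi' and 'Ridge'
print(vc.score(xc_test, yc_test))     # mean accuracy on the test split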

Bagging

Bagging is short for Bootstrap Aggregating.

  • Bootstrap = sampling with replacement; Aggregating = combining the results

Bootstrapping builds multiple datasets by sampling with replacement, so the same data point can appear in more than one sample (a sketch follows the example below).

If the dataset is [1, 2, 3, 4, 5], the bootstrap samples might be:

  1. group 1 = [1, 2, 3]

  2. group 2 = [1, 3, 4]

  3. group 3 = [2, 3, 5]
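A minimal sketch of drawing such overlapping groups with numpy (the seed and group size are arbitrary assumptions):

import numpy as np

# bootstrap sampling: draw with replacement, so elements can repeat within and across groups
rng = np.random.default_rng(42)
data = np.array([1, 2, 3, 4, 5])
groups = [rng.choice(data, size=3, replace=True) for _ in range(3)]
print(groups)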

Image('https://teddylee777.github.io/images/2019-12-17/image-20191217015537872.png')

Voting vs. Bagging

  • Voting ensembles combine models built from different algorithms

  • Bagging ensembles combine multiple bootstrap samples trained with a single algorithm

Representative Bagging Ensemble

  1. RandomForest

  2. Bagging

RandomForest

  • A DecisionTree-based bagging ensemble

  • A very popular ensemble model

  • Easy to use and performs well

from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
rfr = RandomForestRegressor()
rfr.fit(x_train, y_train)
RandomForestRegressor()
rfr_pred = rfr.predict(x_test)
mse_eval('RandomForest Ensemble', rfr_pred, y_test)

                      model        mse
0  ElasticNet(l1_ratio=0.8)  23.074116
1          LinearRegression  22.641613
2         Lasso(alpha=0.01)  22.279112
3       Standard ElasticNet  22.058335
4            Ridge(alpha=1)  21.869433
5           Voting Ensemble  20.521588
6           Poly ElasticNet  17.086267
7     RandomForest Ensemble  14.309778

Hyperparameters

  • random_state: fixes the random seed. Fix it first, then tune!

  • n_jobs: number of CPU cores to use (-1 uses all cores)

  • max_depth: maximum depth of each tree. Limit it to prevent overfitting

  • n_estimators: the number of trees to ensemble

  • max_features: the maximum number of features to consider at each split. Limit it to prevent overfitting

  • min_samples_split: the minimum number of samples required to split a node. default=2. Increase it to prevent overfitting

Image('https://teddylee777.github.io/images/2020-01-09/decistion-tree.png', width=600)

Be sure to fix the random_state value when tuning

rfr = RandomForestRegressor(random_state = 42, n_estimators = 1000, max_depth = 7, max_features = 0.8)
rfr.fit(x_train, y_train)
rfr_pred = rfr.predict(x_test)
mse_eval('RandomForest Ensemble w/ Tuning', rfr_pred, y_test)

                             model        mse
0         ElasticNet(l1_ratio=0.8)  23.074116
1                 LinearRegression  22.641613
2                Lasso(alpha=0.01)  22.279112
3              Standard ElasticNet  22.058335
4                   Ridge(alpha=1)  21.869433
5                  Voting Ensemble  20.521588
6                  Poly ElasticNet  17.086267
7            RandomForest Ensemble  14.309778
8  RandomForest Ensemble w/ Tuning  14.070580
