Classification#

Import des outils / jeu de données#

 1import matplotlib.pyplot as plt
 2import numpy as np
 3import pandas as pd
 4import seaborn as sns
 5import xgboost
 6from catboost import CatBoostClassifier
 7from imblearn.over_sampling import SMOTENC
 8from keras import layers
 9from lightgbm import LGBMClassifier
10from scipy.stats import boxcox
11from sklearn.compose import ColumnTransformer
12from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
13from sklearn.dummy import DummyClassifier
14from sklearn.ensemble import IsolationForest, RandomForestClassifier, VotingClassifier
15from sklearn.feature_selection import mutual_info_regression
16from sklearn.gaussian_process import GaussianProcessClassifier
17from sklearn.inspection import permutation_importance
18from sklearn.linear_model import LogisticRegression
19from sklearn.metrics import (
20    ConfusionMatrixDisplay,
21    accuracy_score,
22    classification_report,
23    confusion_matrix,
24    f1_score,
25    make_scorer,
26)
27from sklearn.model_selection import GridSearchCV, cross_val_score, train_test_split
28from sklearn.naive_bayes import BernoulliNB, ComplementNB
29from sklearn.neighbors import KNeighborsClassifier
30from sklearn.pipeline import Pipeline
31from sklearn.preprocessing import OneHotEncoder, RobustScaler, StandardScaler
32from sklearn.svm import LinearSVC
33from sklearn.tree import DecisionTreeClassifier
34from tensorflow import keras
35
36from src.classification import create_classification_models
37from src.config import data_folder, seed
38from src.constants import var_cat_non_ohe, var_cat_ohe, var_categoriques, var_numeriques
39from src.utils import init_notebook
2023-12-17 21:40:50.035718: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2023-12-17 21:40:50.067898: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-12-17 21:40:50.067936: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-12-17 21:40:50.069141: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-12-17 21:40:50.075004: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2023-12-17 21:40:50.075930: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-12-17 21:40:50.942412: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
1init_notebook()
1df = pd.read_csv(
2    f"{data_folder}/data-cleaned-feature-engineering.csv",
3    sep=",",
4    index_col="ID",
5    parse_dates=True,
6)

Variables globales#

1LABELS = (0, 1)

Fonctions et variables utiles#

1score_modeles = []

Tableau. Informations sur notre classification

|:—————————|:———————————-| | Objectif métier | Prédire l’acceptation à une campagne marketing | | Problème technique | Classification binaire supervisée | | Métrique | Score F1 sur la classe 1 (clients qui acceptent)
À score F1 égal, on choisit la meilleure précision sur la classe 1 | | Méthode d’entraînement | Validation croisée en 5 blocs | | Pré-traitement | Variables quantitatives : centrer/réduire
Variables qualitatives : OneHot Encoding (Tableau Disjonctif Complet) | | Équilibrage des classes| 1) Aucun
2) Sous-échantillonnage aléatoire manuel
3) Sur-échantillonnage avec SMOTE |

 1def evaluate_models(models, prefix, X_train, X_test, y_train, y_test):
 2    """Evalue tous les modèles dans `models` et sauvegarde les résultats avec un préfixe `prefix`
 3    (utile pour distinguer les différentes stratégies de pré-traitement des données)."""
 4    results = []
 5
 6    for model, model_name in models:
 7        name = f"{prefix}/{model_name}"
 8
 9        model.fit(X_train, y_train)
10        y_pred = model.predict(X_test)
11        precision = accuracy_score(y_test, y_pred)
12        scores = cross_val_score(
13            model,
14            X_train,
15            y_train,
16            cv=5,
17            scoring=make_scorer(f1_score, labels=[LABELS[1]]),
18        )
19        scores_mean = scores.mean()
20        scores_std = scores.std()
21
22        ## clf_report = pd.DataFrame(
23        #     classification_report(y_test, y_pred, output_dict=True)
24        ## ).T
25        ## cm = confusion_matrix(y_test, y_pred, labels=LABELS, normalize="true")
26        ## ## sns.heatmap(cm, annot=True, cmap="Purples", vmin=0, vmax=1)
27        #
28        ## score_f1_classe1 = clf_report.iloc[1, 2]
29
30        results.append(
31            [
32                name,
33                scores_mean,
34                #             scores_std,
35            ]
36        )
37        score_modeles.extend(
38            (
39                [
40                    name,
41                    "score_f1_classe1",
42                    scores_mean,
43                    #               scores_std,
44                    precision,
45                ],
46            )
47        )
48
49    return results

Liste des modèles#

Tableau. Liste des modèles de notre étude

|:—————————|:———————————-| | Modèles de référence | Classificateur Idiot Uniforme (50% de oui et 50% de non)
Classificateur Idiot Constant 1 (100% de oui) | | Modèles linéaires | Régression logistique
Analyse Discriminante Linéaire | | Arbres de décision | Arbre de décision
Forêt d’arbres de décision (Random Forest) | | Gradient Boosting | XGBoost
LightGBM
CatBoost | | Machine à vecteurs de support (SVM) | Classificateur SVM linéaire | | k plus proches voisins (k-NN) | Classificateur k-nn (5 voisins)
Classificateur k-nn (15 voisins) | | Modèle de vote| Modèle de “Vote à la majorité” sur 5 modèles :
- Régression logistique
- Analyse discriminante linéaire
- Random Forest
- XGBoost
- CatBoost | | Réseau de neurones| Réseau de neurones à 5 couches et 1 600 neurones |

1models = create_classification_models(seed)

Traitement des données#

Pipeline Scaler & OneHotEncoding#

1df["HasAcceptedCampaigns"] = df["HasAcceptedCampaigns"].astype(int)
1## ohe = OneHotEncoder()
1## ohe.fit_transform(df[var_cat_ohe])
1df[var_categoriques].head()
Education Marital_Status Kidhome Teenhome AcceptedCmp1 AcceptedCmp2 AcceptedCmp3 AcceptedCmp4 AcceptedCmp5 Response
ID
5524 Graduation Single 0 0 0 0 0 0 0 1
2174 Graduation Single 1 1 0 0 0 0 0 0
4141 Graduation Together 0 0 0 0 0 0 0 0
6182 Graduation Together 1 0 0 0 0 0 0 0
5324 PhD Married 1 0 0 0 0 0 0 0
1preprocessor = ColumnTransformer(
2    remainder="passthrough",
3    transformers=[
4        ("ohe", OneHotEncoder(), var_cat_ohe),
5        ("scaler", RobustScaler(), var_numeriques),
6    ],
7)
1X = df.drop(columns=["Response", "Dt_Customer"])
1preprocessor.fit(X)
ColumnTransformer(remainder='passthrough',
                  transformers=[('ohe', OneHotEncoder(),
                                 ['Education', 'Marital_Status', 'Kidhome',
                                  'Teenhome', 'NbAcceptedCampaigns',
                                  'NbChildren']),
                                ('scaler', RobustScaler(),
                                 ['Year_Birth', 'Income', 'Recency', 'MntWines',
                                  'MntFruits', 'MntMeatProducts',
                                  'MntFishProducts', 'MntSweetProducts',
                                  'MntGoldProds', 'NumDealsPurchases',
                                  'NumWebPurchases', 'NumCatalogPurchases',
                                  'NumStorePurchases', 'NumWebVisitsMonth'])])
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
1preprocessor.transform(X)
array([[0., 0., 1., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       ...,
       [0., 0., 1., ..., 0., 0., 1.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])
1nouveau_df = pd.DataFrame(preprocessor.transform(X), index=df.index)
1nouveau_df.shape
(2052, 45)
1## pipeline = Pipeline(steps=[("scaler", RobustScaler())])
2
3## preprocessor = ColumnTransformer(
4#     remainder="passthrough",
5#     transformers=[
6#         ("std", standard_transformer, [3]),
7#     ],
8## )
1## under_sampling_manuel = Pipeline(steps=[()])

Par défaut#

1y = df[["Response"]].astype(int)
1X_train, X_test, y_train, y_test = train_test_split(
2    nouveau_df, y, test_size=0.2, random_state=seed
3)
1nouveau_df.shape
(2052, 45)
1prefix = "défaut"
2results = evaluate_models(models, prefix, X_train, X_test, y_train, y_test)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/utils/validation.py:1183: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/utils/validation.py:1183: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/utils/validation.py:1183: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/utils/validation.py:1183: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/utils/validation.py:1183: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/utils/validation.py:1183: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/utils/validation.py:1183: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/utils/validation.py:1183: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/utils/validation.py:1183: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/utils/validation.py:1183: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/utils/validation.py:1183: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/utils/validation.py:1183: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/base.py:1152: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
  return fit_method(estimator, *args, **kwargs)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/base.py:1152: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
  return fit_method(estimator, *args, **kwargs)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/base.py:1152: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
  return fit_method(estimator, *args, **kwargs)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/base.py:1152: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
  return fit_method(estimator, *args, **kwargs)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/base.py:1152: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
  return fit_method(estimator, *args, **kwargs)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/base.py:1152: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
  return fit_method(estimator, *args, **kwargs)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/preprocessing/_label.py:97: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/preprocessing/_label.py:132: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/preprocessing/_label.py:97: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/preprocessing/_label.py:132: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/preprocessing/_label.py:97: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/preprocessing/_label.py:132: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/preprocessing/_label.py:97: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/preprocessing/_label.py:132: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/preprocessing/_label.py:97: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/preprocessing/_label.py:132: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/preprocessing/_label.py:97: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/preprocessing/_label.py:132: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/utils/validation.py:1183: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/svm/_classes.py:32: FutureWarning: The default value of `dual` will change from `True` to `'auto'` in 1.5. Set the value of `dual` explicitly to suppress the warning.
  warnings.warn(
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/svm/_base.py:1250: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/utils/validation.py:1183: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/svm/_classes.py:32: FutureWarning: The default value of `dual` will change from `True` to `'auto'` in 1.5. Set the value of `dual` explicitly to suppress the warning.
  warnings.warn(
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/svm/_base.py:1250: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/utils/validation.py:1183: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/svm/_classes.py:32: FutureWarning: The default value of `dual` will change from `True` to `'auto'` in 1.5. Set the value of `dual` explicitly to suppress the warning.
  warnings.warn(
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/svm/_base.py:1250: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/utils/validation.py:1183: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/svm/_classes.py:32: FutureWarning: The default value of `dual` will change from `True` to `'auto'` in 1.5. Set the value of `dual` explicitly to suppress the warning.
  warnings.warn(
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/svm/_base.py:1250: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/utils/validation.py:1183: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/svm/_classes.py:32: FutureWarning: The default value of `dual` will change from `True` to `'auto'` in 1.5. Set the value of `dual` explicitly to suppress the warning.
  warnings.warn(
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/svm/_base.py:1250: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/utils/validation.py:1183: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/svm/_classes.py:32: FutureWarning: The default value of `dual` will change from `True` to `'auto'` in 1.5. Set the value of `dual` explicitly to suppress the warning.
  warnings.warn(
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/svm/_base.py:1250: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/neighbors/_classification.py:233: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
  return self._fit(X, y)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/neighbors/_classification.py:233: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
  return self._fit(X, y)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/neighbors/_classification.py:233: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
  return self._fit(X, y)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/neighbors/_classification.py:233: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
  return self._fit(X, y)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/neighbors/_classification.py:233: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
  return self._fit(X, y)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/neighbors/_classification.py:233: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
  return self._fit(X, y)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/neighbors/_classification.py:233: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
  return self._fit(X, y)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/neighbors/_classification.py:233: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
  return self._fit(X, y)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/neighbors/_classification.py:233: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
  return self._fit(X, y)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/neighbors/_classification.py:233: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
  return self._fit(X, y)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/neighbors/_classification.py:233: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
  return self._fit(X, y)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/neighbors/_classification.py:233: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
  return self._fit(X, y)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/preprocessing/_label.py:97: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/preprocessing/_label.py:132: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/preprocessing/_label.py:97: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/preprocessing/_label.py:132: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/preprocessing/_label.py:97: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/preprocessing/_label.py:132: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/preprocessing/_label.py:97: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/preprocessing/_label.py:132: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/preprocessing/_label.py:97: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/preprocessing/_label.py:132: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/preprocessing/_label.py:97: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/preprocessing/_label.py:132: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
1sorted(results, key=lambda x: x[1], reverse=True)
[['défaut/LinearDiscriminantAnalysis', 0.5457873305066958],
 ['défaut/LogisticRegression', 0.5300741717327082],
 ['défaut/LinearSVC', 0.5266264112213479],
 ['défaut/VotingClassifier', 0.5048525204014583],
 ['défaut/CatBoostClassifier', 0.48986074706622657],
 ['défaut/XGBClassifier', 0.4612210194041914],
 ['défaut/LGBMClassifier', 0.4491622877623547],
 ['défaut/DecisionTreeClassifier', 0.4145184590700617],
 ['défaut/RandomForestClassifier', 0.3955075959382346],
 ['défaut/DummyClassifier_Constant1', 0.2551837011118009],
 ['défaut/KNeighborsClassifier5', 0.2450767474496288],
 ['défaut/DummyClassifier_Uniform', 0.2171772519717725],
 ['défaut/KNeighborsClassifier15', 0.13868175622739518]]
1score_modeles
[['défaut/DummyClassifier_Uniform',
  'score_f1_classe1',
  0.2171772519717725,
  0.49391727493917276],
 ['défaut/DummyClassifier_Constant1',
  'score_f1_classe1',
  0.2551837011118009,
  0.17274939172749393],
 ['défaut/LogisticRegression',
  'score_f1_classe1',
  0.5300741717327082,
  0.8442822384428224],
 ['défaut/LinearDiscriminantAnalysis',
  'score_f1_classe1',
  0.5457873305066958,
  0.8345498783454988],
 ['défaut/DecisionTreeClassifier',
  'score_f1_classe1',
  0.4145184590700617,
  0.805352798053528],
 ['défaut/RandomForestClassifier',
  'score_f1_classe1',
  0.3955075959382346,
  0.8467153284671532],
 ['défaut/XGBClassifier',
  'score_f1_classe1',
  0.4612210194041914,
  0.851581508515815],
 ['défaut/CatBoostClassifier',
  'score_f1_classe1',
  0.48986074706622657,
  0.8588807785888077],
 ['défaut/LGBMClassifier',
  'score_f1_classe1',
  0.4491622877623547,
  0.8564476885644768],
 ['défaut/LinearSVC',
  'score_f1_classe1',
  0.5266264112213479,
  0.8491484184914841],
 ['défaut/KNeighborsClassifier5',
  'score_f1_classe1',
  0.2450767474496288,
  0.8540145985401459],
 ['défaut/KNeighborsClassifier15',
  'score_f1_classe1',
  0.13868175622739518,
  0.8321167883211679],
 ['défaut/VotingClassifier',
  'score_f1_classe1',
  0.5048525204014583,
  0.8564476885644768]]

Équilibrage des classes#

Under-sampling (manuel)#

1## samples0 = df[df["Response"] == 0].sample(350, random_state=seed)
1## X_eq = pd.concat((samples0, df[df["Response"] == 1]))
1## X_eq["Response"].hist()
1## y_eq = X_eq.pop("Response").astype(int)
1## X_eq = pd.get_dummies(X_eq.drop(columns=["Dt_Customer"]))
1## df["Response"].value_counts()
1NB_A_SUPPRIMER = 1400
2drop_indices = np.random.choice(
3    nouveau_df[y["Response"] == 0].index, NB_A_SUPPRIMER, replace=False
4)
5df_subset = nouveau_df.drop(drop_indices)
1y_eq = y.drop(index=drop_indices)
1X_train, X_test, y_train, y_test = train_test_split(
2    df_subset, y_eq, test_size=0.2, random_state=seed
3)
1prefix = "éq_classes"
2results = evaluate_models(models, prefix, X_train, X_test, y_train, y_test)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/utils/validation.py:1183: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/utils/validation.py:1183: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/utils/validation.py:1183: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/utils/validation.py:1183: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/utils/validation.py:1183: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/utils/validation.py:1183: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/utils/validation.py:1183: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/utils/validation.py:1183: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/utils/validation.py:1183: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/utils/validation.py:1183: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/utils/validation.py:1183: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/utils/validation.py:1183: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/base.py:1152: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
  return fit_method(estimator, *args, **kwargs)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/base.py:1152: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
  return fit_method(estimator, *args, **kwargs)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/base.py:1152: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
  return fit_method(estimator, *args, **kwargs)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/base.py:1152: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
  return fit_method(estimator, *args, **kwargs)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/base.py:1152: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
  return fit_method(estimator, *args, **kwargs)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/base.py:1152: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
  return fit_method(estimator, *args, **kwargs)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/preprocessing/_label.py:97: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/preprocessing/_label.py:132: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/preprocessing/_label.py:97: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/preprocessing/_label.py:132: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/preprocessing/_label.py:97: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/preprocessing/_label.py:132: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/preprocessing/_label.py:97: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/preprocessing/_label.py:132: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/preprocessing/_label.py:97: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/preprocessing/_label.py:132: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/preprocessing/_label.py:97: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/preprocessing/_label.py:132: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/utils/validation.py:1183: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/svm/_classes.py:32: FutureWarning: The default value of `dual` will change from `True` to `'auto'` in 1.5. Set the value of `dual` explicitly to suppress the warning.
  warnings.warn(
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/svm/_base.py:1250: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/utils/validation.py:1183: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/svm/_classes.py:32: FutureWarning: The default value of `dual` will change from `True` to `'auto'` in 1.5. Set the value of `dual` explicitly to suppress the warning.
  warnings.warn(
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/svm/_base.py:1250: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/utils/validation.py:1183: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/svm/_classes.py:32: FutureWarning: The default value of `dual` will change from `True` to `'auto'` in 1.5. Set the value of `dual` explicitly to suppress the warning.
  warnings.warn(
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/svm/_base.py:1250: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/utils/validation.py:1183: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/svm/_classes.py:32: FutureWarning: The default value of `dual` will change from `True` to `'auto'` in 1.5. Set the value of `dual` explicitly to suppress the warning.
  warnings.warn(
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/svm/_base.py:1250: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/utils/validation.py:1183: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/svm/_classes.py:32: FutureWarning: The default value of `dual` will change from `True` to `'auto'` in 1.5. Set the value of `dual` explicitly to suppress the warning.
  warnings.warn(
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/svm/_base.py:1250: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/utils/validation.py:1183: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/svm/_classes.py:32: FutureWarning: The default value of `dual` will change from `True` to `'auto'` in 1.5. Set the value of `dual` explicitly to suppress the warning.
  warnings.warn(
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/svm/_base.py:1250: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/neighbors/_classification.py:233: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
  return self._fit(X, y)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/neighbors/_classification.py:233: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
  return self._fit(X, y)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/neighbors/_classification.py:233: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
  return self._fit(X, y)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/neighbors/_classification.py:233: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
  return self._fit(X, y)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/neighbors/_classification.py:233: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
  return self._fit(X, y)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/neighbors/_classification.py:233: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
  return self._fit(X, y)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/neighbors/_classification.py:233: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
  return self._fit(X, y)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/neighbors/_classification.py:233: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
  return self._fit(X, y)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/neighbors/_classification.py:233: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
  return self._fit(X, y)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/neighbors/_classification.py:233: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
  return self._fit(X, y)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/neighbors/_classification.py:233: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
  return self._fit(X, y)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/neighbors/_classification.py:233: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
  return self._fit(X, y)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/preprocessing/_label.py:97: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/preprocessing/_label.py:132: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/preprocessing/_label.py:97: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/preprocessing/_label.py:132: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/preprocessing/_label.py:97: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/preprocessing/_label.py:132: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/preprocessing/_label.py:97: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/preprocessing/_label.py:132: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/preprocessing/_label.py:97: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/preprocessing/_label.py:132: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/preprocessing/_label.py:97: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/preprocessing/_label.py:132: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
1sorted(results, key=lambda x: x[1], reverse=True)
[['éq_classes/LogisticRegression', 0.7941343818734119],
 ['éq_classes/CatBoostClassifier', 0.7938368158804571],
 ['éq_classes/LinearDiscriminantAnalysis', 0.7938211762491106],
 ['éq_classes/VotingClassifier', 0.7920346320346321],
 ['éq_classes/LinearSVC', 0.7908569588129786],
 ['éq_classes/LGBMClassifier', 0.7834421677461864],
 ['éq_classes/XGBClassifier', 0.7642432318617356],
 ['éq_classes/RandomForestClassifier', 0.7484611585366764],
 ['éq_classes/DecisionTreeClassifier', 0.7168816591954977],
 ['éq_classes/DummyClassifier_Constant1', 0.6589356893363467],
 ['éq_classes/KNeighborsClassifier5', 0.6585194388262133],
 ['éq_classes/KNeighborsClassifier15', 0.6425526464663587],
 ['éq_classes/DummyClassifier_Uniform', 0.553094245204337]]

Over-sampling (SMOTE)#

1cat_cols = list(X.select_dtypes(include=["category", "int", "bool"]).columns)
1cat_cols_index = list(map(lambda c: list(X.columns).index(c), cat_cols))
1list(X.iloc[:, cat_cols_index].columns)
['Year_Birth',
 'Kidhome',
 'Teenhome',
 'Recency',
 'MntWines',
 'MntFruits',
 'MntMeatProducts',
 'MntFishProducts',
 'MntSweetProducts',
 'MntGoldProds',
 'NumDealsPurchases',
 'NumWebPurchases',
 'NumCatalogPurchases',
 'NumStorePurchases',
 'NumWebVisitsMonth',
 'AcceptedCmp3',
 'AcceptedCmp4',
 'AcceptedCmp5',
 'AcceptedCmp1',
 'AcceptedCmp2',
 'NbAcceptedCampaigns',
 'HasAcceptedCampaigns',
 'NbChildren']
1sm = SMOTENC(
2    categorical_features=cat_cols_index,
3    random_state=seed,
4)
1X_train, X_test, y_train, y_test = train_test_split(
2    nouveau_df, y, test_size=0.2, random_state=seed
3)
1X_train_sm, y_train_sm = sm.fit_resample(X_train, y_train)
1y_train_sm.value_counts(normalize=True)
Response
0           0.5
1           0.5
dtype: float64
1prefix = "SMOTE"
2results = evaluate_models(models, prefix, X_train_sm, X_test, y_train_sm, y_test)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/utils/validation.py:1183: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/utils/validation.py:1183: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/utils/validation.py:1183: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/utils/validation.py:1183: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/utils/validation.py:1183: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/utils/validation.py:1183: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/utils/validation.py:1183: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/utils/validation.py:1183: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/utils/validation.py:1183: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/utils/validation.py:1183: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/utils/validation.py:1183: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/utils/validation.py:1183: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/base.py:1152: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
  return fit_method(estimator, *args, **kwargs)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/base.py:1152: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
  return fit_method(estimator, *args, **kwargs)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/base.py:1152: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
  return fit_method(estimator, *args, **kwargs)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/base.py:1152: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
  return fit_method(estimator, *args, **kwargs)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/base.py:1152: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
  return fit_method(estimator, *args, **kwargs)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/base.py:1152: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
  return fit_method(estimator, *args, **kwargs)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/preprocessing/_label.py:97: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/preprocessing/_label.py:132: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/preprocessing/_label.py:97: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/preprocessing/_label.py:132: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/preprocessing/_label.py:97: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/preprocessing/_label.py:132: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/preprocessing/_label.py:97: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/preprocessing/_label.py:132: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/preprocessing/_label.py:97: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/preprocessing/_label.py:132: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/preprocessing/_label.py:97: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/preprocessing/_label.py:132: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/utils/validation.py:1183: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/svm/_classes.py:32: FutureWarning: The default value of `dual` will change from `True` to `'auto'` in 1.5. Set the value of `dual` explicitly to suppress the warning.
  warnings.warn(
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/svm/_base.py:1250: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/utils/validation.py:1183: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/svm/_classes.py:32: FutureWarning: The default value of `dual` will change from `True` to `'auto'` in 1.5. Set the value of `dual` explicitly to suppress the warning.
  warnings.warn(
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/svm/_base.py:1250: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/utils/validation.py:1183: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/svm/_classes.py:32: FutureWarning: The default value of `dual` will change from `True` to `'auto'` in 1.5. Set the value of `dual` explicitly to suppress the warning.
  warnings.warn(
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/svm/_base.py:1250: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/utils/validation.py:1183: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/svm/_classes.py:32: FutureWarning: The default value of `dual` will change from `True` to `'auto'` in 1.5. Set the value of `dual` explicitly to suppress the warning.
  warnings.warn(
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/svm/_base.py:1250: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/utils/validation.py:1183: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/svm/_classes.py:32: FutureWarning: The default value of `dual` will change from `True` to `'auto'` in 1.5. Set the value of `dual` explicitly to suppress the warning.
  warnings.warn(
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/svm/_base.py:1250: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/utils/validation.py:1183: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/svm/_classes.py:32: FutureWarning: The default value of `dual` will change from `True` to `'auto'` in 1.5. Set the value of `dual` explicitly to suppress the warning.
  warnings.warn(
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/svm/_base.py:1250: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/neighbors/_classification.py:233: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
  return self._fit(X, y)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/neighbors/_classification.py:233: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
  return self._fit(X, y)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/neighbors/_classification.py:233: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
  return self._fit(X, y)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/neighbors/_classification.py:233: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
  return self._fit(X, y)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/neighbors/_classification.py:233: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
  return self._fit(X, y)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/neighbors/_classification.py:233: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
  return self._fit(X, y)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/neighbors/_classification.py:233: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
  return self._fit(X, y)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/neighbors/_classification.py:233: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
  return self._fit(X, y)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/neighbors/_classification.py:233: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
  return self._fit(X, y)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/neighbors/_classification.py:233: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
  return self._fit(X, y)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/neighbors/_classification.py:233: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
  return self._fit(X, y)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/neighbors/_classification.py:233: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
  return self._fit(X, y)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/preprocessing/_label.py:97: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/preprocessing/_label.py:132: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/preprocessing/_label.py:97: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/preprocessing/_label.py:132: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/preprocessing/_label.py:97: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/preprocessing/_label.py:132: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
/home/runner/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
Cell In[44], line 2
      1 prefix = "SMOTE"
----> 2 results = evaluate_models(models, prefix, X_train_sm, X_test, y_train_sm, y_test)

Cell In[6], line 12, in evaluate_models(models, prefix, X_train, X_test, y_train, y_test)
     10 y_pred = model.predict(X_test)
     11 precision = accuracy_score(y_test, y_pred)
---> 12 scores = cross_val_score(
     13     model,
     14     X_train,
     15     y_train,
     16     cv=5,
     17     scoring=make_scorer(f1_score, labels=[LABELS[1]]),
     18 )
     19 scores_mean = scores.mean()
     20 scores_std = scores.std()

File ~/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/model_selection/_validation.py:562, in cross_val_score(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, pre_dispatch, error_score)
    559 # To ensure multimetric format is not supported
    560 scorer = check_scoring(estimator, scoring=scoring)
--> 562 cv_results = cross_validate(
    563     estimator=estimator,
    564     X=X,
    565     y=y,
    566     groups=groups,
    567     scoring={"score": scorer},
    568     cv=cv,
    569     n_jobs=n_jobs,
    570     verbose=verbose,
    571     fit_params=fit_params,
    572     pre_dispatch=pre_dispatch,
    573     error_score=error_score,
    574 )
    575 return cv_results["test_score"]

File ~/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/utils/_param_validation.py:214, in validate_params.<locals>.decorator.<locals>.wrapper(*args, **kwargs)
    208 try:
    209     with config_context(
    210         skip_parameter_validation=(
    211             prefer_skip_nested_validation or global_skip_validation
    212         )
    213     ):
--> 214         return func(*args, **kwargs)
    215 except InvalidParameterError as e:
    216     # When the function is just a wrapper around an estimator, we allow
    217     # the function to delegate validation to the estimator, but we replace
    218     # the name of the estimator by the name of the function in the error
    219     # message to avoid confusion.
    220     msg = re.sub(
    221         r"parameter of \w+ must be",
    222         f"parameter of {func.__qualname__} must be",
    223         str(e),
    224     )

File ~/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/model_selection/_validation.py:309, in cross_validate(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, pre_dispatch, return_train_score, return_estimator, return_indices, error_score)
    306 # We clone the estimator to make sure that all the folds are
    307 # independent, and that it is pickle-able.
    308 parallel = Parallel(n_jobs=n_jobs, verbose=verbose, pre_dispatch=pre_dispatch)
--> 309 results = parallel(
    310     delayed(_fit_and_score)(
    311         clone(estimator),
    312         X,
    313         y,
    314         scorers,
    315         train,
    316         test,
    317         verbose,
    318         None,
    319         fit_params,
    320         return_train_score=return_train_score,
    321         return_times=True,
    322         return_estimator=return_estimator,
    323         error_score=error_score,
    324     )
    325     for train, test in indices
    326 )
    328 _warn_or_raise_about_fit_failures(results, error_score)
    330 # For callable scoring, the return type is only know after calling. If the
    331 # return type is a dictionary, the error scores can now be inserted with
    332 # the correct key.

File ~/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/utils/parallel.py:65, in Parallel.__call__(self, iterable)
     60 config = get_config()
     61 iterable_with_config = (
     62     (_with_config(delayed_func, config), args, kwargs)
     63     for delayed_func, args, kwargs in iterable
     64 )
---> 65 return super().__call__(iterable_with_config)

File ~/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/joblib/parallel.py:1863, in Parallel.__call__(self, iterable)
   1861     output = self._get_sequential_output(iterable)
   1862     next(output)
-> 1863     return output if self.return_generator else list(output)
   1865 # Let's create an ID that uniquely identifies the current call. If the
   1866 # call is interrupted early and that the same instance is immediately
   1867 # re-used, this id will be used to prevent workers that were
   1868 # concurrently finalizing a task from the previous call to run the
   1869 # callback.
   1870 with self._lock:

File ~/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/joblib/parallel.py:1792, in Parallel._get_sequential_output(self, iterable)
   1790 self.n_dispatched_batches += 1
   1791 self.n_dispatched_tasks += 1
-> 1792 res = func(*args, **kwargs)
   1793 self.n_completed_tasks += 1
   1794 self.print_progress()

File ~/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/utils/parallel.py:127, in _FuncWrapper.__call__(self, *args, **kwargs)
    125     config = {}
    126 with config_context(**config):
--> 127     return self.function(*args, **kwargs)

File ~/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/model_selection/_validation.py:729, in _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, fit_params, return_train_score, return_parameters, return_n_test_samples, return_times, return_estimator, split_progress, candidate_progress, error_score)
    727         estimator.fit(X_train, **fit_params)
    728     else:
--> 729         estimator.fit(X_train, y_train, **fit_params)
    731 except Exception:
    732     # Note fit time as time until error
    733     fit_time = time.time() - start_time

File ~/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/base.py:1152, in _fit_context.<locals>.decorator.<locals>.wrapper(estimator, *args, **kwargs)
   1145     estimator._validate_params()
   1147 with config_context(
   1148     skip_parameter_validation=(
   1149         prefer_skip_nested_validation or global_skip_validation
   1150     )
   1151 ):
-> 1152     return fit_method(estimator, *args, **kwargs)

File ~/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/ensemble/_voting.py:349, in VotingClassifier.fit(self, X, y, sample_weight)
    346 self.classes_ = self.le_.classes_
    347 transformed_y = self.le_.transform(y)
--> 349 return super().fit(X, transformed_y, sample_weight)

File ~/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/ensemble/_voting.py:81, in _BaseVoting.fit(self, X, y, sample_weight)
     75 if self.weights is not None and len(self.weights) != len(self.estimators):
     76     raise ValueError(
     77         "Number of `estimators` and weights must be equal; got"
     78         f" {len(self.weights)} weights, {len(self.estimators)} estimators"
     79     )
---> 81 self.estimators_ = Parallel(n_jobs=self.n_jobs)(
     82     delayed(_fit_single_estimator)(
     83         clone(clf),
     84         X,
     85         y,
     86         sample_weight=sample_weight,
     87         message_clsname="Voting",
     88         message=self._log_message(names[idx], idx + 1, len(clfs)),
     89     )
     90     for idx, clf in enumerate(clfs)
     91     if clf != "drop"
     92 )
     94 self.named_estimators_ = Bunch()
     96 # Uses 'drop' as placeholder for dropped estimators

File ~/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/utils/parallel.py:65, in Parallel.__call__(self, iterable)
     60 config = get_config()
     61 iterable_with_config = (
     62     (_with_config(delayed_func, config), args, kwargs)
     63     for delayed_func, args, kwargs in iterable
     64 )
---> 65 return super().__call__(iterable_with_config)

File ~/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/joblib/parallel.py:1863, in Parallel.__call__(self, iterable)
   1861     output = self._get_sequential_output(iterable)
   1862     next(output)
-> 1863     return output if self.return_generator else list(output)
   1865 # Let's create an ID that uniquely identifies the current call. If the
   1866 # call is interrupted early and that the same instance is immediately
   1867 # re-used, this id will be used to prevent workers that were
   1868 # concurrently finalizing a task from the previous call to run the
   1869 # callback.
   1870 with self._lock:

File ~/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/joblib/parallel.py:1792, in Parallel._get_sequential_output(self, iterable)
   1790 self.n_dispatched_batches += 1
   1791 self.n_dispatched_tasks += 1
-> 1792 res = func(*args, **kwargs)
   1793 self.n_completed_tasks += 1
   1794 self.print_progress()

File ~/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/utils/parallel.py:127, in _FuncWrapper.__call__(self, *args, **kwargs)
    125     config = {}
    126 with config_context(**config):
--> 127     return self.function(*args, **kwargs)

File ~/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/sklearn/ensemble/_base.py:36, in _fit_single_estimator(estimator, X, y, sample_weight, message_clsname, message)
     34 else:
     35     with _print_elapsed_time(message_clsname, message):
---> 36         estimator.fit(X, y)
     37 return estimator

File ~/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/catboost/core.py:5100, in CatBoostClassifier.fit(self, X, y, cat_features, text_features, embedding_features, sample_weight, baseline, use_best_model, eval_set, verbose, logging_level, plot, plot_file, column_description, verbose_eval, metric_period, silent, early_stopping_rounds, save_snapshot, snapshot_file, snapshot_interval, init_model, callbacks, log_cout, log_cerr)
   5097 if 'loss_function' in params:
   5098     CatBoostClassifier._check_is_compatible_loss(params['loss_function'])
-> 5100 self._fit(X, y, cat_features, text_features, embedding_features, None, sample_weight, None, None, None, None, baseline, use_best_model,
   5101           eval_set, verbose, logging_level, plot, plot_file, column_description, verbose_eval, metric_period,
   5102           silent, early_stopping_rounds, save_snapshot, snapshot_file, snapshot_interval, init_model, callbacks, log_cout, log_cerr)
   5103 return self

File ~/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/catboost/core.py:2319, in CatBoost._fit(self, X, y, cat_features, text_features, embedding_features, pairs, sample_weight, group_id, group_weight, subgroup_id, pairs_weight, baseline, use_best_model, eval_set, verbose, logging_level, plot, plot_file, column_description, verbose_eval, metric_period, silent, early_stopping_rounds, save_snapshot, snapshot_file, snapshot_interval, init_model, callbacks, log_cout, log_cerr)
   2315 allow_clear_pool = train_params["allow_clear_pool"]
   2317 with log_fixup(log_cout, log_cerr), \
   2318     plot_wrapper(plot, plot_file, 'Training plots', [_get_train_dir(self.get_params())]):
-> 2319     self._train(
   2320         train_pool,
   2321         train_params["eval_sets"],
   2322         params,
   2323         allow_clear_pool,
   2324         train_params["init_model"]
   2325     )
   2327 # Have property feature_importance possibly set
   2328 loss = self._object._get_loss_function_name()

File ~/.cache/pypoetry/virtualenvs/customer-base-analysis-F-W2gxNr-py3.10/lib/python3.10/site-packages/catboost/core.py:1723, in _CatBoostBase._train(self, train_pool, test_pool, params, allow_clear_pool, init_model)
   1722 def _train(self, train_pool, test_pool, params, allow_clear_pool, init_model):
-> 1723     self._object._train(train_pool, test_pool, params, allow_clear_pool, init_model._object if init_model else None)
   1724     self._set_trained_model_attributes()

File _catboost.pyx:4645, in _catboost._CatBoost._train()

File _catboost.pyx:4694, in _catboost._CatBoost._train()

KeyboardInterrupt: 
1sorted(results, key=lambda x: x[1], reverse=True)

Réseau de neurones#

1X_train, X_test, y_train, y_test = train_test_split(
2    df_subset, y_eq, test_size=0.2, random_state=seed
3)
1X_train = np.asarray(X_train).astype("float32")
2y_train = np.asarray(y_train).astype("float32")
3X_test = np.asarray(X_test).astype("float32")
4y_test = np.asarray(y_test).astype("float32")
1X_train.shape
 1np.random.seed(seed)
 2model = keras.Sequential(
 3    [
 4        layers.Dense(400, activation="relu", input_shape=[X_train.shape[1]]),
 5        layers.Dense(400, activation="relu"),
 6        layers.Dense(400, activation="sigmoid"),
 7        layers.Dense(400, activation="relu"),
 8        layers.Dense(1, activation="sigmoid"),
 9    ]
10)
1model.compile(
2    optimizer="adam",
3    loss="binary_crossentropy",
4    metrics=["binary_accuracy"],  ## "binary_accuracy"
5)
1early_stopping = keras.callbacks.EarlyStopping(
2    patience=10,
3    min_delta=0.001,
4    restore_best_weights=True,
5)
 1history = model.fit(
 2    X_train,
 3    y_train,
 4    validation_data=(X_test, y_test),
 5    ## validation_split=0.2,
 6    batch_size=512,
 7    epochs=1000,
 8    callbacks=[early_stopping],
 9    verbose=0,  ## hide the output because we have so many epochs
10)
 1history_df = pd.DataFrame(history.history)
 2## Start the plot at epoch 5
 3history_df.loc[5:, ["loss", "val_loss"]].plot()
 4history_df.loc[5:, ["binary_accuracy", "val_binary_accuracy"]].plot()
 5
 6print(
 7    ("Best Validation Loss: {:0.4f}" + "\nBest Validation Accuracy: {:0.4f}").format(
 8        history_df["val_loss"].min(), history_df["val_binary_accuracy"].max()
 9    )
10)
1y_pred = model.predict(X_test)
1sns.histplot(y_pred > 0.5, discrete=True)
1y_pred_old = y_pred
1y_pred = y_pred > 0.5
1print(classification_report(y_test, y_pred, labels=LABELS))
1cm = confusion_matrix(y_test, y_pred, labels=LABELS)
2disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=LABELS)
3disp.plot()
1nom_modele = "Réseau de Neurones"
2## ajout_score(model, nom_modele, y_test, y_pred)

Diagnostic du modèle#

RandomForest feature importance#

1nom_colonnes = preprocessor.get_feature_names_out(X.columns)
1nom_colonnes = list(map(lambda x: x.split("__")[1], nom_colonnes))
1rf = models[6][0]
1fi = rf.feature_importances_
1fi = pd.DataFrame(fi.reshape((1, len(fi))), columns=nom_colonnes)
1fi = fi.sort_values(
2    by=0, axis=1, ascending=False
3)  ## trier les colonnes en fonction de la ligne 0
1plt.figure(figsize=(5, 12))
2plt.title("Importance donnée par le modèle RandomForest")
3sns.barplot(fi, orient="h", color="gray")

Permutation importance#

1result = permutation_importance(rf, X_test, y_test, n_repeats=10, random_state=seed)
1## sns.histplot(result.importances_std < 0.015)
1pi_results = result.importances_mean
1pi_results = pd.DataFrame(
2    pi_results.reshape((1, len(pi_results))), columns=nom_colonnes
3)
4pi_results = pi_results.sort_values(by=0, axis=1, ascending=False)
1plt.figure(figsize=(5, 12))
2plt.title("Importance de Permutation du modèle RandomForest")
3sns.barplot(pi_results, orient="h", color="gray")

Pour aller plus loin#

  • afficher les intervalles de confiance des scores de validation croisée

  • optimiser les hyper-paramètres des modèles (avec une recherche en grille ou une recherche bayésienne)

  • tester les modèles sur différents sous-ensembles de variables pour les comparer

Sauvegarde des données#

1score_modeles_df = pd.DataFrame(
2    score_modeles, columns=["Modèle", "Métrique", "Valeur", "Précision"]
3)
1score_modeles_df.to_csv(f"{data_folder}/results/classifications.csv", index=False)