|
@@ -0,0 +1,90 @@
|
|
|
+import sys
|
|
|
+import os
|
|
|
+from pathlib import Path
|
|
|
+sys.path.append(str(Path(os.path.dirname(__file__)).parent)) # Dirty but it works
|
|
|
+
|
|
|
+from bop_scripts.preprocessing import remove_outliers
|
|
|
+from bop_scripts.nn_models import torchMLPClassifier_sklearn, torchMLP
|
|
|
+from bop_scripts.models import generate_model, fit_all_classifiers
|
|
|
+import torch
|
|
|
+import pandas as pd
|
|
|
+import numpy as np
|
|
|
+
|
|
|
+from sklearn.base import BaseEstimator
|
|
|
+
|
|
|
# Input columns, grouped by the way they are handed to `generate_model`.
qualitatives_variables = ["gender", "last_7", "last_30"]
quantitatives_variables = [
    "age",
    "temperature",
    "heartrate",
    "resprate",
    "o2sat",
    "sbp",
    "dbp",
    "pain",
]
text_variables = ["chiefcomplaint"]

# Target columns; at prediction time one fitted classifier is looked up per
# entry, in this order.
labels = [
    "Cardiaque",
    "Coagulation",
    "Gazometrie",
    "Glycemie_Sanguine",
    "Hepato-Biliaire",
    "IonoC",
    "Lipase",
    "NFS",
    "Phospho-Calcique",
]

# [low, high] pair per quantitative variable, passed to the outlier handling.
# NOTE(review): units are not stated here (the temperature bounds look like
# Fahrenheit) and "age" has no entry -- confirm both are intended.
variables_ranges = {
    "temperature": [60, 130],
    "heartrate": [20, 300],
    "resprate": [5, 50],
    "o2sat": [20, 100],
    "sbp": [40, 250],
    "dbp": [20, 200],
    "pain": [0, 10],
}

# Training device: first CUDA device when one is available, CPU otherwise.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"
|
|
|
+
|
|
|
def torch_classifier_fn():
    """Build a new, unfitted model around a torch MLP classifier.

    A `torchMLPClassifier_sklearn` wrapping `torchMLP` is configured (early
    stopping on "f1", 50 epochs, balanced class weights, training on the
    module-level `device`, prediction on CPU) and handed to `generate_model`
    together with the module-level feature lists and outlier ranges.

    Returns:
        Whatever `generate_model` returns for this configuration.
        NOTE(review): presumably an sklearn-style pipeline -- confirm
        against `bop_scripts.models.generate_model`.
    """
    mlp = torchMLPClassifier_sklearn(
        torchMLP,
        early_stop=True,
        early_stop_metric="f1",
        early_stop_tol=1,
        early_stop_validations_size=10000,
        n_epochs=50,
        learning_rate=1e-4,
        class_weight="balanced",
        device_train=device,
        device_predict="cpu",
        verbose=False,
    )

    # Only the first text column is forwarded to the model builder.
    text_column = text_variables[0]
    vectorizer_options = {"ngram_range": (1, 1), "max_features": 600}

    return generate_model(
        mlp,
        qualitatives_variables,
        quantitatives_variables,
        text_column,
        remove_outliers=True,
        outliers_variables_ranges=variables_ranges,
        CountVectorizer_kwargs=vectorizer_options,
    )
|
|
|
+
|
|
|
class Classifier(BaseEstimator):
    """Multi-label estimator backed by the models from `fit_all_classifiers`.

    After `fit`, `self.classifiers` holds the result of `fit_all_classifiers`
    and is indexed by each name in the module-level `labels` list when
    predicting.
    """

    def preprocess(self, X, y=None):
        """Apply the outlier ranges to `X` and wrap `y` in a DataFrame.

        Args:
            X: Feature table passed to `remove_outliers`.
            y: Optional targets; when given, converted to a DataFrame whose
                columns are `labels`.

        Returns:
            Tuple `(X_clean, y)`; `y` stays `None` when it was not provided.
        """
        # The second value returned by `remove_outliers` is not used here.
        cleaned, _ = remove_outliers(X, variables_ranges)
        targets = None if y is None else pd.DataFrame(y, columns=labels)
        return cleaned, targets

    def fit(self, X, y):
        """Fit the per-label classifiers and return `self`.

        NOTE(review): outlier ranges are applied here via `preprocess`, and
        `torch_classifier_fn` also passes `remove_outliers=True` to
        `generate_model` -- confirm the double handling is intended.
        """
        features, targets = self.preprocess(X, y)
        self.classifiers = fit_all_classifiers(
            torch_classifier_fn, features, targets, verbose=False
        )
        return self

    def predict_proba(self, X):
        """Return one prediction column per label, in `labels` order.

        NOTE(review): despite the method name, each model's `predict` is
        called (not `predict_proba`); whether the values are probabilities
        depends on what those models return -- confirm.
        """
        features, _ = self.preprocess(X)
        columns = [
            self.classifiers[label].predict(features).reshape(-1, 1)
            for label in labels
        ]
        return np.concatenate(columns, axis=1)

    def predict(self, X):
        """Return 0/1 predictions by thresholding `predict_proba` at 0.5."""
        scores = self.predict_proba(X)
        return (scores >= 0.5) * 1
|