Quellcode durchsuchen

Adding starting kit classifier

Ali vor 2 Jahren
Ursprung
Commit
cc65a3a794
1 geänderte Dateien mit 90 neuen und 0 gelöschten Zeilen
  1. 90 0
      submissions/starting_kit/classifier.py

+ 90 - 0
submissions/starting_kit/classifier.py

@@ -0,0 +1,90 @@
+import sys
+import os
+from pathlib import Path
+sys.path.append(str(Path(os.path.dirname(__file__)).parent)) # Dirty but it works
+
+from bop_scripts.preprocessing import remove_outliers
+from bop_scripts.nn_models import torchMLPClassifier_sklearn, torchMLP
+from bop_scripts.models import generate_model, fit_all_classifiers
+import torch
+import pandas as pd
+import numpy as np
+
+from sklearn.base import BaseEstimator
+
+# Input column names, split into the three groups that torch_classifier_fn()
+# hands to generate_model().
+qualitatives_variables = ["gender", "last_7", "last_30"]
+quantitatives_variables = [
+    "age",
+    "temperature",
+    "heartrate",
+    "resprate",
+    "o2sat",
+    "sbp",
+    "dbp",
+    "pain",
+]
+text_variables = ["chiefcomplaint"]
+
+# Target names: used as the column names of y and as the keys under which the
+# fitted per-label models are looked up.
+labels = [
+    "Cardiaque",
+    "Coagulation",
+    "Gazometrie",
+    "Glycemie_Sanguine",
+    "Hepato-Biliaire",
+    "IonoC",
+    "Lipase",
+    "NFS",
+    "Phospho-Calcique",
+]
+
+# Two-element range per quantitative column, passed to remove_outliers() and
+# to generate_model(outliers_variables_ranges=...).
+# NOTE(review): presumably [min, max] of accepted values, and the temperature
+# range looks like degrees Fahrenheit -- confirm against the dataset.
+variables_ranges = {
+    "temperature": [60, 130],
+    "heartrate": [20, 300],
+    "resprate": [5, 50],
+    "o2sat": [20, 100],
+    "sbp": [40, 250],
+    "dbp": [20, 200],
+    "pain": [0, 10],
+}
+
+# Training device: first CUDA device when torch reports one, otherwise the CPU.
+if torch.cuda.is_available():
+    device = "cuda:0"
+else:
+    device = "cpu"
+
+def torch_classifier_fn():
+    """Build and return a new, unfitted model for a single label.
+
+    A ``torchMLPClassifier_sklearn`` wrapping ``torchMLP`` is configured with
+    the hyper-parameters below, then handed to ``generate_model`` together
+    with the module-level column groups and outlier ranges. Whatever
+    ``generate_model`` returns is returned unchanged.
+    """
+    # Trains on the module-level ``device`` but always predicts on the CPU.
+    mlp_estimator = torchMLPClassifier_sklearn(
+        torchMLP,
+        early_stop_validations_size=10000,
+        early_stop=True,
+        early_stop_metric="f1",
+        early_stop_tol=1,
+        n_epochs=50,
+        device_train=device,
+        device_predict="cpu",
+        class_weight="balanced",
+        learning_rate=1e-4,
+        verbose=False,
+    )
+
+    # Only the first (and only) text column is passed on.
+    return generate_model(
+        mlp_estimator,
+        qualitatives_variables,
+        quantitatives_variables,
+        text_variables[0],
+        remove_outliers=True,
+        outliers_variables_ranges=variables_ranges,
+        CountVectorizer_kwargs={"ngram_range": (1, 1), "max_features": 600},
+    )
+
+class Classifier(BaseEstimator):
+    """Multi-label classifier keeping one fitted model per entry of ``labels``.
+
+    ``fit`` stores the result of ``fit_all_classifiers`` in
+    ``self.classifiers``; the prediction methods index it by label name.
+    """
+
+    def preprocess(self, X, y=None):
+        """Run ``remove_outliers`` on ``X`` and wrap ``y`` in a DataFrame.
+
+        Returns an ``(X_clean, y)`` pair. ``y`` stays ``None`` when it was not
+        given; otherwise it becomes a DataFrame whose columns are ``labels``.
+        """
+        # remove_outliers returns a second value as well; it is not used here.
+        # NOTE(review): if remove_outliers drops rows, X_clean and y would no
+        # longer be aligned -- confirm that it keeps the row count.
+        X_clean, _ = remove_outliers(X, variables_ranges)
+        if y is None:
+            return X_clean, None
+        return X_clean, pd.DataFrame(y, columns=labels)
+
+    def fit(self, X, y):
+        """Fit the per-label models on the preprocessed data; return ``self``."""
+        X_clean, y_frame = self.preprocess(X, y)
+        self.classifiers = fit_all_classifiers(
+            torch_classifier_fn, X_clean, y_frame, verbose=False
+        )
+        return self
+
+    def predict_proba(self, X):
+        """Return one column of model output per label, in ``labels`` order.
+
+        NOTE(review): each column comes from the model's ``predict`` method,
+        not from a ``predict_proba`` method -- confirm that it yields scores
+        rather than hard labels.
+        """
+        X_clean, _ = self.preprocess(X)
+        per_label_columns = [
+            self.classifiers[label_name].predict(X_clean).reshape(-1, 1)
+            for label_name in labels
+        ]
+        return np.concatenate(per_label_columns, axis=1)
+
+    def predict(self, X):
+        """Return 0/1 decisions: 1 where the ``predict_proba`` output is >= 0.5."""
+        return (self.predict_proba(X) >= 0.5) * 1