# Source repository: alibell/biologyOrderPredictor


			
				
					
						
						
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990
							import sys
import os
from pathlib import Path
sys.path.append(str(Path(os.path.dirname(__file__)).parent)) # Dirty but it works

from bop_scripts.preprocessing import remove_outliers
from bop_scripts.nn_models import torchMLPClassifier_sklearn, torchMLP
from bop_scripts.models import generate_model, fit_all_classifiers
import torch
import pandas as pd
import numpy as np

from sklearn.base import BaseEstimator

# Feature groups handed to generate_model() when a per-label model is built.
qualitatives_variables = [
    "gender",
    "last_7",
    "last_30",
]
quantitatives_variables = [
    "age",
    "temperature",
    "heartrate",
    "resprate",
    "o2sat",
    "sbp",
    "dbp",
    "pain",
]
# Only the first entry is used as the text column by torch_classifier_fn().
text_variables = [
    "chiefcomplaint",
]

# Target column names; one classifier per entry is queried at prediction time,
# and the prediction matrix columns follow this order.
labels = [
    "Cardiaque",
    "Coagulation",
    "Gazometrie",
    "Glycemie_Sanguine",
    "Hepato-Biliaire",
    "IonoC",
    "Lipase",
    "NFS",
    "Phospho-Calcique",
]

# Two-element range per quantitative variable, passed to remove_outliers().
# NOTE(review): presumably [lower, upper] bounds, with temperature in
# Fahrenheit given 60-130 -- confirm against remove_outliers and the dataset.
variables_ranges = {
    "temperature": [60, 130],
    "heartrate": [20, 300],
    "resprate": [5, 50],
    "o2sat": [20, 100],
    "sbp": [40, 250],
    "dbp": [20, 200],
    "pain": [0, 10],
}

# Training device: first CUDA GPU when torch reports one, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

def torch_classifier_fn():
    """Build a new, unfitted model around a torch MLP classifier.

    This is a zero-argument factory: ``Classifier.fit`` hands the function
    itself to ``fit_all_classifiers`` rather than a model instance.

    Returns:
        The object produced by ``generate_model`` for the module-level
        feature groups (qualitative, quantitative and the first text
        variable), configured with outlier removal and a unigram
        ``CountVectorizer`` limited to 600 features.
    """
    # Settings for the sklearn-style wrapper around torchMLP: trains on
    # `device`, always predicts on CPU.
    mlp_settings = {
        "early_stop_validations_size": 10000,
        "early_stop": True,
        "early_stop_metric": "f1",
        "early_stop_tol": 1,
        "n_epochs": 50,
        "device_train": device,
        "device_predict": "cpu",
        "class_weight": "balanced",
        "learning_rate": 1e-4,
        "verbose": False,
    }
    mlp_wrapper = torchMLPClassifier_sklearn(torchMLP, **mlp_settings)

    # Unigrams only, vocabulary capped at 600 entries.
    vectorizer_settings = {"ngram_range": (1, 1), "max_features": 600}

    return generate_model(
        mlp_wrapper,
        qualitatives_variables,
        quantitatives_variables,
        text_variables[0],
        remove_outliers=True,
        outliers_variables_ranges=variables_ranges,
        CountVectorizer_kwargs=vectorizer_settings,
    )

class Classifier(BaseEstimator):
    """Multi-label estimator that delegates to one fitted model per label.

    ``fit`` stores the result of ``fit_all_classifiers`` in
    ``self.classifiers``; prediction looks that object up by each name in
    the module-level ``labels`` list.
    """

    def preprocess(self, X, y=None):
        """Clean the features and, when given, wrap the targets in a DataFrame.

        Args:
            X: Feature table passed to ``remove_outliers`` together with
                ``variables_ranges``.
            y: Optional targets; when not ``None`` they are converted with
                ``pd.DataFrame(y, columns=labels)``.

        Returns:
            A ``(X_clean, y)`` tuple; ``y`` stays ``None`` when no targets
            were supplied.
        """
        # remove_outliers returns a pair; only the cleaned features are used.
        # NOTE(review): generate_model is also called with remove_outliers=True,
        # so cleaning may happen twice -- confirm this is intended.
        cleaned_features, _ = remove_outliers(X, variables_ranges)

        targets = None if y is None else pd.DataFrame(y, columns=labels)
        return cleaned_features, targets

    def fit(self, X, y):
        """Fit the per-label models and return ``self``.

        Args:
            X: Training features.
            y: Training targets, one column per entry of ``labels``.

        Returns:
            This estimator, with ``self.classifiers`` populated.
        """
        features, targets = self.preprocess(X, y)
        self.classifiers = fit_all_classifiers(
            torch_classifier_fn,
            features,
            targets,
            verbose=False,
        )
        return self

    def predict_proba(self, X):
        """Return the second-column probability of every label's classifier.

        Args:
            X: Features to score.

        Returns:
            Array with one row per sample and one column per entry of
            ``labels``, in that order.
        """
        features, _ = self.preprocess(X)

        # Take column 1 of each classifier's output and keep it 2-D so the
        # per-label columns can be joined side by side.
        per_label = [
            self.classifiers[label].predict_proba(features)[:, 1].reshape(-1, 1)
            for label in labels
        ]
        return np.concatenate(per_label, axis=1)

    def predict(self, X):
        """Return 0/1 decisions by thresholding ``predict_proba`` at 0.5.

        Args:
            X: Features to score.

        Returns:
            Integer array shaped like the ``predict_proba`` output, holding
            1 where the probability is at least 0.5 and 0 elsewhere.
        """
        probabilities = self.predict_proba(X)

        # Multiplying the boolean mask by 1 converts it to integers.
        return (probabilities >= 0.5) * 1