123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990 |
- import sys
- import os
- from pathlib import Path
- sys.path.append(str(Path(os.path.dirname(__file__)).parent)) # Dirty but it works
- from bop_scripts.preprocessing import remove_outliers
- from bop_scripts.nn_models import torchMLPClassifier_sklearn, torchMLP
- from bop_scripts.models import generate_model, fit_all_classifiers
- import torch
- import pandas as pd
- import numpy as np
- from sklearn.base import BaseEstimator
# Input columns, grouped by kind; each group is handed separately to
# generate_model() in torch_classifier_fn().
qualitatives_variables = [
    "gender",
    "last_7",
    "last_30",
]
quantitatives_variables = [
    'age',
    'temperature',
    'heartrate',
    'resprate',
    'o2sat',
    'sbp',
    'dbp',
    'pain',
]
text_variables = [
    "chiefcomplaint",
]

# Target columns: Classifier names the columns of y with these and
# looks up one fitted classifier per entry when predicting.
labels = [
    'Cardiaque',
    'Coagulation',
    'Gazometrie',
    'Glycemie_Sanguine',
    'Hepato-Biliaire',
    'IonoC',
    'Lipase',
    'NFS',
    'Phospho-Calcique',
]

# Per-variable two-element ranges passed to remove_outliers().
# NOTE(review): presumably [min, max] plausibility bounds, with temperature
# in degrees Fahrenheit - confirm against bop_scripts.preprocessing.
variables_ranges = {
    "temperature": [60, 130],
    "heartrate": [20, 300],
    "resprate": [5, 50],
    "o2sat": [20, 100],
    "sbp": [40, 250],
    "dbp": [20, 200],
    "pain": [0, 10],
}

# Use the first CUDA device when torch reports one, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"
def torch_classifier_fn():
    """Build a fresh, unfitted model for a single label.

    A ``torchMLPClassifier_sklearn`` wrapping ``torchMLP`` is configured
    (early stopping on F1, balanced class weights, training on ``device``,
    prediction on CPU) and handed to ``generate_model`` together with the
    module-level variable lists and outlier ranges.

    Returns:
        Whatever ``generate_model`` returns for that configuration.
        NOTE(review): presumably a scikit-learn style pipeline - confirm
        in bop_scripts.models.
    """
    mlp_settings = {
        "early_stop_validations_size": 10000,
        "early_stop": True,
        "early_stop_metric": "f1",
        "early_stop_tol": 1,
        "n_epochs": 50,
        "device_train": device,
        "device_predict": "cpu",
        "class_weight": "balanced",
        "learning_rate": 1e-4,
        "verbose": False,
    }
    mlp_estimator = torchMLPClassifier_sklearn(torchMLP, **mlp_settings)

    # Only the first (and currently only) text column is passed on.
    text_column = text_variables[0]
    vectorizer_options = {"ngram_range": (1, 1), "max_features": 600}

    return generate_model(
        mlp_estimator,
        qualitatives_variables,
        quantitatives_variables,
        text_column,
        remove_outliers=True,
        outliers_variables_ranges=variables_ranges,
        CountVectorizer_kwargs=vectorizer_options,
    )
class Classifier(BaseEstimator):
    """Multi-label estimator keeping one fitted classifier per label.

    ``fit`` stores the result of ``fit_all_classifiers`` in
    ``self.classifiers``; the prediction methods index it by the names in
    the module-level ``labels`` list.
    """

    def preprocess(self, X, y=None):
        """Run ``remove_outliers`` on ``X`` and wrap ``y`` in a DataFrame.

        Args:
            X: Feature table passed to ``remove_outliers`` together with
                ``variables_ranges``.
            y: Optional targets; when given they are converted to a
                ``pandas.DataFrame`` whose columns are ``labels``.

        Returns:
            A ``(X_clean, y)`` pair; ``y`` is ``None`` when no targets
            were supplied. The second value returned by
            ``remove_outliers`` is discarded.
        """
        cleaned, _ = remove_outliers(X, variables_ranges)
        targets = None if y is None else pd.DataFrame(y, columns=labels)
        return cleaned, targets

    def fit(self, X, y):
        """Fit the per-label classifiers and return ``self``."""
        features, targets = self.preprocess(X, y)
        self.classifiers = fit_all_classifiers(
            torch_classifier_fn, features, targets, verbose=False
        )
        return self

    def predict_proba(self, X):
        """Return one probability column per label, in ``labels`` order.

        For every label, column 1 of that classifier's ``predict_proba``
        output is taken (NOTE(review): presumably the positive class -
        confirm) and the columns are joined side by side.
        """
        features, _ = self.preprocess(X)
        per_label_columns = [
            self.classifiers[name].predict_proba(features)[:, 1].reshape(-1, 1)
            for name in labels
        ]
        return np.concatenate(per_label_columns, axis=1)

    def predict(self, X):
        """Return 0/1 decisions by thresholding ``predict_proba`` at 0.5."""
        # Multiplying the boolean mask by 1 turns it into integers.
        return (self.predict_proba(X) >= 0.5) * 1
|