classifier.py 2.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990
  1. import sys
  2. import os
  3. from pathlib import Path
  4. sys.path.append(str(Path(os.path.dirname(__file__)).parent)) # Dirty but it works
  5. from bop_scripts.preprocessing import remove_outliers
  6. from bop_scripts.nn_models import torchMLPClassifier_sklearn, torchMLP
  7. from bop_scripts.models import generate_model, fit_all_classifiers
  8. import torch
  9. import pandas as pd
  10. import numpy as np
  11. from sklearn.base import BaseEstimator
  12. qualitatives_variables = ["gender", "last_7", "last_30"]
  13. quantitatives_variables = ['age', 'temperature', 'heartrate', 'resprate', 'o2sat', 'sbp', 'dbp', 'pain']
  14. text_variables = ["chiefcomplaint"]
  15. labels = ['Cardiaque', 'Coagulation', 'Gazometrie', 'Glycemie_Sanguine', 'Hepato-Biliaire', 'IonoC', 'Lipase', 'NFS', 'Phospho-Calcique']
  16. variables_ranges = {
  17. "temperature":[60,130],
  18. "heartrate":[20, 300],
  19. "resprate":[5, 50],
  20. "o2sat":[20, 100],
  21. "sbp":[40, 250],
  22. "dbp":[20, 200],
  23. "pain":[0,10]
  24. }
  25. device = "cuda:0" if torch.cuda.is_available() else "cpu"
  26. def torch_classifier_fn ():
  27. torch_classifier = torchMLPClassifier_sklearn(
  28. torchMLP,
  29. early_stop_validations_size=10000,
  30. early_stop=True,
  31. early_stop_metric="f1",
  32. early_stop_tol=1,
  33. n_epochs=50,
  34. device_train= device,
  35. device_predict="cpu",
  36. class_weight="balanced",
  37. learning_rate=1e-4,
  38. verbose=False
  39. )
  40. torch_sklearn_classifier = generate_model(
  41. torch_classifier,
  42. qualitatives_variables,
  43. quantitatives_variables,
  44. text_variables[0],
  45. remove_outliers=True,
  46. outliers_variables_ranges=variables_ranges,
  47. CountVectorizer_kwargs={"ngram_range":(1,1), "max_features":600}
  48. )
  49. return torch_sklearn_classifier
  50. class Classifier(BaseEstimator):
  51. def preprocess (self, X, y=None):
  52. X_clean, outliers = remove_outliers(X, variables_ranges)
  53. if y is not None:
  54. y = pd.DataFrame(y, columns=labels)
  55. return X_clean, y
  56. def fit(self, X, y):
  57. X, y = self.preprocess(X, y)
  58. self.classifiers = fit_all_classifiers(
  59. torch_classifier_fn,
  60. X,
  61. y,
  62. verbose=False
  63. )
  64. return self
  65. def predict_proba(self, X):
  66. X, y = self.preprocess(X)
  67. predictions = []
  68. y_columns = labels
  69. for y_column in y_columns:
  70. predictions.append(self.classifiers[y_column].predict(X).reshape(-1, 1))
  71. y_pred = np.concatenate(predictions, axis=1)
  72. return y_pred
  73. def predict(self, X):
  74. y_pred = self.predict_proba(X)
  75. return (y_pred >= 0.5)*1