test_qualitatif.py 3.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111
  1. import pandas as pd
  2. import numpy as np
  3. from scipy.stats import chi2_contingency, fisher_exact
  4. from scipy.stats import normaltest, kstest, levene, ttest_ind, mannwhitneyu, f_oneway, kruskal
  5. import statsmodels.stats.weightstats as ws
  6. # Qualitatif
  7. class testQualitatif ():
  8. """
  9. Applique le test qualitatif le plus adapté à partir d'un jeu de données
  10. df : jeu de données
  11. y : variable à tester
  12. x : variable dont on souhaite mesurer l'impact
  13. """
  14. def __init__ (self, df, y, x):
  15. # Calcul du tableau de contingence
  16. self.contingency = self._get_contingency(df, y, x)
  17. def _get_contingency (self, df, y, x):
  18. contingency = pd.DataFrame(
  19. {"n":df.groupby(x)[y].value_counts()}
  20. ).reset_index() \
  21. .pivot_table(index = [x], columns = [y]) \
  22. .fillna(0)
  23. contingency.columns = contingency.columns.droplevel(0)
  24. return(contingency.values.astype(int))
  25. def best_test (self):
  26. """
  27. Selectionne le meilleur test possible
  28. """
  29. # Ordre de priorité
  30. ## 1. Khi2
  31. ## 2. Khi2 - Yates
  32. ## 3. Student test
  33. ## 4. Absence de test
  34. order_test = {
  35. "khi2":[self.khi2,[False]],
  36. "khi2_yates":[self.khi2,[True]],
  37. "fisher":[self.fisher,[]],
  38. "no_test":[self._no_test, []]
  39. }
  40. ## Application des tests dans l'ordre
  41. for test_name, test in order_test.items():
  42. test_result = test[0](*test[1])
  43. if (test_result["valid"] == True):
  44. return (test_name, test_result)
  45. def khi2 (self, yates_correction = False):
  46. """
  47. Test du Khi-2
  48. Paramètre :
  49. yates_correction : Si True, effectue la correction de Yates
  50. """
  51. # Application du test
  52. test_result = chi2_contingency(self.contingency, correction=yates_correction)
  53. # Validité du test
  54. if yates_correction == False:
  55. khi2_valid = len([True for x in test_result[3] for y in x if y < 5]) == 0
  56. else:
  57. khi2_valid = (len([True for x in test_result[3] for y in x if y < 3]) == 0) \
  58. & (test_result[2] == 1)
  59. # Structuration du résultat
  60. output_result = dict(zip(
  61. ["statistic","p_value", "dof", "theorical_values","observed_values","yates_correction", "valid"],
  62. list(test_result)+[self.contingency, yates_correction, khi2_valid]
  63. ))
  64. return output_result
  65. def fisher (self):
  66. """
  67. Test de Fisher
  68. """
  69. # Vérification des CI
  70. valid = (self.contingency.shape == (2,2))
  71. if (valid == True):
  72. fisher_result = fisher_exact(self.contingency)
  73. output_result = dict(zip(
  74. ["statistic", "p_value", "observed_values","valid"],
  75. list(fisher_result)+[self.contingency, valid]
  76. ))
  77. else:
  78. output_result = {"valid":valid}
  79. return output_result
  80. def _no_test (self):
  81. """
  82. Retourne l'absence de test
  83. """
  84. output_result = {"valid":True}
  85. return output_result