__init__.py 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107
  1. #
  2. # Statistics test
  3. #
  4. import numpy as np
  5. from scipy.stats.distributions import chi2
  6. import warnings
  7. from functools import reduce
  8. from operator import mul
  9. # chisquare_trend_contingency
  10. def chisquare_trend_contingency (observed, tendencies_values, axis = None):
  11. """
  12. chisquare_trend_contingency
  13. Apply a Chi2 square Cochran-Armitage trend test with variable in a contingency table.
  14. It measure the effect of an tendencie variable on a evaluated variable.
  15. The evaluated variable should be a categorial variable with 2 modalities. It should be of shape 2.
  16. The tendencies values should be of shape >= 2 an be numeric.
  17. Validation conditions
  18. ----------
  19. - Monotonous evolution of the tendencie (not checked by the function)
  20. - Theorical value >= 5 : checked by the function
  21. Parameters
  22. ----------
  23. observed, numpy matrix : contingency table of observed value. Should have an axis with a shape of 2 as the test is designed to evaluate tendencies on 2 modalities categorial variable. If only an axis is of shape 2, this axis is used for applying the test, otherwise you should specify the axis containing the tested variable with the axis parameter.
  24. tendencies_values, numpy vector : values of the tendencie variable, should be an 1D vector of shape >= 2
  25. Returns
  26. ----
  27. chi2 : float
  28. The test statistic.
  29. p : float
  30. The p-value of the test
  31. dof : int
  32. Degrees of freedom
  33. expected : ndarray, same shape as `observed`
  34. The expected frequencies, based on the marginal sums of the table.
  35. """
  36. # Making all numpy
  37. observed = np.array(observed)
  38. tendencies_values = np.array(tendencies_values)
  39. # Checking data
  40. ## Observed
  41. observed_shape = np.array(observed.shape)
  42. if ((observed_shape == 2).sum() == 0):
  43. # Exception if absence of shape 2
  44. raise Exception("Observed should contains at list of element of shape 2.")
  45. ## Tendencies values
  46. if(tendencies_values.ndim != 1):
  47. # Exception if dim different of 1
  48. raise Exception("Tendencies values should be an 1D array")
  49. elif(tendencies_values.shape not in observed_shape):
  50. raise Exception("Tendencies values should be contains the same number of element than observed")
  51. # Calculating parameters
  52. n = observed.sum() # Number of observations
  53. ## Getting axis containing the evaluated variable
  54. tmp_axis_id = (np.array(observed.shape) == 2) ## Check which axis is of shape 2
  55. if (tmp_axis_id.sum() == 1):
  56. # If there is only one axis with shape 2 : it is the evaluated variable
  57. axis_id = np.where(tmp_axis_id)[0][0]
  58. else:
  59. # Taking the content of the axis variables
  60. axis_id = axis
  61. if (type(axis_id) != type(int())):
  62. raise Exception("Observed values are of shape (2,2), you should specify the axis of the evaluated variable with the axis parameter (0 or 1)")
  63. # Getting theorical values
  64. expected = (observed.sum(axis = 1)/n).reshape(-1, 1)*observed.sum(axis = 0)
  65. # Getting dof
  66. dof = 1 # By desing in this test, this is a 1 dof
  67. # Checking condition parameter
  68. ## Every expected value should be >= 5
  69. if ((expected < 5).sum() != 0):
  70. warnings.warn("Test condition not respected, every expected value should be >= 5")
  71. # Getting test parameter
  72. chi2_parameter = (
  73. (
  74. np.power(n, 3)
  75. *np.square(sum(tendencies_values*(np.take(observed, 0, axis_id)-np.take(expected, 0, axis_id))))
  76. )/(
  77. reduce(mul, observed.sum(axis = (1-axis_id)))*
  78. (
  79. n*(observed.sum(axis = axis_id)*np.square(tendencies_values)).sum()
  80. -np.square((observed.sum(axis = axis_id)*(tendencies_values)).sum())
  81. )
  82. )
  83. )
  84. # Getting p
  85. p = chi2.sf(chi2_parameter, dof)
  86. # Returning result
  87. return(chi2_parameter, p, dof, expected)