SciPy is a collection of packages addressing a number of standard problem domains in scientific computing, such as:
scipy.integrate
: numerical integration routines and differential equation solvers,
scipy.linalg
: linear algebra routines and matrix decompositions extending beyond those provided in numpy.linalg,
scipy.stats
: standard continuous and discrete probability distributions (density functions, samplers, cumulative distribution functions), various statistical tests, descriptive statistics, and more.
In this tutorial we will discuss only the scipy.stats package.
Install it by running one of the following commands at the command prompt (or the Anaconda prompt):
pip install scipy
or
conda install scipy
Once installed, import the stats module:
from scipy import stats as st
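As an aside, every distribution in scipy.stats can be "frozen" with fixed parameters; the resulting object exposes methods such as rvs, pdf, cdf, and ppf. A minimal sketch:
X = st.norm(loc=5, scale=3)  ## freeze X ~ N(5, 9)
X.mean(), X.std()  ## (5.0, 3.0)
X.cdf(5)  ## 0.5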
To generate random samples, we will use the NumPy package.
import numpy as np
## Random sample from a Normal Distribution N(5, 9):
Norm_Samp = np.random.normal(loc=5, scale=3, size=1000)
import matplotlib.pyplot as plt
import seaborn as sns
sns.histplot(Norm_Samp,kde=True)
plt.show()
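Note that the sample above changes on every run. For reproducible samples, NumPy's random generator can be seeded; a minimal sketch using the newer Generator API (the seed value 42 is arbitrary):
rng = np.random.default_rng(42)  ## seeded generator
Norm_Samp = rng.normal(loc=5, scale=3, size=1000)  ## reproducible sample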
The easiest way to calculate normal CDF probabilities is to use the st.norm.cdf()
function from the SciPy library.
The following code shows how to calculate the probability that a random variable takes on a value less than 1.96 in a standard normal distribution:
# P(X<=1.96), where X~N(0,1)
st.norm.cdf(x=1.96,loc=0,scale=1)
0.9750021048517795
# P(X<=5), where X~N(5,9)
st.norm.cdf(x=5,loc=5,scale=3)
0.5
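Since the CDF gives P(X<=x), interval probabilities follow by subtraction. For example, for X ~ N(5, 9) the probability of falling within one standard deviation of the mean is:
# P(2<=X<=8) = P(X<=8) - P(X<=2), where X~N(5,9)
st.norm.cdf(x=8,loc=5,scale=3) - st.norm.cdf(x=2,loc=5,scale=3)
0.6826894921370859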
x = np.linspace(-4, 4, 1000) ## sample points= x
y = st.norm.cdf(x) ## P(X<=x) for X~N(0,1)
plt.plot(x, y)
plt.show()
The x-axis shows the values of a random variable that follows a standard normal distribution, and the y-axis shows the probability that the variable takes on a value less than or equal to the corresponding x-axis value.
For example, at x = 1.96 we see that the cumulative probability P(X<=1.96) is roughly 0.975.
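The inverse lookup is st.norm.ppf(), which returns the value x whose cumulative probability equals a given q:
# Which x satisfies P(X<=x)=0.975, where X~N(0,1)?
st.norm.ppf(q=0.975,loc=0,scale=1)
1.959963984540054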
x = np.linspace(-4, 4, 1000) ## sample points= x
y = st.norm.pdf(x) ## Density function f(x) where X ~ N(0,1)
plt.plot(x, y)
plt.show()
## 100 random samples from Uniform(5, 10)
U=np.random.uniform(low=5,high=10,size=100)
sns.histplot(U,kde=True)
plt.show()
## P(U<=2.5), where U ~ Uniform(0,5)
st.uniform.cdf(x=2.5,loc=0,scale=5)
0.5
## P(U<=6), where U ~ Uniform(0,5)
st.uniform.cdf(x=6,loc=0,scale=5)
1.0
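Note that scipy.stats parameterizes the uniform distribution by loc and scale, i.e. Uniform(loc, loc+scale); the Uniform(5, 10) sample drawn above therefore corresponds to loc=5, scale=5:
## P(U<=7.5), where U ~ Uniform(5,10)
st.uniform.cdf(x=7.5,loc=5,scale=5)
0.5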
x = np.linspace(0, 5, 1000) ## sample points= x
y = st.uniform.cdf(x,loc=0,scale=5) ## P(X<=x) where X ~ Uniform(0,5)
plt.plot(x, y)
plt.show()
x = np.linspace(0, 5, 1000) ## sample points= x
y = st.uniform.pdf(x,loc=0,scale=5) ## Density function f(x) where X ~ Uniform(0,5)
plt.plot(x, y)
plt.show()
## Shapiro-Wilk Test
Tests whether a data sample has a Gaussian distribution.
Assumptions
Observations in each sample are independent and identically distributed (iid).
Hypothesis
$H_0:$ the sample has a Gaussian distribution.
$H_1:$ the sample does not have a Gaussian distribution.
data = [0.873, 2.817, 0.121, -0.945, -0.055, -1.436, 0.360, -1.478, -1.637, -1.869]
stat, p = st.shapiro(data)
print(st.shapiro(data))
print('stat=%.3f, p=%.3f' % (stat, p))
ShapiroResult(statistic=0.8951009511947632, pvalue=0.19340917468070984)
stat=0.895, p=0.193
We can also extract just the p-value:
round(st.shapiro(data).pvalue,ndigits=3)
0.193
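Applying the usual decision rule at the 5% level to this p-value:
if p > 0.05:
    print('Probably Gaussian')
else:
    print('Probably not Gaussian')
Probably Gaussian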
## D'Agostino's K² Test
Tests whether a data sample has a Gaussian distribution.
Assumptions
Observations in each sample are independent and identically distributed (iid).
Interpretation
$H_0:$ the sample has a Gaussian distribution.
$H_1:$ the sample does not have a Gaussian distribution.
data = [0.873, 2.817, 0.121, -0.945, -0.055, -1.436, 0.360, -1.478, -1.637, -1.869]
stat, p = st.normaltest(data)
print(st.normaltest(data))
print('stat=%.3f, p=%.3f' % (stat, p))
if p > 0.05:
    print('Probably Gaussian')
else:
    print('Probably not Gaussian')
NormaltestResult(statistic=3.3918758941075353, pvalue=0.18342710340675566)
stat=3.392, p=0.183
Probably Gaussian
## Anderson-Darling Test
Tests whether a data sample has a Gaussian distribution.
Assumptions
Observations in each sample are independent and identically distributed (iid).
Interpretation
$H_0:$ the sample has a Gaussian distribution.
$H_1:$ the sample does not have a Gaussian distribution.
data = [0.873, 2.817, 0.121, -0.945, -0.055, -1.436, 0.360, -1.478, -1.637, -1.869]
result = st.anderson(data)
result
AndersonResult(statistic=0.4239737141854807, critical_values=array([0.501, 0.57 , 0.684, 0.798, 0.95 ]), significance_level=array([15. , 10. , 5. , 2.5, 1. ]))
print('stat=%.3f' % (result.statistic))
for i in range(len(result.critical_values)):
    sl, cv = result.significance_level[i], result.critical_values[i]
    if result.statistic < cv:
        print('Probably Gaussian at the %.1f%% level' % (sl))
    else:
        print('Probably not Gaussian at the %.1f%% level' % (sl))
stat=0.424
Probably Gaussian at the 15.0% level
Probably Gaussian at the 10.0% level
Probably Gaussian at the 5.0% level
Probably Gaussian at the 2.5% level
Probably Gaussian at the 1.0% level
## Pearson's Correlation Coefficient
Tests whether two samples have a linear relationship.
Assumptions
Observations in each sample are independent and identically distributed (iid). Observations in each sample are normally distributed. Observations in each sample have the same variance.
Interpretation
$H_0:$ the two samples are independent.
$H_1:$ there is a dependency between the samples.
data1 = [0.873, 2.817, 0.121, -0.945, -0.055, -1.436, 0.360, -1.478, -1.637, -1.869]
data2 = [0.353, 3.517, 0.125, -7.545, -0.555, -1.536, 3.350, -1.578, -3.537, -1.579]
print(st.pearsonr(data1, data2))
stat, p = st.pearsonr(data1, data2)
print('stat=%.3f, p=%.3f' % (stat, p))
if p > 0.05:
    print('Probably independent')
else:
    print('Probably dependent')
(0.6879696368388863, 0.02787296951449617)
stat=0.688, p=0.028
Probably dependent
## Spearman's Rank Correlation
Tests whether two samples have a monotonic relationship.
Assumptions
Observations in each sample are independent and identically distributed (iid). Observations in each sample can be ranked.
Interpretation
$H_0:$ the two samples are independent.
$H_1:$ there is a dependency between the samples.
data1 = [0.873, 2.817, 0.121, -0.945, -0.055, -1.436, 0.360, -1.478, -1.637, -1.869]
data2 = [0.353, 3.517, 0.125, -7.545, -0.555, -1.536, 3.350, -1.578, -3.537, -1.579]
print(st.spearmanr(data1, data2))
stat, p = st.spearmanr(data1, data2)
print('stat=%.3f, p=%.3f' % (stat, p))
if p > 0.05:
    print('Probably independent')
else:
    print('Probably dependent')
SpearmanrResult(correlation=0.8545454545454544, pvalue=0.0016368033159867143)
stat=0.855, p=0.002
Probably dependent
## Kendall's Rank Correlation
Tests whether two samples have a monotonic relationship.
Assumptions
Observations in each sample are independent and identically distributed (iid). Observations in each sample can be ranked.
Interpretation
$H_0:$ the two samples are independent.
$H_1:$ there is a dependency between the samples.
data1 = [0.873, 2.817, 0.121, -0.945, -0.055, -1.436, 0.360, -1.478, -1.637, -1.869]
data2 = [0.353, 3.517, 0.125, -7.545, -0.555, -1.536, 3.350, -1.578, -3.537, -1.579]
print(st.kendalltau(data1, data2))
stat, p = st.kendalltau(data1, data2)
print('stat=%.3f, p=%.3f' % (stat, p))
if p > 0.05:
    print('Probably independent')
else:
    print('Probably dependent')
KendalltauResult(correlation=0.7333333333333333, pvalue=0.002212852733686067)
stat=0.733, p=0.002
Probably dependent
## Chi-Squared Test
Tests whether two categorical variables are related or independent.
Assumptions
Observations used in the calculation of the contingency table are independent.
Interpretation
$H_0:$ the two samples are independent.
$H_1:$ there is a dependency between the samples.
table = [[10, 20, 30],[6, 9, 17]]
print(st.chi2_contingency(table))
stat, p, dof, expected = st.chi2_contingency(table)
print('stat=%.3f, p=%.3f' % (stat, p))
if p > 0.05:
    print('Probably independent')
else:
    print('Probably dependent')
(0.27157465150403504, 0.873028283380073, 2, array([[10.43478261, 18.91304348, 30.65217391],
       [ 5.56521739, 10.08695652, 16.34782609]]))
stat=0.272, p=0.873
Probably independent
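In practice the contingency table is often built from raw categorical data. A hypothetical sketch using pandas.crosstab (the column names and values here are made up):
import pandas as pd
df = pd.DataFrame({'group': ['A', 'A', 'B', 'B', 'A', 'B'],
                   'outcome': ['yes', 'no', 'yes', 'yes', 'no', 'no']})
table = pd.crosstab(df['group'], df['outcome'])  ## table of counts
stat, p, dof, expected = st.chi2_contingency(table)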
## Student's t-Test
Tests whether the means of two independent samples are significantly different.
data1 = [0.873, 2.817, 0.121, -0.945, -0.055, -1.436, 0.360, -1.478, -1.637, -1.869]
data2 = [1.142, -0.432, -0.938, -0.729, -0.846, -0.157, 0.500, 1.183, -1.075, -0.169]
print(st.ttest_ind(data1, data2))
stat, p = st.ttest_ind(data1, data2)
print('stat=%.3f, p=%.3f' % (stat, p))
if p > 0.05:
    print('Probably the same distribution')
else:
    print('Probably different distributions')
Ttest_indResult(statistic=-0.3256156287495794, pvalue=0.7484698873615687)
stat=-0.326, p=0.748
Probably the same distribution
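By default st.ttest_ind assumes the two samples have equal variances. Passing equal_var=False gives Welch's t-test, which drops that assumption:
stat, p = st.ttest_ind(data1, data2, equal_var=False)  ## Welch's t-test
print('stat=%.3f, p=%.3f' % (stat, p))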
## Paired Student's t-Test
Tests whether the means of two paired samples are significantly different.
data1 = [0.873, 2.817, 0.121, -0.945, -0.055, -1.436, 0.360, -1.478, -1.637, -1.869]
data2 = [1.142, -0.432, -0.938, -0.729, -0.846, -0.157, 0.500, 1.183, -1.075, -0.169]
print(st.ttest_rel(data1, data2))
stat, p = st.ttest_rel(data1, data2)
print('stat=%.3f, p=%.3f' % (stat, p))
if p > 0.05:
    print('Probably the same distribution')
else:
    print('Probably different distributions')
Ttest_relResult(statistic=-0.3341680721407323, pvalue=0.7459078283577478)
stat=-0.334, p=0.746
Probably the same distribution
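A paired t-test is equivalent to a one-sample t-test on the pairwise differences, which makes a useful sanity check:
diff = np.array(data1) - np.array(data2)  ## pairwise differences
stat, p = st.ttest_1samp(diff, popmean=0)  ## matches ttest_rel above
print('stat=%.3f, p=%.3f' % (stat, p))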
## Analysis of Variance Test (ANOVA)
Tests whether the means of two or more independent samples are significantly different.
data1 = [0.873, 2.817, 0.121, -0.945, -0.055, -1.436, 0.360, -1.478, -1.637, -1.869]
data2 = [1.142, -0.432, -0.938, -0.729, -0.846, -0.157, 0.500, 1.183, -1.075, -0.169]
data3 = [-0.208, 0.696, 0.928, -1.148, -0.213, 0.229, 0.137, 0.269, -0.870, -1.204]
print(st.f_oneway(data1, data2, data3))
stat, p = st.f_oneway(data1, data2, data3)
print('stat=%.3f, p=%.3f' % (stat, p))
if p > 0.05:
    print('Probably the same distribution')
else:
    print('Probably different distributions')
F_onewayResult(statistic=0.09641783499925058, pvalue=0.9083957433926546)
stat=0.096, p=0.908
Probably the same distribution
## Repeated Measures ANOVA Test
Tests whether the means of two or more paired samples are significantly different. SciPy does not implement this test, so we use AnovaRM from the statsmodels package.
# Import library
import numpy as np
import pandas as pd
from statsmodels.stats.anova import AnovaRM
# Create the data
dataframe = pd.DataFrame({'Cars': np.repeat([1, 2, 3, 4, 5], 4),
                          'Oil': np.tile([1, 2, 3, 4], 5),
                          'Mileage': [36, 38, 30, 29,
                                      34, 38, 30, 29,
                                      34, 28, 38, 32,
                                      38, 34, 20, 44,
                                      26, 28, 34, 50]})
# Conduct the repeated measures ANOVA
print(AnovaRM(data=dataframe, depvar='Mileage',
              subject='Cars', within=['Oil']).fit())
                Anova
=================================
     F Value Num DF  Den DF Pr > F
---------------------------------
Oil   0.5679 3.0000 12.0000 0.6466
=================================
## Mann-Whitney U Test
Tests whether the distributions of two independent samples are equal or not.
data1 = [0.873, 2.817, 0.121, -0.945, -0.055, -1.436, 0.360, -1.478, -1.637, -1.869]
data2 = [1.142, -0.432, -0.938, -0.729, -0.846, -0.157, 0.500, 1.183, -1.075, -0.169]
print(st.mannwhitneyu(data1, data2))
stat, p = st.mannwhitneyu(data1, data2)
print('stat=%.3f, p=%.3f' % (stat, p))
if p > 0.05:
    print('Probably the same distribution')
else:
    print('Probably different distributions')
MannwhitneyuResult(statistic=40.0, pvalue=0.47267559351158717)
stat=40.000, p=0.473
Probably the same distribution
## Wilcoxon Signed-Rank Test
Tests whether the distributions of two paired samples are equal or not.
data1 = [0.873, 2.817, 0.121, -0.945, -0.055, -1.436, 0.360, -1.478, -1.637, -1.869]
data2 = [1.142, -0.432, -0.938, -0.729, -0.846, -0.157, 0.500, 1.183, -1.075, -0.169]
print(st.wilcoxon(data1, data2))
stat, p = st.wilcoxon(data1, data2)
print('stat=%.3f, p=%.3f' % (stat, p))
if p > 0.05:
    print('Probably the same distribution')
else:
    print('Probably different distributions')
WilcoxonResult(statistic=21.0, pvalue=0.556640625)
stat=21.000, p=0.557
Probably the same distribution
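st.wilcoxon also accepts the pairwise differences as a single sample, giving the same result:
diff = np.array(data1) - np.array(data2)  ## pairwise differences
print(st.wilcoxon(diff))  ## identical to st.wilcoxon(data1, data2)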
## Kruskal-Wallis H Test
Tests whether the distributions of two or more independent samples are equal or not.
data1 = [0.873, 2.817, 0.121, -0.945, -0.055, -1.436, 0.360, -1.478, -1.637, -1.869]
data2 = [1.142, -0.432, -0.938, -0.729, -0.846, -0.157, 0.500, 1.183, -1.075, -0.169]
print(st.kruskal(data1, data2))
stat, p = st.kruskal(data1, data2)
print('stat=%.3f, p=%.3f' % (stat, p))
if p > 0.05:
    print('Probably the same distribution')
else:
    print('Probably different distributions')
KruskalResult(statistic=0.5714285714285694, pvalue=0.4496917979688917)
stat=0.571, p=0.450
Probably the same distribution
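Unlike the Mann-Whitney U test, the Kruskal-Wallis test extends to three or more groups; for example, reusing data3 from the ANOVA example above:
data3 = [-0.208, 0.696, 0.928, -1.148, -0.213, 0.229, 0.137, 0.269, -0.870, -1.204]
stat, p = st.kruskal(data1, data2, data3)
print('stat=%.3f, p=%.3f' % (stat, p))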
## Friedman Test
Tests whether the distributions of two or more paired samples are equal or not.
data1 = [0.873, 2.817, 0.121, -0.945, -0.055, -1.436, 0.360, -1.478, -1.637, -1.869]
data2 = [1.142, -0.432, -0.938, -0.729, -0.846, -0.157, 0.500, 1.183, -1.075, -0.169]
data3 = [-0.208, 0.696, 0.928, -1.148, -0.213, 0.229, 0.137, 0.269, -0.870, -1.204]
print(st.friedmanchisquare(data1, data2, data3))
stat, p = st.friedmanchisquare(data1, data2, data3)
print('stat=%.3f, p=%.3f' % (stat, p))
if p > 0.05:
    print('Probably the same distribution')
else:
    print('Probably different distributions')
FriedmanchisquareResult(statistic=0.8000000000000114, pvalue=0.6703200460356356)
stat=0.800, p=0.670
Probably the same distribution