# Hypothesis Testing#

## Synopsis#

• Null Hypothesis Significance testing (NHST)

• Least important: only useful in some contexts

• Methodology

• Apparent difference in distribution

• Assume no difference

• Test Statistics

• Calculate P-Value

• Comment

Note

We make modelling decisions on

1. Null Hypothesis

2. Test Statistic ( Choosing a 1-sided or a 2-sided test)

import numpy as np
import scipy as sp
import scipy.stats as stats
import matplotlib as mpl
import matplotlib.pyplot as plt
from ipywidgets import interact, interactive, fixed
import ipywidgets as widgets
import pandas as pd
from IPython.display import display, display_html
import bqplot
import first

from bqplot import LinearScale, Hist, Figure, Axis, ColorScale

from bqplot import pyplot as pltbq
# import seaborn as sns
# Seed NumPy's global random number generator so we all get the same results.
np.random.seed(17)

# some nice colors from http://colorbrewer2.org/
COLOR1 = '#7fc97f'
COLOR2 = '#beaed4'
COLOR3 = '#fdc086'
COLOR4 = '#ffff99'
COLOR5 = '#386cb0'

# Default size for the matplotlib figures drawn later in the notebook.
mpl.rcParams['figure.figsize'] = (8.0, 9.0)

%matplotlib inline


## Part 1#

• Suppose you observe an apparent difference between two groups

• Check if it might be due to chance

# Load the three data frames from the project-local `first` module.
# NOTE(review): presumably `live` is all live births, `firsts` the first-born
# babies and `others` the remaining ones -- confirm against first.MakeFrames.
live, firsts, others = first.MakeFrames()

# Print the index, column count, dtypes and memory usage of `firsts`.
firsts.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4413 entries, 0 to 13588
Columns: 244 entries, caseid to totalwgt_lb
dtypes: float64(171), int64(73)
memory usage: 8.2 MB

firsts.head()

caseid pregordr howpreg_n howpreg_p moscurrp nowprgdk pregend1 pregend2 nbrnaliv multbrth ... laborfor_i religion_i metro_i basewgt adj_mod_basewgt finalwgt secu_p sest cmintvw totalwgt_lb
0 1 1 NaN NaN NaN NaN 6.0 NaN 1.0 NaN ... 0 0 0 3410.389399 3869.349602 6448.271112 2 9 NaN 8.8125
2 2 1 NaN NaN NaN NaN 5.0 NaN 3.0 5.0 ... 0 0 0 7226.301740 8567.549110 12999.542264 2 12 NaN 9.1250
5 6 1 NaN NaN NaN NaN 6.0 NaN 1.0 NaN ... 0 0 0 4870.926435 5325.196999 8874.440799 1 23 NaN 8.5625
8 7 1 NaN NaN NaN NaN 5.0 NaN 1.0 NaN ... 0 0 0 3409.579565 3787.539000 6911.879921 2 14 NaN 7.5625
10 12 1 NaN NaN NaN NaN 5.0 NaN 1.0 NaN ... 0 0 0 3612.781968 4146.013572 6909.331618 1 31 NaN 7.8125

5 rows × 244 columns

firsts.describe()

caseid pregordr howpreg_n howpreg_p moscurrp nowprgdk pregend1 pregend2 nbrnaliv multbrth ... laborfor_i religion_i metro_i basewgt adj_mod_basewgt finalwgt secu_p sest cmintvw totalwgt_lb
count 4413.000000 4413.000000 0.0 0.0 0.0 0.0 4413.000000 5.000000 4412.000000 52.000000 ... 4413.000000 4413.000000 4413.0 4413.000000 4413.000000 4413.000000 4413.000000 4413.000000 0.0 4363.000000
mean 6238.983005 1.330387 NaN NaN NaN NaN 5.764786 5.000000 1.014733 1.692308 ... 0.000906 0.003626 0.0 4181.313797 5347.029828 8143.762911 1.489916 43.808294 NaN 7.201094
std 3651.545931 0.715439 NaN NaN NaN NaN 0.431071 2.236068 0.153581 1.528019 ... 0.030096 0.063770 0.0 3539.367216 4991.010212 8270.404870 0.499955 24.116585 NaN 1.420573
min 1.000000 1.000000 NaN NaN NaN NaN 1.000000 1.000000 1.000000 1.000000 ... 0.000000 0.000000 0.0 64.577101 71.201194 118.656790 1.000000 1.000000 NaN 0.125000
25% 3047.000000 1.000000 NaN NaN NaN NaN 6.000000 6.000000 1.000000 1.000000 ... 0.000000 0.000000 0.0 2418.005561 2876.683738 4033.606004 1.000000 24.000000 NaN 6.437500
50% 6179.000000 1.000000 NaN NaN NaN NaN 6.000000 6.000000 1.000000 1.000000 ... 0.000000 0.000000 0.0 3409.970503 4196.796883 6456.845037 1.000000 45.000000 NaN 7.312500
75% 9415.000000 1.000000 NaN NaN NaN NaN 6.000000 6.000000 1.000000 1.000000 ... 0.000000 0.000000 0.0 4870.474384 5862.064708 9496.575715 2.000000 65.000000 NaN 8.000000
max 12571.000000 9.000000 NaN NaN NaN NaN 6.000000 6.000000 5.000000 5.000000 ... 1.000000 2.000000 0.0 99707.832014 157143.686687 261879.953864 2.000000 84.000000 NaN 15.437500

8 rows × 244 columns

• Review variables

• pregnancy length

• birth weight

def test_statistic(data):
grp1, grp2 = data
test_stat = abs(grp1.mean() - grp2.mean())
return test_stat

# Observed effect: difference in mean pregnancy length between the two groups.
grp1 = firsts.prglngth
grp2 = others.prglngth
actual = test_statistic((grp1, grp2))
actual

0.07803726677754952

grp2

1        39
3        39
4        39
6        40
7        42
..
13572    39
13574    39
13579    39
13591    39
13592    39
Name: prglngth, Length: 4735, dtype: int64

# Remember the group sizes, then stack both samples into one array so the
# group labels can be reassigned by shuffling.
n = len(grp1)
m = len(grp2)
pool = np.hstack((grp1, grp2))

pool

array([39, 39, 38, ..., 39, 39, 39])

def run_model():
    """ Simulates one data set under the null hypothesis

    Shuffles the module-level `pool` in place and splits it at `n`, so the
    first part has the size of the first group and the rest the size of the
    second group.

    returns: pair of arrays (simulated grp1, simulated grp2)
    """
    np.random.shuffle(pool)
    # Both parts are slices of `pool`, not copies: the next call reshuffles
    # them, so compute the statistic before calling run_model() again.
    return pool[:n], pool[n:]

run_model()

(array([30, 35, 41, ..., 40, 39, 39]), array([39, 39, 39, ..., 39, 38, 40]))

test_statistic(run_model())

0.04892370650121336

# Sampling distribution of the statistic under the null hypothesis:
# 1000 independent shuffles of the pooled data.
test_stats = np.array(
    [test_statistic(run_model()) for _ in range(1000)]
)

# Mark the observed statistic with a vertical line, then draw the histogram
# of the simulated statistics so the two can be compared visually.
plt.axvline(actual, linewidth=3, color='0.8')
plt.hist(test_stats, color=COLOR5)
plt.xlabel('Difference in means')
plt.ylabel('count')
plt.show()

# p-value: fraction of simulated statistics at least as large as the observed one.
pvalue = (test_stats >= actual).sum() / len(test_stats)
pvalue

0.178

• The p-value is the probability that the test statistic under the null hypothesis is at least as large as the actual (observed) value

## Part 2#

### Framework#

class HypothesisTest(object):
    """ Base class for simulation-based hypothesis tests

    Subclasses must implement test_statistics() and run_model(); they may
    override make_model() to prepare whatever run_model() needs.
    """

    def __init__(self, data):
        """ Initializes

        data: data in whatever form is relevant
        """

        self.data = data
        # Filled in by pvalue(); stays None until a simulation has been run.
        self.test_stats = None
        self.actual = self.test_statistics(data)
        self.make_model()

    def pvalue(self, iters=1000):
        """ Computes the distribution of the test statistics and the p-value

        The simulated statistics are stored in self.test_stats.

        iters: number of iterations (must be positive)
        returns: float p-value
        raises: ValueError if iters is not positive
        """

        if iters <= 0:
            raise ValueError("iters must be a positive integer")

        self.test_stats = np.array(
            [self.test_statistics(self.run_model()) for _ in range(iters)]
        )
        # Compare against this instance's own observed statistic; a
        # module-level `actual` may belong to a different data set.
        count = (self.test_stats >= self.actual).sum()
        return count / iters

    def plot_hist(self):
        """ Plots the simulated test statistics with the observed value marked

        pvalue() must have been called first so that self.test_stats is set.
        """
        plt.axvline(self.actual, linewidth=3, color='0.8')
        plt.hist(self.test_stats, color=COLOR4)
        plt.xlabel('Test Statistics')
        plt.ylabel('count')
        plt.show()

    def max_teststat(self):
        """
        Returns the largest test statistics seen during simulations

        pvalue() must have been called first so that self.test_stats is set.
        """
        return max(self.test_stats)

    def test_statistics(self, data):
        """ Computes the test statistics

        data: data in whatever form is relevant
        raises: NotImplementedError unless overridden by a subclass
        """

        raise NotImplementedError("subclasses must implement test_statistics()")

    def make_model(self):
        """
        Build a model of null hypothesis

        Does nothing by default; subclasses override it when run_model()
        needs prepared state.
        """

        pass

    def run_model(self):
        """
        Run the model of the null hypothesis

        returns: simulated data
        raises: NotImplementedError unless overridden by a subclass
        """

        raise NotImplementedError("subclasses must implement run_model()")


### DiffMeansPermute#

class DiffMeansPermute(HypothesisTest):
    """ Permutation test for a difference in means between two groups """

    def test_statistics(self, data):
        """
        Computes the test statistics

        data: pair (grp1, grp2); each group must provide a .mean() method
        returns: absolute difference between the two group means
        """

        grp1, grp2 = data
        return abs(grp1.mean() - grp2.mean())

    def make_model(self):
        """
        Build a model of null hypothesis

        Records the two group sizes and pools both groups into one array.
        """
        grp1, grp2 = self.data
        self.n, self.m = len(grp1), len(grp2)
        # hstack builds a new array, so shuffling the pool later never
        # reorders the caller's data.
        self.pool = np.hstack((grp1, grp2))

    def run_model(self):
        """
        Run the model of the null hypothesis

        Shuffles the pool in place and splits it at the first group's size.

        returns: simulated data
        """

        np.random.shuffle(self.pool)
        # Both parts are slices of self.pool, not copies: the next call
        # reshuffles them, so use the result before simulating again.
        return self.pool[:self.n], self.pool[self.n:]

# Permutation test on pregnancy length: first group versus the others.
data = firsts.prglngth, others.prglngth
ht = DiffMeansPermute(data)
p_value = ht.pvalue(iters=1000)

# Show the p-value, the observed statistic and the largest simulated statistic.
p_value, ht.actual, ht.max_teststat()

(0.164, 0.07803726677754952, 0.20456044359674053)

ht.plot_hist()


### DiffStdPermute#

class DiffStdPermute(DiffMeansPermute):
    """ Permutation test for a difference in standard deviations """

    def test_statistics(self, data):
        """
        Computes the test statistics

        data: pair (grp1, grp2); each group must provide a .std(ddof=...) method
        returns: absolute difference between the two sample standard deviations
        """
        grp1, grp2 = data
        # pandas defaults to ddof=1 and NumPy to ddof=0. The observed
        # statistic is computed on the original data while run_model()
        # returns ndarray slices, so pin ddof to use the same estimator
        # for both.
        return abs(grp1.std(ddof=1) - grp2.std(ddof=1))


# Same pregnancy-length data, now testing the difference in spread.
data = firsts.prglngth, others.prglngth
ht = DiffStdPermute(data)
p_value = ht.pvalue(iters=1000)

# Show the p-value, the observed statistic and the largest simulated statistic.
p_value, ht.actual, ht.max_teststat()

(0.523, 0.17604906422948297, 0.3682700930119429)

ht.plot_hist()


### Applying on Birth Weights#

# totalwgt_lb has missing values, so drop them from each group before testing.
data = (
    firsts.totalwgt_lb.dropna(),
    others.totalwgt_lb.dropna(),
)
ht = DiffMeansPermute(data)
p_value = ht.pvalue(iters=1000)

# Show the p-value, the observed statistic and the largest simulated statistic.
p_value, ht.actual, ht.max_teststat()

(0.004, 0.12476118453549034, 0.08186367252086946)

data

(0        8.8125
2        9.1250
5        8.5625
8        7.5625
10       7.8125
...
13576    6.4375
13578    6.0000
13581    6.3750
13584    6.3750
13588    6.1875
Name: totalwgt_lb, Length: 4363, dtype: float64,
1        7.8750
3        7.0000
4        6.1875
6        9.5625
7        8.3750
...
13572    5.8125
13574    6.1250
13579    7.0000
13591    7.5000
13592    7.5000
Name: totalwgt_lb, Length: 4675, dtype: float64)