Hypothesis Testing#

Synopsis#

Null Hypothesis Significance testing (NHST)
Proof by contradiction
Least important: only useful in some contexts
Methodology
- Apparent difference in distribution
- Assume no difference
- Test Statistics
- Calculate P-Value
- Comment

Note

We make modelling decisions on

Null Hypothesis
Test Statistic (choosing a 1-sided or a 2-sided test)

import numpy as np
import scipy as sp
import scipy.stats as stats
import matplotlib as mpl
import matplotlib.pyplot as plt
from ipywidgets import interact, interactive, fixed
import ipywidgets as widgets
import pandas as pd
from IPython.display import display, display_html
import bqplot
import first

from bqplot import LinearScale, Hist, Figure, Axis, ColorScale

from bqplot import pyplot as pltbq
# import seaborn as sns
# seed the random number generator so we all get the same results
np.random.seed(17)

# some nice colors from http://colorbrewer2.org/
COLOR1 = '#7fc97f'
COLOR2 = '#beaed4'
COLOR3 = '#fdc086'
COLOR4 = '#ffff99'
COLOR5 = '#386cb0'

mpl.rcParams['figure.figsize'] = (8.0, 9.0)

%matplotlib inline

Part 1#

Suppose you observe an apparent difference between two groups
Check if it might be due to chance

live, firsts, others = first.MakeFrames()

firsts.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4413 entries, 0 to 13588
Columns: 244 entries, caseid to totalwgt_lb
dtypes: float64(171), int64(73)
memory usage: 8.2 MB

firsts.head()

	caseid	pregordr	howpreg_n	howpreg_p	moscurrp	nowprgdk	pregend1	pregend2	nbrnaliv	multbrth	...	basewgt	adj_mod_basewgt	finalwgt	secu_p	sest	cmintvw	totalwgt_lb
0	1	1	NaN	NaN	NaN	NaN	6.0	NaN	1.0	NaN	...	3410.389399	3869.349602	6448.271112	2	9	NaN	8.8125
2	2	1	NaN	NaN	NaN	NaN	5.0	NaN	3.0	5.0	...	7226.301740	8567.549110	12999.542264	2	12	NaN	9.1250
5	6	1	NaN	NaN	NaN	NaN	6.0	NaN	1.0	NaN	...	4870.926435	5325.196999	8874.440799	1	23	NaN	8.5625
8	7	1	NaN	NaN	NaN	NaN	5.0	NaN	1.0	NaN	...	3409.579565	3787.539000	6911.879921	2	14	NaN	7.5625
10	12	1	NaN	NaN	NaN	NaN	5.0	NaN	1.0	NaN	...	3612.781968	4146.013572	6909.331618	1	31	NaN	7.8125

5 rows × 244 columns

firsts.describe()

	caseid	pregordr	howpreg_n	howpreg_p	moscurrp	nowprgdk	pregend1	pregend2	nbrnaliv	multbrth	...	laborfor_i	religion_i	metro_i	basewgt	adj_mod_basewgt	finalwgt	secu_p	sest	cmintvw	totalwgt_lb
count	4413.000000	4413.000000	0.0	0.0	0.0	0.0	4413.000000	5.000000	4412.000000	52.000000	...	4413.000000	4413.000000	4413.0	4413.000000	4413.000000	4413.000000	4413.000000	4413.000000	0.0	4363.000000
mean	6238.983005	1.330387	NaN	NaN	NaN	NaN	5.764786	5.000000	1.014733	1.692308	...	0.000906	0.003626	0.0	4181.313797	5347.029828	8143.762911	1.489916	43.808294	NaN	7.201094
std	3651.545931	0.715439	NaN	NaN	NaN	NaN	0.431071	2.236068	0.153581	1.528019	...	0.030096	0.063770	0.0	3539.367216	4991.010212	8270.404870	0.499955	24.116585	NaN	1.420573
min	1.000000	1.000000	NaN	NaN	NaN	NaN	1.000000	1.000000	1.000000	1.000000	...	0.000000	0.000000	0.0	64.577101	71.201194	118.656790	1.000000	1.000000	NaN	0.125000
25%	3047.000000	1.000000	NaN	NaN	NaN	NaN	6.000000	6.000000	1.000000	1.000000	...	0.000000	0.000000	0.0	2418.005561	2876.683738	4033.606004	1.000000	24.000000	NaN	6.437500
50%	6179.000000	1.000000	NaN	NaN	NaN	NaN	6.000000	6.000000	1.000000	1.000000	...	0.000000	0.000000	0.0	3409.970503	4196.796883	6456.845037	1.000000	45.000000	NaN	7.312500
75%	9415.000000	1.000000	NaN	NaN	NaN	NaN	6.000000	6.000000	1.000000	1.000000	...	0.000000	0.000000	0.0	4870.474384	5862.064708	9496.575715	2.000000	65.000000	NaN	8.000000
max	12571.000000	9.000000	NaN	NaN	NaN	NaN	6.000000	6.000000	5.000000	5.000000	...	1.000000	2.000000	0.0	99707.832014	157143.686687	261879.953864	2.000000	84.000000	NaN	15.437500

8 rows × 244 columns

Review variables
- pregnancy length
- birth weight

def test_statistic(data):
    grp1, grp2 = data
    test_stat = abs(grp1.mean() - grp2.mean())
    return test_stat

# Observed test statistic for the `prglngth` column of the two frames.
grp1 = firsts.prglngth
grp2 = others.prglngth
actual = test_statistic((grp1, grp2))
actual

0.07803726677754952

grp2

1        39
3        39
4        39
6        40
7        42
         ..
13572    39
13574    39
13579    39
13591    39
13592    39
Name: prglngth, Length: 4735, dtype: int64

# Group sizes, plus a single array holding the values of both groups.
n = len(grp1)
m = len(grp2)
pool = np.hstack((grp1, grp2))

pool

array([39, 39, 38, ..., 39, 39, 39])

def run_model():
    """Simulate the null hypothesis by reshuffling the pooled values.

    Shuffles the notebook-level ``pool`` array in place and cuts it at ``n``.

    returns: tuple of two arrays, the first ``n`` values and the remainder
    """
    np.random.shuffle(pool)
    first_part = pool[:n]
    remainder = pool[n:]
    return first_part, remainder

run_model()

(array([30, 35, 41, ..., 40, 39, 39]), array([39, 39, 39, ..., 39, 38, 40]))

test_statistic(run_model())

0.04892370650121336

# Test statistic of 1000 independent reshuffles of the pooled data.
test_stats = np.array([test_statistic(run_model()) for _ in range(1000)])

# Mark the observed statistic with a vertical line, then draw the histogram
# of the simulated statistics on the same axes.
plt.axvline(actual, linewidth=3, color='0.8')
plt.hist(test_stats, color=COLOR5)
plt.xlabel('Difference in means')
plt.ylabel('count')
plt.show()

../../_images/03_hypothesis_testing_20_0.png

# Fraction of simulated statistics that are at least as large as the observed one.
pvalue = np.mean(test_stats >= actual)
pvalue

0.178

The p-value is the probability that the test statistic under the null hypothesis is at least as large as the actual (observed) value

Part 2#

Framework#

class HypothesisTest(object):
    
    def __init__(self, data):
        """ Initiailizes
        
        data: data in whatever form is relevant
        """
        
        self.data = data
        self.test_stats = None
        self.actual = self.test_statistics(data)
        self.make_model()
        
    def pvalue(self, iters=1000):
        """ Computes distribution of the test statistics
        
        iters: number of iterations
        returns: float p-value
        """
        
        self.test_stats = np.array([self.test_statistics(self.run_model()) for _ in range(iters)])
        count = sum(self.test_stats >= actual)
        return count/iters
    
    
    def plot_hist(self):
        plt.axvline(self.actual, linewidth=3, color='0.8')
        plt.hist(self.test_stats, color=COLOR4)
        plt.xlabel('Test Statistics')
        plt.ylabel('count')
        plt.show()
        
    
    def max_teststat(self):
        """
        Returns the largest test statistics seen during simulations
        """
        return max(self.test_stats)
    
    
    def test_statistics(self, data):
        """ Computes the test statistics
        
        data: data in whatever form is relative_difference_by_female
        """
        
        raise UnimplementedMethodException()
        
        
    def make_model(self):
        """
        Build a model of null hypothesis
        """
        
        pass
    
    def run_model(self):
        """
        Run the model of the null hypothesis
        
        returns: simulated data
        """
        
        raise UnimplementedMethodException()

DiffMeansPermute#

class DiffMeansPermute(HypothesisTest):
    """Permutation test on the absolute difference between two group means."""

    def test_statistics(self, data):
        """
        Computes the test statistics

        data: pair of groups, each providing a ``mean()`` method
        returns: absolute difference between the two group means
        """
        first_grp, second_grp = data
        return abs(first_grp.mean() - second_grp.mean())

    def make_model(self):
        """
        Record both group sizes and pool the two groups into one array
        """
        first_grp, second_grp = self.data
        self.n = len(first_grp)
        self.m = len(second_grp)
        self.pool = np.hstack((first_grp, second_grp))

    def run_model(self):
        """
        Run the model of the null hypothesis

        Shuffles ``self.pool`` in place and cuts it at ``self.n``.

        returns: simulated data as a pair of arrays
        """
        np.random.shuffle(self.pool)
        cut = self.n
        return self.pool[:cut], self.pool[cut:]

# Difference-in-means permutation test on the `prglngth` column.
data = (firsts.prglngth, others.prglngth)

ht = DiffMeansPermute(data)

# Show the p-value, the observed statistic and the largest simulated statistic.
p_value = ht.pvalue(iters=1000)
p_value, ht.actual, ht.max_teststat()

(0.164, 0.07803726677754952, 0.20456044359674053)

ht.plot_hist()

../../_images/03_hypothesis_testing_29_0.png

DiffStdPermute#

class DiffStdPermute(DiffMeansPermute):
    """Permutation test on the absolute difference between two group standard deviations."""

    def test_statistics(self, data):
        """
        Computes the test statistics

        data: pair of groups, each providing a ``std(ddof=...)`` method
              (both pandas Series and numpy arrays do)
        returns: absolute difference between the two sample standard deviations
        """
        grp1, grp2 = data
        # pandas' std() defaults to ddof=1 while numpy's defaults to ddof=0.
        # The observed groups may be Series, but the reshuffled groups are
        # numpy slices of the pooled array, so pin ddof to keep the observed
        # and simulated statistics on the same definition.
        test_stat = abs(grp1.std(ddof=1) - grp2.std(ddof=1))
        return test_stat
    

# Same `prglngth` data, now testing the difference in standard deviations.
data = (firsts.prglngth, others.prglngth)

ht = DiffStdPermute(data)

# Show the p-value, the observed statistic and the largest simulated statistic.
p_value = ht.pvalue(iters=1000)
p_value, ht.actual, ht.max_teststat()

(0.523, 0.17604906422948297, 0.3682700930119429)

ht.plot_hist()

../../_images/03_hypothesis_testing_33_0.png

Applying on Birth Weights#

# Difference-in-means test on `totalwgt_lb`; missing values are dropped first.
data = (firsts.totalwgt_lb.dropna(), others.totalwgt_lb.dropna())

ht = DiffMeansPermute(data)

p_value = ht.pvalue(iters=1000)

# Show the p-value, the observed statistic and the largest simulated statistic.
p_value, ht.actual, ht.max_teststat()

(0.004, 0.12476118453549034, 0.08186367252086946)

data

(0        8.8125
 2        9.1250
 5        8.5625
 8        7.5625
 10       7.8125
           ...  
 13576    6.4375
 13578    6.0000
 13581    6.3750
 13584    6.3750
 13588    6.1875
 Name: totalwgt_lb, Length: 4363, dtype: float64,
 1        7.8750
 3        7.0000
 4        6.1875
 6        9.5625
 7        8.3750
           ...  
 13572    5.8125
 13574    6.1250
 13579    7.0000
 13591    7.5000
 13592    7.5000
 Name: totalwgt_lb, Length: 4675, dtype: float64)

AIBook

Hypothesis Testing

Contents

Hypothesis Testing#

Synopsis#

Part 1#

Part 2#

Framework#

DiffMeansPermute#

DiffStdPermute#

Applying on Birth Weights#