Hypothesis Testing#

Synopsis#

  • Null Hypothesis Significance testing (NHST)

  • Proof by contradiction

  • Least important : Only useful in come context

  • Methodology

    • Apparent difference in distribution

    • Assume no difference

    • Test Statistics

    • Calculate P-Value

    • Comment

Note

We make modelling decisions on

  1. Null Hypothesis

  2. Test Statistic ( Choosing a 1-sided or a 2-sided test)

import numpy as np
import scipy as sp
import scipy.stats as stats
import matplotlib as mpl
import matplotlib.pyplot as plt
from ipywidgets import interact, interactive, fixed
import ipywidgets as widgets
import pandas as pd
from IPython.display import display, display_html
import bqplot
import first

from bqplot import LinearScale, Hist, Figure, Axis, ColorScale

from bqplot import pyplot as pltbq
# import seaborn as sns
# seed the random number generator so we all get the same results
np.random.seed(17)

# some nice colors from http://colorbrewer2.org/
COLOR1 = '#7fc97f'
COLOR2 = '#beaed4'
COLOR3 = '#fdc086'
COLOR4 = '#ffff99'
COLOR5 = '#386cb0'

mpl.rcParams['figure.figsize'] = (8.0, 9.0)
%matplotlib inline

Part 1#

  • Suppose u observe apparent difference between 2 groups

  • Check if it might be due to chance

live, firsts, others = first.MakeFrames()
firsts.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 4413 entries, 0 to 13588
Columns: 244 entries, caseid to totalwgt_lb
dtypes: float64(171), int64(73)
memory usage: 8.2 MB
firsts.head()
caseid pregordr howpreg_n howpreg_p moscurrp nowprgdk pregend1 pregend2 nbrnaliv multbrth ... laborfor_i religion_i metro_i basewgt adj_mod_basewgt finalwgt secu_p sest cmintvw totalwgt_lb
0 1 1 NaN NaN NaN NaN 6.0 NaN 1.0 NaN ... 0 0 0 3410.389399 3869.349602 6448.271112 2 9 NaN 8.8125
2 2 1 NaN NaN NaN NaN 5.0 NaN 3.0 5.0 ... 0 0 0 7226.301740 8567.549110 12999.542264 2 12 NaN 9.1250
5 6 1 NaN NaN NaN NaN 6.0 NaN 1.0 NaN ... 0 0 0 4870.926435 5325.196999 8874.440799 1 23 NaN 8.5625
8 7 1 NaN NaN NaN NaN 5.0 NaN 1.0 NaN ... 0 0 0 3409.579565 3787.539000 6911.879921 2 14 NaN 7.5625
10 12 1 NaN NaN NaN NaN 5.0 NaN 1.0 NaN ... 0 0 0 3612.781968 4146.013572 6909.331618 1 31 NaN 7.8125

5 rows × 244 columns

firsts.describe()
caseid pregordr howpreg_n howpreg_p moscurrp nowprgdk pregend1 pregend2 nbrnaliv multbrth ... laborfor_i religion_i metro_i basewgt adj_mod_basewgt finalwgt secu_p sest cmintvw totalwgt_lb
count 4413.000000 4413.000000 0.0 0.0 0.0 0.0 4413.000000 5.000000 4412.000000 52.000000 ... 4413.000000 4413.000000 4413.0 4413.000000 4413.000000 4413.000000 4413.000000 4413.000000 0.0 4363.000000
mean 6238.983005 1.330387 NaN NaN NaN NaN 5.764786 5.000000 1.014733 1.692308 ... 0.000906 0.003626 0.0 4181.313797 5347.029828 8143.762911 1.489916 43.808294 NaN 7.201094
std 3651.545931 0.715439 NaN NaN NaN NaN 0.431071 2.236068 0.153581 1.528019 ... 0.030096 0.063770 0.0 3539.367216 4991.010212 8270.404870 0.499955 24.116585 NaN 1.420573
min 1.000000 1.000000 NaN NaN NaN NaN 1.000000 1.000000 1.000000 1.000000 ... 0.000000 0.000000 0.0 64.577101 71.201194 118.656790 1.000000 1.000000 NaN 0.125000
25% 3047.000000 1.000000 NaN NaN NaN NaN 6.000000 6.000000 1.000000 1.000000 ... 0.000000 0.000000 0.0 2418.005561 2876.683738 4033.606004 1.000000 24.000000 NaN 6.437500
50% 6179.000000 1.000000 NaN NaN NaN NaN 6.000000 6.000000 1.000000 1.000000 ... 0.000000 0.000000 0.0 3409.970503 4196.796883 6456.845037 1.000000 45.000000 NaN 7.312500
75% 9415.000000 1.000000 NaN NaN NaN NaN 6.000000 6.000000 1.000000 1.000000 ... 0.000000 0.000000 0.0 4870.474384 5862.064708 9496.575715 2.000000 65.000000 NaN 8.000000
max 12571.000000 9.000000 NaN NaN NaN NaN 6.000000 6.000000 5.000000 5.000000 ... 1.000000 2.000000 0.0 99707.832014 157143.686687 261879.953864 2.000000 84.000000 NaN 15.437500

8 rows × 244 columns

  • Review variables

    • pregnancy length

    • birth weight

def test_statistic(data):
    grp1, grp2 = data
    test_stat = abs(grp1.mean() - grp2.mean())
    return test_stat
grp1 , grp2 = firsts.prglngth, others.prglngth
actual = test_statistic((grp1 , grp2))
actual
0.07803726677754952
grp2
1        39
3        39
4        39
6        40
7        42
         ..
13572    39
13574    39
13579    39
13591    39
13592    39
Name: prglngth, Length: 4735, dtype: int64
n, m = len(grp1), len(grp2)
pool = np.hstack((grp1, grp2))
pool
array([39, 39, 38, ..., 39, 39, 39])
def run_model():
    np.random.shuffle(pool)
    data = pool[:n], pool[n:]
    return data 
run_model()
(array([30, 35, 41, ..., 40, 39, 39]), array([39, 39, 39, ..., 39, 38, 40]))
test_statistic(run_model())
0.04892370650121336
test_stats = np.array([test_statistic(run_model()) for i in range(1000)])
plt.axvline(actual, linewidth=3, color='0.8')
plt.hist(test_stats, color=COLOR5)
plt.xlabel('Difference in means')
plt.ylabel('count')
plt.show()
../../_images/03_hypothesis_testing_20_0.png
pvalue = sum(test_stats>=actual)/len(test_stats)
pvalue
0.178
  • Probability that test statistic under null hypothesis exceeds the actual value

Part 2#

Framework#

class HypothesisTest(object):
    
    def __init__(self, data):
        """ Initiailizes
        
        data: data in whatever form is relevant
        """
        
        self.data = data
        self.test_stats = None
        self.actual = self.test_statistics(data)
        self.make_model()
        
    def pvalue(self, iters=1000):
        """ Computes distribution of the test statistics
        
        iters: number of iterations
        returns: float p-value
        """
        
        self.test_stats = np.array([self.test_statistics(self.run_model()) for _ in range(iters)])
        count = sum(self.test_stats >= actual)
        return count/iters
    
    
    def plot_hist(self):
        plt.axvline(self.actual, linewidth=3, color='0.8')
        plt.hist(self.test_stats, color=COLOR4)
        plt.xlabel('Test Statistics')
        plt.ylabel('count')
        plt.show()
        
    
    def max_teststat(self):
        """
        Returns the largest test statistics seen during simulations
        """
        return max(self.test_stats)
    
    
    def test_statistics(self, data):
        """ Computes the test statistics
        
        data: data in whatever form is relative_difference_by_female
        """
        
        raise UnimplementedMethodException()
        
        
    def make_model(self):
        """
        Build a model of null hypothesis
        """
        
        pass
    
    def run_model(self):
        """
        Run the model of the null hypothesis
        
        returns: simulated data
        """
        
        raise UnimplementedMethodException()

DiffMeansPermute#

class DiffMeansPermute(HypothesisTest):
    
    def test_statistics(self, data):
        """
        Computes the test statistics
        
        data: data in whatever form is relevant for the diff
        """
        
        grp1, grp2 = data
        test_stat = abs(grp1.mean() - grp2.mean())
        return test_stat
    
    def make_model(self):
        grp1, grp2  = self.data
        self.n, self.m = len(grp1), len(grp2)
        self.pool = np.hstack((grp1, grp2))
        
    
    def run_model(self):
        """
        Run the model of the null hypothesis
        
        returns: simulated data 
        """
        
        np.random.shuffle(self.pool)
        data = self.pool[:self.n], self.pool[self.n:]
        return data
data = (firsts.prglngth, others.prglngth)

ht = DiffMeansPermute(data)

p_value = ht.pvalue(iters=1000)
p_value, ht.actual, ht.max_teststat()
(0.164, 0.07803726677754952, 0.20456044359674053)
ht.plot_hist()
../../_images/03_hypothesis_testing_29_0.png

DiffStdPermute#

class DiffStdPermute(DiffMeansPermute):
    
    def test_statistics(self, data):
        grp1, grp2 = data
        test_stat = abs(grp1.std() - grp2.std())
        return test_stat
    
data = (firsts.prglngth, others.prglngth)

ht = DiffStdPermute(data)

p_value = ht.pvalue(iters=1000)
p_value, ht.actual, ht.max_teststat()
(0.523, 0.17604906422948297, 0.3682700930119429)
ht.plot_hist()
../../_images/03_hypothesis_testing_33_0.png

Applying on Birth Weights#

data = (firsts.totalwgt_lb.dropna(), others.totalwgt_lb.dropna())

ht = DiffMeansPermute(data)

p_value = ht.pvalue(iters=1000)

p_value, ht.actual, ht.max_teststat()
(0.004, 0.12476118453549034, 0.08186367252086946)
data
(0        8.8125
 2        9.1250
 5        8.5625
 8        7.5625
 10       7.8125
           ...  
 13576    6.4375
 13578    6.0000
 13581    6.3750
 13584    6.3750
 13588    6.1875
 Name: totalwgt_lb, Length: 4363, dtype: float64,
 1        7.8750
 3        7.0000
 4        6.1875
 6        9.5625
 7        8.3750
           ...  
 13572    5.8125
 13574    6.1250
 13579    7.0000
 13591    7.5000
 13592    7.5000
 Name: totalwgt_lb, Length: 4675, dtype: float64)