# GLM: Hierarchical Linear Regression with ADVI¶

In [1]:

%matplotlib inline
import sys, os
import theano
# Run theano in double precision so ADVI/NUTS gradients are float64.
theano.config.floatX = 'float64'
import matplotlib.pyplot as plt
import numpy as np
import pymc3 as pm
import pandas as pd

# NOTE(review): `data` is used below but never loaded in any visible cell —
# presumably the radon dataset is read in a cell lost during conversion
# (expected columns: county, county_code, floor, log_radon). Confirm.
county_names = data.county.unique()
county_idx = data['county_code'].values
n_counties = len(data.county.unique())

In [2]:

with pm.Model() as hierarchical_model:
    # Hyperpriors for the group-level nodes.
    # NOTE(review): sd=100**2 makes the *standard deviation* 10,000 (a very
    # flat prior), not the variance — confirm this is intended.
    mu_a = pm.Normal('mu_alpha', mu=0., sd=100**2)
    sigma_a = pm.Uniform('sigma_alpha', lower=0, upper=100)
    mu_b = pm.Normal('mu_beta', mu=0., sd=100**2)
    sigma_b = pm.Uniform('sigma_beta', lower=0, upper=100)

    # Intercept for each county, distributed around group mean mu_a.
    # Above we just set mu and sd to a fixed value while here we
    # plug in a common group distribution for all a and b (which are
    # vectors of length n_counties).
    a = pm.Normal('alpha', mu=mu_a, sd=sigma_a, shape=n_counties)
    # Slope for each county, distributed around group mean mu_b.
    b = pm.Normal('beta', mu=mu_b, sd=sigma_b, shape=n_counties)

    # Model error
    eps = pm.Uniform('eps', lower=0, upper=100)

    # Model prediction of radon level.
    # a[county_idx] translates to a[0, 0, 0, 1, 1, ...],
    # we thus link multiple household measures of a county
    # to its coefficients.
    radon_est = a[county_idx] + b[county_idx] * data.floor.values

    # Data likelihood (restored — this line was lost in the
    # notebook-to-text conversion; the dangling comment above indicated it).
    radon_like = pm.Normal('radon_like', mu=radon_est, sd=eps,
                           observed=data.log_radon)

In [3]:

with hierarchical_model:
    # Fit the model with ADVI; `means` and `sds` are consumed below to
    # initialise NUTS and to plot the variational approximation.
    # NOTE(review): this call was reconstructed from the cell's own output —
    # the deprecation warning names the old ADVI interface. The modern
    # equivalent is pm.fit(...) / pm.sample_approx(...); confirm.
    means, sds, elbos = pm.variational.advi(n=100000)
/opt/conda/lib/python3.5/site-packages/ipykernel/__main__.py:2: DeprecationWarning: Old ADVI interface and sample_vp is deprecated and will be removed in future, use pm.fit and pm.sample_approx instead
from ipykernel import kernelapp as app
Average ELBO = -1,117.8:  45%|████▍     | 44977/100000 [00:09<00:11, 4686.75it/s]Median ELBO converged.
Finished [100%]: Average ELBO = -1,107.1


In [4]:

# Inference button (TM)!
with hierarchical_model:
    # Initialise NUTS at the ADVI posterior means and use them as the
    # mass-matrix scaling — much cheaper than find_MAP() for this model.
    step = pm.NUTS(scaling=means)
    hierarchical_trace = pm.sample(5000, step, start=means, progressbar=False)

/home/jovyan/pymc3/pymc3/step_methods/hmc/nuts.py:237: UserWarning: Step size tuning was enabled throughout the whole trace. You might want to specify the number of tuning steps.
warnings.warn('Step size tuning was enabled throughout the whole '
/opt/conda/lib/python3.5/site-packages/numpy/core/fromnumeric.py:2889: RuntimeWarning: Mean of empty slice.
out=out, **kwargs)

In [5]:

from scipy import stats
import seaborn as sns

# Compare the ADVI approximation (a Gaussian per variable, from means/sds)
# against the NUTS posterior histograms, one subplot per model variable.
varnames = means.keys()
fig, axs = plt.subplots(nrows=len(varnames), figsize=(12, 18))
for var, ax in zip(varnames, axs):
    mu_arr = means[var]
    sigma_arr = sds[var]
    ax.set_title(var)
    for i, (mu, sigma) in enumerate(zip(mu_arr.flatten(), sigma_arr.flatten())):
        # Draw the ADVI Gaussian over mu +/- 4 sigma.
        sd3 = (-4*sigma + mu, 4*sigma + mu)
        x = np.linspace(sd3[0], sd3[1], 300)
        y = stats.norm(mu, sigma).pdf(x)
        ax.plot(x, y)
        # Overlay the matching MCMC marginal.
        # NOTE(review): for vector variables trace[var] is (n_samples, k);
        # [i] selects the i-th *sample*, not the i-th component — [:, i]
        # may be what was intended. Preserved as-is; confirm.
        if hierarchical_trace[var].ndim > 1:
            t = hierarchical_trace[var][i]
        else:
            t = hierarchical_trace[var]
        sns.distplot(t, kde=False, norm_hist=True, ax=ax)
fig.tight_layout()

/opt/conda/lib/python3.5/site-packages/matplotlib/font_manager.py:1297: UserWarning: findfont: Font family ['sans-serif'] not found. Falling back to DejaVu Sans
(prop.get_family(), self.defaultFamily[fontext]))


Plotting the hierarchical model trace — its sampled values — from iteration 500 onwards (right-hand plots) and the accumulated marginal distributions of those values (left-hand plots).

In [6]:

# Discard the first 500 draws as burn-in before plotting the traces
# (right column) and their marginal densities (left column).
pm.traceplot(hierarchical_trace[500:]);

/opt/conda/lib/python3.5/site-packages/matplotlib/font_manager.py:1297: UserWarning: findfont: Font family ['sans-serif'] not found. Falling back to DejaVu Sans
(prop.get_family(), self.defaultFamily[fontext]))


The marginal posteriors in the left column are highly informative. mu_a tells us the group mean (log) radon levels. mu_b tells us that having no basement decreases radon levels significantly (no mass above zero). We can also see by looking at the marginals for a that there is quite some difference in radon levels between counties (each ‘rainbow’ color corresponds to a single county); the different widths are related to how much confidence we have in each parameter estimate — the more measurements per county, the higher our confidence will be.

## Posterior Predictive Check¶

### The Root Mean Square Deviation¶

To find out which of the models explains the data better we can calculate the Root Mean Square Deviation (RMSD). This posterior predictive check revolves around recreating the data based on the parameters found at different moments in the chain. The recreated or predicted values are subsequently compared to the real data points; the model that predicts data points closer to the original data is considered the better one. Thus, the lower the RMSD the better.

When computing the RMSD (code not shown) we get the following result:

• individual/non-hierarchical model: 0.13
• hierarchical model: 0.08

As can be seen above, the hierarchical model performs better than the non-hierarchical model in predicting the radon values. Following this, we’ll plot some example counties showing the actual radon measurements, the hierarchical predictions, and the non-hierarchical predictions.

In [7]:

# Non-hierarchical model runs: fit an independent linear model per county.
selection = ['CASS', 'CROW WING', 'FREEBORN']

indiv_traces = {}
for county_name in selection:
    # Select the subset of data belonging to this county.
    # (.ix is deprecated; .loc performs the same boolean-row selection.)
    c_data = data.loc[data.county == county_name]
    c_floor_measure = c_data.floor.values

    with pm.Model() as individual_model:
        # Intercept prior (variance == sd**2)
        a = pm.Normal('alpha', mu=0, sd=100**2)
        # Slope prior
        b = pm.Normal('beta', mu=0, sd=100**2)

        # Model error prior
        eps = pm.Uniform('eps', lower=0, upper=100)

        # Linear model
        radon_est = a + b * c_floor_measure

        # Data likelihood (restored — this line was lost in the
        # notebook-to-text conversion).
        radon_like = pm.Normal('radon_like', mu=radon_est, sd=eps,
                               observed=c_data.log_radon)

        # Inference button (TM)!
        trace = pm.sample(2000, progressbar=False)

    # keep trace for later analysis
    indiv_traces[county_name] = trace

Auto-assigning NUTS sampler...
Finished [100%]: Average Loss = 22.56
/home/jovyan/pymc3/pymc3/step_methods/hmc/nuts.py:237: UserWarning: Step size tuning was enabled throughout the whole trace. You might want to specify the number of tuning steps.
warnings.warn('Step size tuning was enabled throughout the whole '
/opt/conda/lib/python3.5/site-packages/numpy/core/fromnumeric.py:2889: RuntimeWarning: Mean of empty slice.
out=out, **kwargs)
Auto-assigning NUTS sampler...
Finished [100%]: Average Loss = 37.849
/home/jovyan/pymc3/pymc3/step_methods/hmc/nuts.py:237: UserWarning: Step size tuning was enabled throughout the whole trace. You might want to specify the number of tuning steps.
warnings.warn('Step size tuning was enabled throughout the whole '
/opt/conda/lib/python3.5/site-packages/numpy/core/fromnumeric.py:2889: RuntimeWarning: Mean of empty slice.
out=out, **kwargs)
Auto-assigning NUTS sampler...
Convergence archived at 39000
Interrupted at 39,000 [19%]: Average Loss = 37.975

In [8]:

fig, axis = plt.subplots(1, 3, figsize=(12, 6), sharey=True, sharex=True)
axis = axis.ravel()
for i, c in enumerate(selection):
    # (.ix is deprecated; .loc performs the same boolean-row selection.)
    c_data = data.loc[data.county == c]
    c_ind = np.where(county_names == c)[0][0]
    c_data = c_data.reset_index(drop=True)
    z = list(c_data['county_code'])[0]

    xvals = np.linspace(-0.2, 1.2)
    # Individual (non-hierarchical) posterior regression lines + their mean.
    for a_val, b_val in zip(indiv_traces[c]['alpha'][500:], indiv_traces[c]['beta'][500:]):
        axis[i].plot(xvals, a_val + b_val * xvals, 'b', alpha=.1)
    axis[i].plot(xvals, indiv_traces[c]['alpha'][500:].mean() + indiv_traces[c]['beta'][500:].mean() * xvals,
                 'b', alpha=1, lw=2., label='individual')
    # Hierarchical posterior regression lines + their mean.
    # NOTE(review): trace['alpha'][500:] is (n_samples, n_counties); [z]
    # selects the z-th *sample*, not county z — [:, z] may be what was
    # intended. Preserved as-is; confirm.
    for a_val, b_val in zip(hierarchical_trace['alpha'][500:][z], hierarchical_trace['beta'][500:][z]):
        axis[i].plot(xvals, a_val + b_val * xvals, 'g', alpha=.1)
    axis[i].plot(xvals, hierarchical_trace['alpha'][500:][z].mean() + hierarchical_trace['beta'][500:][z].mean() * xvals,
                 'g', alpha=1, lw=2., label='hierarchical')
    # Original data points (call reconstructed — its first line was lost in
    # the notebook-to-text conversion, leaving only the argument tail);
    # jitter x slightly so overlapping points stay visible.
    axis[i].scatter(c_data.floor.values + np.random.randn(len(c_data)) * 0.01,
                    c_data.log_radon.values,
                    alpha=1, color='k', marker='.', s=80, label='original data')
    axis[i].set_xticks([0, 1])
    axis[i].set_xticklabels(['basement', 'no basement'])
    axis[i].set_ylim(-1, 4)
    axis[i].set_title(c)
    if not i % 3:
        axis[i].legend()

/opt/conda/lib/python3.5/site-packages/matplotlib/font_manager.py:1297: UserWarning: findfont: Font family ['sans-serif'] not found. Falling back to DejaVu Sans