Import Needed Libraries

In [1]:
# Core scientific-Python stack for the whole notebook.
print(__doc__)
import pandas as pd
import numpy as np
import matplotlib as mpl
#mpl.use('TkAgg')
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('whitegrid')
%matplotlib inline

# Global plotting defaults: font sizes, tick labels, seaborn theme.
#import matplotlib
from pylab import rcParams
rcParams['mathtext.default']='regular'
rcParams['font.size'] = 20
rcParams['axes.labelsize'] = 30
rcParams['legend.fontsize'] = 20
mpl.rc('xtick', labelsize=15) 
mpl.rc('ytick', labelsize=15) 
sns.set_context('notebook')
sns.set_style('ticks')

# scikit-learn tree/ensemble models used for classification later on.
from sklearn import tree
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.metrics import confusion_matrix

import itertools

import os

# NOTE(review): star-import pollutes the namespace; prefer explicit imports
# if any ggplot names are actually used below.
from ggplot import *

import subprocess
import quickspikes as qs
from scipy.integrate import odeint
from scipy.interpolate import UnivariateSpline

# Let selected LaTeX characters through un-escaped so tabulate output can
# embed math. NOTE(review): LATEX_ESCAPE_RULES was removed in newer tabulate
# releases -- this cell pins the notebook to an older tabulate; confirm the
# installed version before upgrading.
from tabulate import tabulate
import tabulate as T
del(T.LATEX_ESCAPE_RULES['$'])
del(T.LATEX_ESCAPE_RULES['\\'])
del(T.LATEX_ESCAPE_RULES['_'])
del(T.LATEX_ESCAPE_RULES['{'])
del(T.LATEX_ESCAPE_RULES['}'])

# NOTE(review): duplicate `print(__doc__)` -- the same statement already ran
# at the top of this cell; one of the two is redundant.
print(__doc__)
from time import time
from matplotlib import offsetbox
from sklearn import (manifold, datasets, decomposition, ensemble,
                     discriminant_analysis, random_projection)
Automatically created module for IPython interactive environment
Automatically created module for IPython interactive environment

Load in the Data

In [7]:
# Load the fitted parameter table; the first two columns are bookkeeping
# (presumably an index column from the CSV export -- TODO confirm) and dropped.
neuron_params = pd.read_csv("param_table_no_db.csv").iloc[:,2:]

Neuron Identity for each Epoch

In [8]:
# Epoch labels grouped by the neuron each recording epoch came from.
neuron_1_labels = ['4839SW', '72L7E2', 'FGD399']
neuron_2_labels = ['555YZ6', 'D673X6', '5Q5T97']
neuron_3_labels = ['567H6C', '97C25L', '87LSF5',
                   'G73N45', 'Q7A364', 'UK59W3']
neuron_4_labels = ['9C88PR', 'R6E486', '386QZV',
                   '5UY366', 'ZA98N2', '9M29H3']
neuron_5_labels = ['UR7L49', '73VW68', 'CU8L53']

Epochs Producing Best Predictions

In [9]:
# Restrict to the epochs whose fits produced the best predictions; this
# intentionally overwrites the fuller label lists defined in the cell above
# (neurons 4 and 5 lose their poorly-predicted epochs).
neuron_1_labels = ['4839SW', '72L7E2', 'FGD399']
neuron_2_labels = ['555YZ6', 'D673X6', '5Q5T97']
neuron_3_labels = ['567H6C', '97C25L', '87LSF5',
                   'G73N45', 'Q7A364', 'UK59W3']
neuron_4_labels = ['R6E486', '5UY366', 'ZA98N2', '9M29H3']
neuron_5_labels = ['UR7L49', 'CU8L53']
In [10]:
# Tag each epoch row with the identity (1-5) of the neuron it came from.
# Fix: DataFrame.ix was deprecated in pandas 0.20 and removed in 1.0;
# use .loc with a boolean mask instead.
neuron_params['Neuron'] = '0'
neuron_params['Type'] = '0'
for neuron_id, labels in enumerate(
        [neuron_1_labels, neuron_2_labels, neuron_3_labels,
         neuron_4_labels, neuron_5_labels], start=1):
    neuron_params.loc[neuron_params.epoch_label.isin(labels), 'Neuron'] = str(neuron_id)

Label Neuron Type (healthy or 3xTG)

In [11]:
# Label each epoch as coming from a healthy control or a 3xTG neuron.
# Fix: DataFrame.ix was removed in pandas 1.0; use .loc instead.
_type_by_group = [(neuron_1_labels, 'Healthy'),
                  (neuron_2_labels, '3xTG'),
                  (neuron_3_labels, '3xTG'),
                  (neuron_4_labels, 'Healthy'),
                  (neuron_5_labels, 'Healthy')]
for labels, neuron_type in _type_by_group:
    neuron_params.loc[neuron_params.epoch_label.isin(labels), 'Type'] = neuron_type

Drop Bad Values

In [12]:
# Replace infinities with NaN, then drop incomplete rows.
neuron_params.replace([np.inf, -np.inf], np.nan, inplace = True)
neuron_params.dropna(inplace = True)

# Map raw column names to pretty, plot-ready names (and back).
# Fix: use a context manager so the file handle is closed deterministically
# (the original bare open(...).readlines() leaked the handle).
with open('print_names_new.txt', 'r') as names_file:
    print_names = [line.strip() for line in names_file]
feature_dict = dict(zip(list(neuron_params), print_names))
reverse_feature_dict = dict(zip(print_names, list(neuron_params)))
neuron_params = neuron_params.rename(index = str, columns = feature_dict)

# Keep only rows that were assigned a Type in the labeling cells above.
neuron_params = neuron_params[neuron_params['Type'] != '0']
all_neurons = np.unique(neuron_params['epoch label'])

Exploratory Data Analysis

One striking difference between 3xTG and healthy neurons is that the 3xTG neurons seem to be more excitable - lower threshold for spiking, lower step current required to cause spiking. Another is that the spike width appears to be larger in 3xTG cells than in healthy cells.

This is supported by the fact that when $\theta_m$ (Vmo) and $\sigma_m$ (dVm) are plotted against each other, we find a reasonably well defined boundary between the sets of estimated values for each neuron type. Whenever $\theta_m$ increases in a given family of estimates, $\sigma_m$ also increases to compensate, keeping the overall excitability of the neuron relatively constant.

In [18]:
def plot_scatter(param1, param2, handle = 'Type'):
    """Scatter-plot two features against each other, colored by `handle`.

    Parameters
    ----------
    param1, param2 : str
        Pretty-printed column names in the global `neuron_params` frame
        (keys of the global `reverse_feature_dict`).
    handle : str
        Column to color points by. 'Type' uses the fixed Healthy=blue /
        3xTG=red palette; anything else falls back to seaborn defaults.

    Saves the figure to '<raw1>_<raw2>_<handle>.png' using the raw
    (un-prettified) column names.
    """
    if handle == 'Type':
        palette = dict([('Healthy', 'b'), ('3xTG', 'r')])
    else:
        palette = None

    # Fix: pass x/y by keyword -- seaborn >= 0.12 no longer accepts them
    # positionally.
    sns.lmplot(x = param1, y = param2,
               data = neuron_params,
               fit_reg = False,
               hue = handle,
               palette = palette,
               scatter_kws = {'alpha': 0.3})
    plt.xlabel(param1)
    plt.ylabel(param2)
    plt.savefig('{0}_{1}_{2}.png'.format(reverse_feature_dict[param1],
                                         reverse_feature_dict[param2], handle))
In [35]:
#neuron_params[neuron_params['Neuron'] != '2'].mean()
In [36]:
#neuron_params[neuron_params['Neuron'] == '2'].mean()
#feature_dict['threshold_gNa_dec']
# Display the pretty-printed name for the max sodium-current upstroke feature.
feature_dict['max_dINa_dt_inc']
Out[36]:
'Max $\\dot{I}_{Na}$ Upstroke (pA/ms)'
In [43]:
# Rank every feature by its correlation with the threshold step current;
# the strongest (anti-)correlated features are candidates for plotting.
corrs = neuron_params.corr()

thres_current_col = feature_dict['threshold_step_current']
corrs_thres_volt = corrs[thres_current_col].sort_values(ascending=False)

#corrs_thres_volt
In [23]:
# Feature pair for the scatter plot below. Other candidates explored during
# EDA: 'Voltage at Max $\dot{I}_{Na}$ Upstroke (mV)', 'threshold_dIK_inc',
# 'Voltage at Max $I$ Upstroke (mV)', 'threshold_gNa_dec', 'width', 'EL',
# 'threshold_gNa_inc', and entries of corrs_thres_volt.index.
param1 = feature_dict['mean_ahp']
param2 = feature_dict['threshold_voltage']

plot_scatter(param1, param2, handle ='Type')
In [9]:
# theta_m vs sigma_m, colored by neuron type (no regression line).
param1 = '$\\theta_m$ (mV)'
param2 = '$\\sigma_m$ (mV)'

# Fix: pass x/y by keyword -- seaborn >= 0.12 removed positional support.
sns.lmplot(x = param1, y = param2,
           data = neuron_params,
           fit_reg = False,
           hue = 'Type',
           palette = dict([('Healthy', 'b'), ('3xTG', 'r')]),
           scatter_kws = {'alpha': 0.3})
plt.xlabel(param1)
plt.ylabel(param2)
plt.savefig('{0}_{1}.png'.format(reverse_feature_dict[param1], reverse_feature_dict[param2]))
In [10]:
# Same theta_m vs sigma_m plot, now with per-Type regression lines.
param1 = '$\\theta_m$ (mV)'
param2 = '$\\sigma_m$ (mV)'

# Fix: pass x/y by keyword -- seaborn >= 0.12 removed positional support.
sns.lmplot(x = param1, y = param2,
           data = neuron_params,
           fit_reg = True,
           hue = 'Type',
           palette = dict([('Healthy', 'b'), ('3xTG', 'r')]),
           scatter_kws = {'alpha': 0.3})
plt.xlabel(param1)
plt.ylabel(param2)
plt.savefig('{0}_{1}_regr.png'.format(reverse_feature_dict[param1], reverse_feature_dict[param2]))

One 3xTG neuron appears to primarily contribute to this pattern of separation. Since there are only two 3xTG neurons and three healthy neurons, any conclusions we draw about the two populations are tentative.

In [11]:
# theta_m vs sigma_m colored by individual neuron identity, to see which
# neuron drives the class separation.
param1 = '$\\theta_m$ (mV)'
param2 = '$\\sigma_m$ (mV)'

# Fix: pass x/y by keyword -- seaborn >= 0.12 removed positional support.
sns.lmplot(x = param1, y = param2,
           data = neuron_params,
           fit_reg = True,
           hue = 'Neuron',
           scatter_kws = {'alpha': 0.3})
plt.xlabel(param1)
plt.ylabel(param2)
plt.savefig('{0}_{1}_regr_ident.png'.format(reverse_feature_dict[param1], reverse_feature_dict[param2]))

We find, using variable importance in random forests, that the half-width of a neuron and the voltage at half inactivation ($\theta_h$) for the sodium current are key features distinguishing 3xTG from healthy neurons. Our decision tree uses width as the first feature to split on. Models with larger estimated widths are much more likely to be classified as 3xTG.

The results of the plot of $\theta_m$ against $\sigma_m$ suggests that an important feature distinguishing neurons of the two classes from each other is overall excitability. Therefore an engineered feature containing this information is desirable. Two features in particular have this property. One is the maximum applied step current which will not cause the neuron to generate an action potential. The other is the voltage at which this occurs.

The separation between the two classes of neurons is especially striking when the half-width and the threshold step current are plotted against each other. This is reflected in the decision tree structure, where these features are consecutive, informative splits.

We would likely see even better splits if we included $\theta_h$ in a third dimension, as it is the third consecutive split in our decision tree structure, and it is also fairly informative as measured by the gini information. A gini value close to 0.5 corresponds to a good split, and these values are about 0.5, 0.3, and 0.2, respectively.

In [12]:
# Threshold step current vs half-width -- the top two decision-tree splits.
param1 = 'threshold step current (pA)'
param2 = 'half-width (ms)'

# Fix: pass x/y by keyword -- seaborn >= 0.12 removed positional support.
sns.lmplot(x = param1, y = param2,
           data = neuron_params,
           fit_reg = False,
           hue = 'Type',
           palette = dict([('Healthy', 'b'), ('3xTG', 'r')]),
           scatter_kws = {'alpha': 0.3})
plt.xlabel(param1)
plt.ylabel(param2)
plt.savefig('{0}_{1}.png'.format(reverse_feature_dict[param1], reverse_feature_dict[param2]))

The split between the two groups when $\theta_h$ and the threshold step current are plotted is not quite as clear, but the two quantities seem to be associated with each other, though in this case why they are related is not as clear. $\theta_h$ governs where $I_{Na}$ inactivates and how fast. $I_{Na}$ should turn off more slowly in the range of threshold voltages. We verify below that the threshold voltage and the applied current at which action potentials are generated are correlated. We see plots of threshold voltage or minimal threshold current against $\theta_h$ both have the same pattern and seem to split 3xTG and healthy neurons from each other well.

In [13]:
# Threshold voltage vs theta_h (sodium half-inactivation voltage).
param1 = 'threshold voltage (mV)'
param2 = '$\\theta_h$ (mV)'

# Fix: pass x/y by keyword -- seaborn >= 0.12 removed positional support.
sns.lmplot(x = param1, y = param2,
           data = neuron_params,
           fit_reg = False,
           hue = 'Type',
           palette = dict([('Healthy', 'b'), ('3xTG', 'r')]),
           scatter_kws = {'alpha': 0.3})
plt.xlabel(param1)
plt.ylabel(param2)
plt.savefig('{0}_{1}.png'.format(reverse_feature_dict[param1], reverse_feature_dict[param2]))
In [14]:
# Threshold step current vs theta_h.
param1 = 'threshold step current (pA)'
param2 = '$\\theta_h$ (mV)'

# Fix: pass x/y by keyword -- seaborn >= 0.12 removed positional support.
sns.lmplot(x = param1, y = param2,
           data = neuron_params,
           fit_reg = False,
           hue = 'Type',
           palette = dict([('Healthy', 'b'), ('3xTG', 'r')]),
           scatter_kws = {'alpha': 0.3})
plt.xlabel(param1)
plt.ylabel(param2)
plt.savefig('{0}_{1}.png'.format(reverse_feature_dict[param1], reverse_feature_dict[param2]))

We check below that threshold step current and threshold voltage are related to each other.

As we would expect, the two quantities are correlated with each other. It is a little hard to tell from the plot, but healthy cells tend towards larger values of both parameters than do 3xTG cells. Some of these cells are excitable enough that they will generate spikes even at negative applied step currents.

In [15]:
# Sanity check: threshold step current and threshold voltage should correlate.
param1 = 'threshold step current (pA)'
param2 = 'threshold voltage (mV)'

# Fix: pass x/y by keyword -- seaborn >= 0.12 removed positional support.
sns.lmplot(x = param1, y = param2,
           data = neuron_params,
           fit_reg = False,
           hue = 'Type',
           palette = dict([('Healthy', 'b'), ('3xTG', 'r')]),
           scatter_kws = {'alpha': 0.3})
plt.xlabel(param1)
plt.ylabel(param2)
plt.savefig('{0}_{1}.png'.format(reverse_feature_dict[param1], reverse_feature_dict[param2]))

The rate of change of $I_{Na}$ is a good proxy for $\theta_m$. Because $I_{Na}$ activates very quickly and inactivates relatively more slowly, we expect that the rate of change of $I_{Na}$ with respect to time during an action potential will be maximal when we are at the steepest part of the activation sigmoid, which occurs at $\theta_m$. If we plot these two quantities against each other, we find that this is indeed the case. The correspondence is not exact, since the rate of change of $I_{Na}$ also depends on voltage and $h$ is changing, but we will nevertheless use the voltage at maximum $\dot{I}_{Na}$ as a proxy for $\theta_m$.

In [16]:
# Voltage at max I_Na upstroke vs theta_m -- checks the upstroke voltage is a
# usable proxy for theta_m.
param1 = 'Voltage at Max $\\dot{I}_{Na}$ Upstroke (mV)'
param2 = '$\\theta_m$ (mV)'

# Fix: pass x/y by keyword -- seaborn >= 0.12 removed positional support.
sns.lmplot(x = param1, y = param2,
           data = neuron_params,
           fit_reg = False,
           hue = 'Type',
           palette = dict([('Healthy', 'b'), ('3xTG', 'r')]),
           scatter_kws = {'alpha': 0.3})
plt.xlabel(param1)
plt.ylabel(param2)
plt.savefig('{0}_{1}.png'.format(reverse_feature_dict[param1], reverse_feature_dict[param2]))

We can visualize the relationship between the voltage of maximum $\dot{I}_{Na}$ and threshold step current. As we would expect, a stronger step current is required to elicit an action potential, and we see that the separation between the two types of neurons is a little better in two dimensions than it is in one dimension.

In [17]:
# Voltage at max I_Na upstroke vs threshold step current.
param1 = 'Voltage at Max $\\dot{I}_{Na}$ Upstroke (mV)'
param2 = 'threshold step current (pA)'

# Fix: pass x/y by keyword -- seaborn >= 0.12 removed positional support.
sns.lmplot(x = param1, y = param2,
           data = neuron_params,
           fit_reg = False,
           hue = 'Type',
           palette = dict([('Healthy', 'b'), ('3xTG', 'r')]),
           scatter_kws = {'alpha': 0.3})
plt.xlabel(param1)
plt.ylabel(param2)
plt.savefig('{0}_{1}.png'.format(reverse_feature_dict[param1], reverse_feature_dict[param2]))