def grab_diff_v_from_folder(ew, folder):
    '''Get the subthreshold voltage-difference statistics if the file exists.
    inputs:
        ew: string
            matched against the end of a file name in the folder in order to return
            values; e.g., if GLIF2 is requested but there is no GLIF2 file in the
            folder, nans are returned regardless of whether there is a value in the
            database. This would happen, for example, if the model was excluded from
            analysis because of an aberrant parameter.
        folder: string
            path to the structured folder used in the rest of the analysis
    returns:
        either nans or the RSS of the voltage difference, the variance of the voltage
        data, and the number of data points (spike shapes excluded) for the requested model
    '''
    try:
        file=get_file_path_endswith(folder, ew)
        contents=ju.read(file)
        RSS_of_voltage_diff=contents['noise2']['RSS_of_voltage_diff']
        var_of_voltage_data=contents['noise2']['var_of_voltage_data']
        num_data_points_wo_spike_shape=contents['noise2']['num_data_points_wo_spike_shape']
    except Exception:  # file missing or malformed
        RSS_of_voltage_diff=np.nan
        var_of_voltage_data=np.nan
        num_data_points_wo_spike_shape=np.nan
        
    return RSS_of_voltage_diff, var_of_voltage_data, num_data_points_wo_spike_shape
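# Usage sketch (hypothetical folder path): the three returned values can be combined
# into a normalized subthreshold error, e.g. the mean squared voltage difference
# divided by the variance of the data.
# rss, var_v, n = grab_diff_v_from_folder('_GLIF2_subthr_v.json', folder)
# if not np.isnan(rss):
#     frac_unexplained = (rss / n) / var_v  # fraction of the voltage variance left unexplained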
def get_th_inf(folder, endswith):
    '''Return the optimized instantaneous threshold (th_inf times its optimized coefficient) or nan.'''
    if np.any([f.endswith(endswith) for f in os.listdir(folder)]):
        file = get_file_path_endswith(folder, endswith)
        optimized = ju.read(file)
        return optimized['th_inf'] * optimized['coeffs']['th_inf']
    else:
        return np.nan
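# Usage sketch (assumes a structured data directory of per-neuron folders): collect the
# optimized threshold for every neuron at one model level.
# th_values = [get_th_inf(os.path.join(data_path, f), '_GLIF1_neuron_config.json')
#              for f in os.listdir(data_path)]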
def count(path, ew):
    '''count the number of files in the structured data directory that
    end with the specified input
    Inputs:
        path: string
            path to the structured data directory
        ew: string
            string the file name ends with
    Returns:
        n: integer
            number of files in the structured data directory that end 
            with the specified input string
        
    '''
    folders=[os.path.join(path, f) for f in os.listdir(path)]
    n=0
    for folder in folders:
        try:
            get_file_path_endswith(folder, ew)
            n=n+1
        except Exception:  # no matching file in this folder
            pass
    return n
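# Usage sketch: tally how many neurons have a configuration file at each model level
# (assumes 'structured_data_directory' points at the structured data directory).
# for level in ['_GLIF1', '_GLIF2', '_GLIF3', '_GLIF4', '_GLIF5']:
#     print(level, count(structured_data_directory, level + '_neuron_config.json'))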
def check_ev_value(folder, ew):
    '''Checks to see if the explained variance of the training data (noise 1) is below the specified value.
    inputs:
        folder: string
            path to the folder where files are located
        ew: string
            specifies the unique end of the file name being searched for
    returns:
        Nothing. Appends specimen IDs to be excluded to the global 'exp_var_exclusion_below' list
        (the threshold 'ev' is also taken from the enclosing scope).
    '''
    specimen_ID=os.path.basename(folder)[:9]
    try:
        file=get_file_path_endswith(folder, ew)  # if the file doesn't exist this will fail
        dictionary=ju.read(file)
        if dictionary['after_opt']['noise_1']<ev:
            exp_var_exclusion_below.append(specimen_ID)
    except Exception:
        print("can't find a file for", specimen_ID, "this should not happen if check_sweeps_and_rm_folders.py was run!")
def get_model(path, EW):
    '''Runs the model for a specified neuron and model
    inputs:
        path: string
            folder path with files for the neuron
        EW: string
            end of the file searching for: options '_GLIF1_neuron_config.json',
            '_GLIF2_neuron_config.json', etc.
    returns:
        run_data: dictionary
            contains data from the model run
            
    '''

    specimen_id=int(os.path.basename(path)[:9])
    file=get_file_path_endswith(path, EW)

    # load data
    dir_name=os.path.join(relative_path, 'mouse_nwb/specimen_'+ str(specimen_id))
    all_sweeps=ctc.get_ephys_sweeps(specimen_id,  os.path.join(dir_name, 'ephys_sweeps.json'))
    #all_sweeps=ctc.get_ephys_sweeps(specimen_id)
    noise2_sweeps = get_sweep_num_by_name(all_sweeps, 'Noise 2')
#    noise2_data=ctc.get_ephys_data(specimen_id).get_sweep(noise2_sweeps[0])
    noise2_data=ctc.get_ephys_data(specimen_id, os.path.join(dir_name, 'ephys.nwb')).get_sweep(noise2_sweeps[0])

    # run model with current
    stimulus2=noise2_data['stimulus']
    neuron_config=ju.read(file)
    neuron_config['dt']=1./noise2_data['sampling_rate'] #reset dt to the stimulus dt, not the optimization dt
    neuron = GlifNeuron.from_dict(neuron_config)
    run_data = neuron.run(stimulus2)
    run_data['time']=np.arange(0, len(run_data['voltage']))*neuron_config['dt']
    run_data['El_reference']=neuron_config['El_reference']    
    run_data['stimulus']=noise2_data['stimulus']

    return run_data
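# Usage sketch (hypothetical folder name): run one neuron's GLIF2 model on its noise 2
# stimulus and plot the trace; adding 'El_reference' converts the model voltage to an
# absolute potential, as the stored run_data['El_reference'] suggests is done downstream.
# run_data = get_model(os.path.join(data_path, '474637203_Htr3a-Cre_NO152'), '_GLIF2_neuron_config.json')
# plt.plot(run_data['time'], run_data['voltage'] + run_data['El_reference'])
# plt.show()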
def extract_values(folder, the_end_file_match):
    '''extract threshold values from a config file with a specified ending
    inputs:
        folder: string 
            Path to second tier data folder with the data files of a specific neuron inside
        the_end_file_match: string
            Specifies the end of the desired file name. Used for grabbing different model-level config files.
    outputs:
        Dictionary of threshold values (nan where no matching file exists)
    '''
    dictionary = {}
    dictionary['th_NOT_opt'] = {'from_zero': np.nan, 'absolute': np.nan}
    dictionary['th_opt'] = {'from_zero': np.nan, 'absolute': np.nan}
    dictionary['th_coeff'] = np.nan
    dictionary['El_reference'] = np.nan

    if np.any([f.endswith(the_end_file_match) for f in os.listdir(folder)]):
        file = get_file_path_endswith(folder, the_end_file_match)
        config_dict = ju.read(file)

        dictionary['th_NOT_opt'][
            'absolute'] = config_dict['th_inf'] + config_dict['El_reference']
        dictionary['th_opt']['absolute'] = config_dict['th_inf'] * config_dict[
            'coeffs']['th_inf'] + config_dict['El_reference']

        dictionary['th_NOT_opt']['from_zero'] = config_dict['th_inf']
        dictionary['th_opt']['from_zero'] = config_dict[
            'th_inf'] * config_dict['coeffs']['th_inf']

        dictionary['th_coeff'] = config_dict['coeffs']['th_inf']
        dictionary['El_reference'] = config_dict['El_reference']
    else:
        if 'GLIF1' in the_end_file_match:
            # derive the specimen ID from the folder name (first 9 characters)
            print('THERE IS NO LIF MODEL FOR NEURON', os.path.basename(folder)[:9])
            raise Exception('there should be a GLIF1 in every level')

    return dictionary
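# Usage sketch (hypothetical folder path): compare the absolute threshold before and
# after optimization for one neuron.
# vals = extract_values(folder, '_GLIF1_neuron_config.json')
# shift = vals['th_opt']['absolute'] - vals['th_NOT_opt']['absolute']  # effect of the optimized coefficient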
def cycle(folder, end_with, model_string, data_dict):
    '''Calculates the squared error of the subthreshold voltage for the specified folder and model.
    This function exists because of the repetitive nature of the code.
    inputs:
        folder: string
            path to folder containing model configurations.
        end_with: string
            end of the file searching for: options '_GLIF1_neuron_config.json',
            '_GLIF2_neuron_config.json', etc.
        model_string: string
            string searched for in the model name: options '(LIF)', '(LIF-R)', '(LIF-ASC)',
            '(LIF-R_ASC)', '(LIF-R_ASC_A)'
        data_dict: dictionary
            contains data returned by extract_data
    output:
        writes the RSS and variance of the voltage difference between the model and the data,
        the variance of the data itself, and the number of subthreshold data points considered
        into a file ending with '*_subthr_v.json' in the folder (both noise 1 and noise 2 are run)
    '''

    # see if the specified model configuration file exists in the data folder
    try:
        file = get_file_path_endswith(folder, end_with)
    except Exception:
        return

    specimen_id = int(os.path.basename(folder)[:9])
    cre = os.path.basename(folder)[10:]

    # run forced spike protocol and save subthreshold voltage difference to file
    out = running('noise1', file, data_dict)
    out_to_update = running('noise2', file, data_dict)
    out.update(out_to_update)
    output_file_name = os.path.join(
        folder,
        str(specimen_id) + '_' + cre + end_with[:7] + 'subthr_v.json')
    json_utilities.write(output_file_name, out)
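# Usage sketch: run the subthreshold-voltage comparison for every neuron and model level
# ('extract_data' is the helper named in the docstring; its exact signature is assumed here).
# for folder in folders:
#     data_dict = extract_data(folder)
#     for ew, ms in [('_GLIF1_neuron_config.json', '(LIF)'),
#                    ('_GLIF2_neuron_config.json', '(LIF-R)')]:
#         cycle(folder, ew, ms, data_dict)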
#------------------------------------------------------------------------------------
#------------------------------------------------------------------------------------     
    
# sorting folders into an order (not necessary)
folders=np.sort([os.path.join(struct_data_dir1, f) for f in  os.listdir(struct_data_dir1)])
found1_flag=0
for specimen_id_directory in folders:
    specimen_id=os.path.basename(specimen_id_directory)[:9]
    for ends_with in ['_GLIF1'+end, 
                     '_GLIF2'+end,
                     '_GLIF3'+end,
                     '_GLIF4'+end,
                     '_GLIF5'+end]:
        # see if the specified model configuration file exists in both data directories
        try:
            json_file1=get_file_path_endswith(specimen_id_directory, ends_with)
        except Exception:
            continue
        try:
            json_file2=get_file_path_endswith(os.path.join(struct_data_dir2, os.path.basename(specimen_id_directory)), ends_with)
        except Exception:
            continue
        
        # if both files exist get their contents
        d1=ju.read(json_file1)
        d2=ju.read(json_file2)
        
        # look for differences in contents within a tolerance
        result=list(dictdiffer.diff(d1, d2, tolerance=.001))
        pp=pprint.PrettyPrinter(indent=4)
        if result!=[]:
            pp.pprint(result)  # report any differences found between the two directories
#------------------------------------------------------------
#--open a file from the preprocessor and plot single traces--
#------------------------------------------------------------

#set up figure
plt.figure(figsize=(14, 6))
I1_plt = plt.subplot2grid((7, 1), (0, 0))
V2_plt = plt.subplot2grid((7, 1), (1, 0), rowspan=6)

specimen_id = '474637203'  #htr3
sub_folder = os.path.join(
    data_path,
    os.listdir(data_path)[np.where(
        [specimen_id in fname for fname in os.listdir(data_path)])[0][0]])
file = get_file_path_endswith(sub_folder, '_preprocessor_values.json')
neuron_dict = ju.read(file)
R_NO_asc = neuron_dict['resistance']['R_test_list']['mean']
R_asc = neuron_dict['resistance']['R_fit_ASC_and_R']['mean']
C = neuron_dict['capacitance']['C_test_list']['mean']
El = neuron_dict['El']['El_noise']['measured']['mean']

# get the sweeps
dir_name = os.path.join(relative_path, 'mouse_nwb/specimen_' + specimen_id)
the_sweeps = ctc.get_ephys_sweeps(int(specimen_id),
                                  os.path.join(dir_name, 'ephys_sweeps.json'))
noise1_sweeps = get_sweep_num_by_name(the_sweeps, 'Noise 1')

# put data in the format required for functions below
n1_s1_data = ctc.get_ephys_data(int(specimen_id),
                                os.path.join(dir_name, 'ephys.nwb')).get_sweep(
                                    noise1_sweeps[0])  # first noise 1 sweep
#------------------------------------------------------------------------------------
#------------------------------------------------------------------------------------
    n2_sweeps=get_sweep_num_by_name(the_sweeps, 'Noise 2')
    
    # check to see if there are at least two noise 1 and two noise 2 sweeps in the data nwb file
    if not check_more_than_two_sweeps(n1_sweeps, nwb):
        print(specimen_id, "has less than two noise_1 sweeps")
        logging.warning(str(specimen_id) + " has less than two noise_1 sweeps")
        sp_id_to_remove.append(specimen_id)
        continue
    if not check_more_than_two_sweeps(n2_sweeps, nwb):
        print(specimen_id, "has less than two noise_2 sweeps")
        logging.warning(str(specimen_id) + " has less than two noise_2 sweeps")
        sp_id_to_remove.append(specimen_id)
        continue
    
    # check that there is at least a level 1 GLIF model in the structured data directory
    if not get_file_path_endswith(specimen_id_directory, '_GLIF1_neuron_config.json'):
        print(specimen_id, "has no model file!")
        logging.warning(str(specimen_id) + " has no model file")
        sp_id_to_remove.append(specimen_id)
        continue

    # check that the spike times in the model files are the same (if not, it is likely that more
    # than one stimulus amplitude was played to the neuron)
    glif_spike_times_n1=get_model_spike_times_from_nwb('_GLIF1_neuron_config.json', specimen_id_directory, '(LIF)', n1_sweeps, where_running)
    if not check_spike_times_identical(glif_spike_times_n1):
        print(specimen_id, "noise 1 has inconsistent model sweep times")
        logging.warning(str(specimen_id) + " noise 1 has inconsistent model sweep times")
        sp_id_to_remove.append(specimen_id)
    
    glif_spike_times_n2=get_model_spike_times_from_nwb('_GLIF1_neuron_config.json', specimen_id_directory, '(LIF)', n2_sweeps, where_running)
def cycle(specimen_id_directory, ends_with, model_string, data_dict):
    '''Calculates the explained variance ratio for the specified specimen_id_directory and model.
    This function exists because of the repetitive nature of the code.
    inputs:
        specimen_id_directory: string
            path to structured data directory containing neuron_config, preprocessor, etc., files.
        ends_with: string
            end of file searching for:  options "_GLIF1_neuron_config.json","_GLIF2_neuron_config.json" etc.
        model_string: string
            string searched for in the model name: options '(LIF)', '(LIF-R)', '(LIF-ASC)',
            '(LIF-R_ASC)', '(LIF-R_ASC_A)'
        data_dict: dictionary
            contains data returned by extract_data
    output:
        writes explained variance ratios into file ending with 'exp_var_ratio_10ms.json' in specimen_id_directory
    '''

    # see if the specified model configuration file exists in the data folder
    try:
        file = get_file_path_endswith(specimen_id_directory, ends_with)
    except Exception:
        return

    specimen_id = int(os.path.basename(specimen_id_directory)[:9])
    cre = os.path.basename(specimen_id_directory)[10:]

    #confirming the dt of noise 1 and noise 2 are the same in the file
    if data_dict['n1_dt'] != data_dict['n2_dt']:
        raise Exception('The dt in noise 1 and noise 2 is not the same.')

    # initializing data structures for explained variance calculations
    ev = {}
    ev['after_opt'] = {}
    ev['before_opt'] = {}
    ev['model_spike_times_same_across_sweeps'] = {}
    ev['run_model_spike_times_match_database'] = {}

    # hard-coded explained-variance results for specimen 580895033, which is handled
    # as a special case rather than recomputed below
    if specimen_id == 580895033:
        if ends_with == '_GLIF1_neuron_config.json':
            ev['model_spike_times_same_across_sweeps']['n1'] = 1
            ev['model_spike_times_same_across_sweeps']['n2'] = 1
            ev['run_model_spike_times_match_database']['n2'] = True
            ev["n2_after_opt_sanitycheck"] = 0.9838105341820447
            ev["after_opt"]["noise_1"] = 0.9793973377573459
            ev["after_opt"]["noise_2"] = 0.983810807305087
            ev["before_opt"]["noise_1"] = 0.8454442315760935
            ev["before_opt"]["noise_2"] = 0.8493365092125525
        if ends_with == '_GLIF2_neuron_config.json':
            ev['model_spike_times_same_across_sweeps']['n1'] = 1
            ev['model_spike_times_same_across_sweeps']['n2'] = 1
            ev['run_model_spike_times_match_database']['n2'] = True
            ev["n2_after_opt_sanitycheck"] = 0.9889582213030378
            ev["after_opt"]["noise_1"] = 0.9885054832008723
            ev["after_opt"]["noise_2"] = 0.988952726396574
            ev["before_opt"]["noise_1"] = 0.8852614534451949
            ev["before_opt"]["noise_2"] = 0.8882540368687765
        if ends_with == '_GLIF3_neuron_config.json':
            ev['model_spike_times_same_across_sweeps']['n1'] = 1
            ev['model_spike_times_same_across_sweeps']['n2'] = 1
            ev['run_model_spike_times_match_database']['n2'] = True
            ev["n2_after_opt_sanitycheck"] = 0.972059377795663
            ev["after_opt"]["noise_1"] = 0.964542013582842
            ev["after_opt"]["noise_2"] = 0.972065677218419
            ev["before_opt"]["noise_1"] = 0.9175506860780771
            ev["before_opt"]["noise_2"] = 0.9192162154035345
        if ends_with == '_GLIF4_neuron_config.json':
            ev['model_spike_times_same_across_sweeps']['n1'] = 1
            ev['model_spike_times_same_across_sweeps']['n2'] = 1
            ev['run_model_spike_times_match_database']['n2'] = True
            ev["n2_after_opt_sanitycheck"] = 0.9838078849900366
            ev["after_opt"]["noise_1"] = 0.9774371918205483
            ev["after_opt"]["noise_2"] = 0.983816449506429
            ev["before_opt"]["noise_1"] = 0.9481063969607645
            ev["before_opt"]["noise_2"] = 0.952096857211585
        if ends_with == '_GLIF5_neuron_config.json':
            ev['model_spike_times_same_across_sweeps']['n1'] = 1
            ev['model_spike_times_same_across_sweeps']['n2'] = 1
            ev['run_model_spike_times_match_database']['n2'] = True
            ev["n2_after_opt_sanitycheck"] = 0.9836467816928267
            ev["after_opt"]["noise_1"] = 0.9784782997497251
            ev["after_opt"]["noise_2"] = 0.983643486774882
            ev["before_opt"]["noise_1"] = 0.8846618004335125
            ev["before_opt"]["noise_2"] = 0.8904106067655934
        json_utilities.write(
            os.path.join(
                specimen_id_directory,
                str(specimen_id) + '_' + cre + ends_with[:7] +
                'exp_var_ratio_10ms.json'), ev)
        print('\twrote output to', os.path.join(
            specimen_id_directory,
            str(specimen_id) + '_' + cre + ends_with[:7] +
            'exp_var_ratio_10ms.json'))
        return

    # get the spike indices of the model from the .nwb file in the database
    model_n1_nwb_ind = get_model_spike_ind_from_nwb(
        ends_with, specimen_id_directory, model_string,
        data_dict['noise1_sweeps'], data_dict['n1_dt'], where_running)[0]

    # get explained variances
    ev['after_opt']['noise_1'] = exVar(data_dict['noise1_spike_ind'],
                                       [model_n1_nwb_ind], [.01],
                                       data_dict['n1_dt'],
                                       len(data_dict['noise1_stim']))[0]
    ev['after_opt']['noise_2'] = get_ev_from_folder(
        ends_with, specimen_id_directory,
        model_string)  # get the explained variance ratio from the GLIF API

    #----------------------------------------------------------------------------------------------
    #---------grabbing data along with performing a series of sanity checks for later use-----------
    #----------------------------------------------------------------------------------------------
    neuron_config = json_utilities.read(file)
    neuron_config['dt'] = data_dict['n2_dt']
    neuron = GlifNeuron.from_dict(neuron_config)  #set up model for running
    print('\trunning', specimen_id, 'noise 2 after optimization as a sanity check to compare with what is in the database')
    model_n2_after = neuron.run(data_dict['noise2_stim'])  #running model
    print('\tfinished', specimen_id, 'running the model on noise 2 after optimization as a sanity check to compare with what is in the database')

    # before calculating explained variance, sanity check that spike times and time steps
    # are consistent via the dt within the same file (np.allclose is used because a bare ==
    # between arrays cannot be evaluated in an assert)
    assert np.allclose(model_n2_after['grid_spike_times'] / data_dict['n2_dt'],
                       model_n2_after['spike_time_steps'])

    # calculate the explained variance from the model run
    ev_GLIF1_n2_after = exVar(data_dict['noise2_spike_ind'],
                              [model_n2_after['spike_time_steps']],
                              [.01], data_dict['n2_dt'],
                              len(data_dict['noise2_stim']))
    ev['n2_after_opt_sanitycheck'] = ev_GLIF1_n2_after[
        0]  #this is from the rerun done here

    # Sanity check to make sure model spike times from the database are all the same.
    # Note that all of these should have been eliminated via the "check_sweeps_and_rm_folders.py" script
    glif_spike_times_n1 = get_model_spike_times_from_nwb(
        ends_with, specimen_id_directory, model_string,
        data_dict['noise1_sweeps'], where_running)
    ev['model_spike_times_same_across_sweeps'][
        'n1'] = check_spike_times_identical(glif_spike_times_n1)

    glif_spike_times_n2 = get_model_spike_times_from_nwb(
        ends_with, specimen_id_directory, model_string,
        data_dict['noise2_sweeps'], where_running)
    ev['model_spike_times_same_across_sweeps'][
        'n2'] = check_spike_times_identical(glif_spike_times_n2)

    # sanity check to make sure calculated model spike times run here match what is in the Allen Institute Cell Types Database.
    # just checking against the first sweep since the sweeps should all be identical
    ev['run_model_spike_times_match_database']['n2'] = np.allclose(
        model_n2_after['grid_spike_times'],
        glif_spike_times_n2[0],
        atol=.0001,
        rtol=0,
        equal_nan=True)

    #--------------------------------------------------------
    #--------------------------------------------------------
    #--------------------------------------------------------

    # running and calculating exp var for data before optimization
    neuron_config['dt'] = data_dict['n1_dt']
    neuron_config['coeffs']['th_inf'] = 1.0
    neuron = GlifNeuron.from_dict(neuron_config)
    print('\trunning noise 1', specimen_id, 'before optimization')
    model_n1_before = neuron.run(data_dict['noise1_stim'])
    ev_GLIF1_n1_before = exVar(data_dict['noise1_spike_ind'],
                               [model_n1_before['spike_time_steps']],
                               [.01], data_dict['n1_dt'],
                               len(data_dict['noise1_stim']))
    print('\tfinished noise 1', specimen_id, 'before optimization')

    print('\trunning noise 2', specimen_id, 'before optimization')
    model_n2_before = neuron.run(data_dict['noise2_stim'])
    ev_GLIF1_n2_before = exVar(data_dict['noise2_spike_ind'],
                               [model_n2_before['spike_time_steps']],
                               [.01], data_dict['n2_dt'],
                               len(data_dict['noise2_stim']))
    print('\tfinished noise 2', specimen_id, 'before optimization')

    ev['before_opt']['noise_1'] = ev_GLIF1_n1_before[0]
    ev['before_opt']['noise_2'] = ev_GLIF1_n2_before[0]

    # save the file to the local structured data directory
    json_utilities.write(
        os.path.join(
            specimen_id_directory,
            str(specimen_id) + '_' + cre + ends_with[:7] +
            'exp_var_ratio_10ms.json'), ev)
    print('\twrote output to', os.path.join(
        specimen_id_directory,
        str(specimen_id) + '_' + cre + ends_with[:7] +
        'exp_var_ratio_10ms.json'))
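# Usage sketch: after cycle() runs, the written ratios can be read back and compared,
# as the checking script further below does (file name is hypothetical).
# ev_dict = ju.read(os.path.join(specimen_id_directory,
#                                str(specimen_id) + '_' + cre + '_GLIF1_exp_var_ratio_10ms.json'))
# print(ev_dict['after_opt']['noise_2'], ev_dict['n2_after_opt_sanitycheck'])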
structured_data_directory=os.path.join(relative_path, 'create_data_dir', 'mouse_struc_data_dir')
#structured_data_directory=os.path.join(relative_path, 'create_data_dir', 'human_struc_data_dir')

#---------------------------------------------------------------------------
#---------------------------------------------------------------------------
#---------------------------------------------------------------------------
    
# sort the data so that specifying start and end integers works
folders=np.sort([os.path.join(structured_data_directory, f) for f in  os.listdir(structured_data_directory)])

# check for issues in the "*_GLIF*_exp_var_ratio_10ms.json" files
for folder in folders:
    specimen_id=os.path.basename(folder)[:9]
    for s in ['GLIF1', 'GLIF2', 'GLIF3', 'GLIF4', 'GLIF5']:
        file=False
        try:
            file=get_file_path_endswith(folder, s+'_exp_var_ratio_10ms.json')
        except Exception:
            pass
        if file:
            print('checking', file)
            ratio_dict=ju.read(file)  # renamed from 'dict' to avoid shadowing the builtin
            database_value=ratio_dict['n2_after_opt_sanitycheck']
            calculated_value=ratio_dict['after_opt']['noise_2']
            rtol=0.
            atol=1e-3

            # check whether the two values recorded in the "*_GLIF*_exp_var_ratio_10ms.json" file are the same
            if not np.isclose(database_value, calculated_value, rtol=rtol, atol=atol):
                print(specimen_id, s, ': the value difference,', database_value, '-', calculated_value, '=',
                      np.absolute(database_value-calculated_value), 'is > the tolerance,',
                      atol + rtol * np.absolute(calculated_value))

            # check to make sure model spike times of noise 1 from the database are all the same.
            if ratio_dict['model_spike_times_same_across_sweeps']['n1']==False:
                print(specimen_id, s, ': model spike times are not identical across noise 1 sweeps')

#---------------------------------------------------------------------------
#---------------------------------------------------------------------------
def general_exclusions(folder_path, 
                       n_in_cre=5, 
                       resist=True, 
                       th_inf_bad=True, 
                       spike_cut=True, 
                       ev=.2,
                       accidental_exclusion=True): 
    '''Returns a list of specimen ids that will be excluded from all levels and analysis.
    Note that some of these exclusions may be irrelevant for the curated data via the Allen Institute
    Cell Types Database. Nonetheless, I leave these exclusions here for my own use on internal data.
    Inputs:
        folder_path: string
            path to the structured data directory 
        n_in_cre: integer
            specifies the number of neurons that must exist in a cre line for that line to be included in the analysis
        resist: boolean
            if True, exclude neurons which have a calculated resistance over 1000 MOhms
        th_inf_bad: boolean
            if True, exclude neurons which have a calculated threshold less than -60 mV
        spike_cut: boolean
            if True, exclude neurons which have an intercept larger than 30 mV after fitting the spike cut length 
        ev: float
            exclude neurons that have a GLIF1 explained variance on noise 1 less than the provided value. 
            Note that noise 1 is used because exclusion criteria are only applied to training data 
        accidental_exclusion: boolean
            one neuron was either accidentally excluded from the analysis or removed for an unknown reason.  
            If True, exclude this neuron. 
    Returns: 
        exclude_me_sp_ids: list of strings
            list of neuron by specimen ids to be eliminated from the structured data directory
    '''
    initial_sp_ids=[f[0:9] for f in os.listdir(folder_path)]
    print('GENERAL EXCLUSIONS: there will be overlap in the numbers below, i.e. some models will be excluded for more than one reason')
    print('\tTotal number of preprocessed files:', len(initial_sp_ids))
    folders=[os.path.join(folder_path, f) for f in  os.listdir(folder_path)]
    
    strange_pp_exclusion=[] #exclusion for preprocessor files that do not have the correct format

    # exclude via slope and intercept from spike cutting results
    spike_cutting_exclusions=[]
    if spike_cut:
        for folder in folders:
            specimen_ID=os.path.basename(folder)[:9]
            pp_file=get_pp_path(folder)
            pp_dict=ju.read(pp_file)
            try:
                if pp_dict['spike_cutting']['NOdeltaV']['intercept'] > .03:
                    spike_cutting_exclusions.append(specimen_ID)
            except Exception:
                print(folder, 'DOES NOT LOOK LIKE A NORMAL PREPROCESSOR FILE')
                strange_pp_exclusion.append(specimen_ID)
        print('\t', len(set(spike_cutting_exclusions)), 'neurons were excluded for having an intercept larger than .03')
    
    # exclude based on the measured experimental threshold
    # note that the experimental threshold is the same for all models of the same neuron so just look at GLIF1 file.
    th_inf_exclusion_list=[]
    if th_inf_bad:
        for folder in folders:
            specimen_ID=os.path.basename(folder)[:9]
            pp_file=get_pp_path(folder)
            pp_dict=ju.read(pp_file)
            try:
                if pp_dict['th_inf']['via_Vmeasure']['value']< -.06:
                    th_inf_exclusion_list.append(specimen_ID)
            except Exception:
                print(folder, 'DOES NOT LOOK LIKE A NORMAL PREPROCESSOR FILE')
                strange_pp_exclusion.append(specimen_ID)

        print('\t', len(set(th_inf_exclusion_list)), 'neurons have a th_inf less than -60 mV')

    # exclude based on resistance
    resistance_exclusion_list=[]
    if resist:
        for folder in folders:
            specimen_ID=os.path.basename(folder)[:9]
            pp_file=get_pp_path(folder)
            pp_dict=ju.read(pp_file)
            try:
                if pp_dict['resistance']['R_test_list']['mean']>1000.e6:
                    resistance_exclusion_list.append(specimen_ID)
            except Exception:
                print(folder, 'DOES NOT LOOK LIKE A NORMAL PREPROCESSOR FILE')
                strange_pp_exclusion.append(specimen_ID)
        print('\t', len(set(resistance_exclusion_list)), 'neurons have a resistance fit WITHOUT ASC larger than 1000 MOhms.')

        print('\t', len(set(strange_pp_exclusion)), 'neurons have a strange-looking preprocessor file.')

    # exclude based on explained variance on training data
    exp_var_exclusion_no_file=[]
    if ev:
        for folder in folders:
            specimen_ID=os.path.basename(folder)[:9]
            try:
                file=get_file_path_endswith(folder, 'GLIF1_exp_var_ratio_10ms.json')
            except Exception:
                exp_var_exclusion_no_file.append(specimen_ID)
        print('\t', len(set(exp_var_exclusion_no_file)), 'neurons have no explained variance file, which means they probably had an empty array in noise 1. See the calc_all_explained_variance.py variable model_GLIF1_n1_after')

    exp_var_exclusion_below=[]
    
    # the following mouse neuron was either accidentally excluded from the analysis or removed for a reason that eludes me now.
    accidental_exclusions=[]
    if accidental_exclusion:
        if os.path.isdir(os.path.join(folder_path, '569739534'+'_Chrna2-Cre_OE25')): #if this directory exists, get rid of it.
            accidental_exclusions=['569739534']
        else: pass #neuron already excluded or not in the directory
        print('\t', len(set(accidental_exclusions)), 'neurons were excluded from the analysis by accident. Set the accidental_exclusion flag to False to include them when reprocessing all data.')
 
    
    def check_ev_value(folder, ew):
        '''Checks to see if the explained variance of the training data (noise 1) is below the specified value.
        inputs:
            folder: string
                path to the folder where files are located
            ew: string
                specifies the unique end of the file name being searched for
        returns:
            Nothing. Appends specimen IDs to be excluded to the enclosing 'exp_var_exclusion_below' list
        '''
        specimen_ID=os.path.basename(folder)[:9]
        try:
            file=get_file_path_endswith(folder, ew)  # if the file doesn't exist this will fail
            dictionary=ju.read(file)
            if dictionary['after_opt']['noise_1']<ev:
                exp_var_exclusion_below.append(specimen_ID)
        except Exception:
            print("can't find a file for", specimen_ID, "this should not happen if check_sweeps_and_rm_folders.py was run!")
        
        
    if ev:
        for folder in folders:
            check_ev_value(folder, 'GLIF1_exp_var_ratio_10ms.json')
        print('\t', len(set(exp_var_exclusion_below)), 'neurons have a GLIF1 explained variance on noise 1 training data of less than', ev)
                    

    # get the set of all neurons that are still included in analysis after the above exclusions
    init_excluded_id_list=list(set(spike_cutting_exclusions+
                              resistance_exclusion_list+
                              th_inf_exclusion_list+
                              strange_pp_exclusion+
                              exp_var_exclusion_no_file+
                              exp_var_exclusion_below+
                              accidental_exclusions))
    reduced_sp_ids=list(set(initial_sp_ids)-set(init_excluded_id_list)) # specimen ids remaining after above exclusions
    
    # remove data that does not have at least a specified number (n_in_cre) of neurons in a cre line
    small_cre_line_exclusion=np.array([])  # initialized outside the if so it exists even when n_in_cre is False
    if n_in_cre is not False:
        cre_list=[]
        for folder in folders:
            specimen_ID=os.path.basename(folder)[:9]
            if specimen_ID in reduced_sp_ids:
                cre_list.append({'sp':specimen_ID, 'cre': os.path.basename(folder)[10:]})
                
        df=pd.DataFrame(cre_list)
        for cre in df['cre'].unique():
            if len(df[df['cre']==cre])<n_in_cre:
                small_cre_line_exclusion=np.append(small_cre_line_exclusion, (df[df['cre']==cre]['sp'].values))

    # create list of specimen IDs whose folder should be completely eliminated
    exclude_me_sp_ids=list(set(small_cre_line_exclusion.tolist()+
                               init_excluded_id_list))

    print('A total of', len(exclude_me_sp_ids), 'out of', len(folders), 'neurons are excluded via the general exclusion criteria, leaving', len(folders)-len(exclude_me_sp_ids), 'for this analysis')
    
    return exclude_me_sp_ids
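# Usage sketch: compute the exclusion list, then remove those folders from the structured
# data directory ('shutil' would need to be imported; paths are assumed).
# exclude_ids = general_exclusions(structured_data_directory)
# for folder in folders:
#     if os.path.basename(folder)[:9] in exclude_ids:
#         shutil.rmtree(folder)

#---------------------------------------------------------------------------
#---------------------------------------------------------------------------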
            for f in os.listdir(folder):
                if ('GLIF2' in f )  or ('GLIF4' in f) or ('GLIF5' in f):
                    os.remove(os.path.join(folder,f))
    
    # find and remove bad spike component of threshold from the directory                 
    bad_voltage_comp_of_th=exclude_via_v_comp_of_th(structured_data_directory)   
    for folder in folders:
        sp_id=os.path.basename(folder)[0:9]                 
        if sp_id in bad_voltage_comp_of_th:
            for f in os.listdir(folder):
                if ('GLIF5' in f):
                    os.remove(os.path.join(folder,f))

    # confirm every remaining folder still has a level 1 model configuration
    for folder in folders:
        if not get_file_path_endswith(folder, 'GLIF1_neuron_config.json'):
            print('nope')

    
    cre_list=[]
    folders=[os.path.join(structured_data_directory, f) for f in  os.listdir(structured_data_directory)]
    for folder in folders:
        specimen_ID=os.path.basename(folder)[:9]
        cre_list.append({'sp':specimen_ID, 'cre': os.path.basename(folder)[10:]})
    df=pd.DataFrame(cre_list)
    print(df.groupby('cre').size())
    
    # count up the files in the directory
    print('TOTALS')
    print('GLIF1 has', count(structured_data_directory, '_GLIF1_neuron_config.json'))
    print('GLIF2 has', count(structured_data_directory, '_GLIF2_neuron_config.json'))
#---------------------------------------------------------------------------
#---------------------------------------------------------------------------
import os
import sys
import numpy as np
import allensdk.core.json_utilities as ju  # 'os', 'np', and 'ju' are used below but were not imported in this snippet
relative_path = os.path.dirname(os.getcwd())
sys.path.append(os.path.join(relative_path, 'libraries'))
from data_library import check_and_organize_data, get_file_path_endswith, get_pp_path
from pub_plot_library import distribution_plot

data_path = os.path.join(relative_path, 'mouse_struc_data_dir')
folders = [os.path.join(data_path, f) for f in os.listdir(data_path)]

all_neurons = []
for folder in folders:
    specimen_ID = os.path.basename(folder)[:9]
    cre = os.path.basename(folder)[10:]
    try:
        get_file_path_endswith(
            folder, '_GLIF5_neuron_config.json'
        )  # checks that the file is there; if not, the values should not be used
    except Exception:
        continue
    pp_file = get_pp_path(folder)
    pp_dict = ju.read(pp_file)
    if pp_dict['threshold_adaptation'][
            'a_voltage_comp_of_thr_from_fitab'] is not None and pp_dict[
                'threshold_adaptation'][
                    'b_voltage_comp_of_thr_from_fitab'] is not None:
        all_neurons.append([
            specimen_ID, cre,
            pp_dict['threshold_adaptation']['a_voltage_comp_of_thr_from_fitab']
            / pp_dict['threshold_adaptation']
            ['b_voltage_comp_of_thr_from_fitab'],
            np.log10(pp_dict['threshold_adaptation']
#---------------------------------------------------------------------------
#---------------------------------------------------------------------------
                                                                                             MAKE_PLOT=True,
                                                                                             SHOW_PLOT=True,
                                                                                             BLOCK=True,
                                                                                             PUBLICATION_PLOT=True)


# load data out of configuration files
data_path = os.path.join(relative_path, 'mouse_struc_data_dir')
folders = [os.path.join(data_path, f) for f in os.listdir(data_path)]

all_neurons = []
for folder in folders:
    specimen_ID = os.path.basename(folder)[:9]
    cre = os.path.basename(folder)[10:]
    try:
        file = get_file_path_endswith(folder, '_GLIF2_neuron_config.json')
    except Exception:
        continue
    neuron_dict = ju.read(file)
    all_neurons.append([
        specimen_ID, cre,
        neuron_dict['threshold_reset_method']['params']['a_spike'] * 1.e3,
        1. / neuron_dict['threshold_reset_method']['params']['b_spike'] * 1.e3
    ])

cre_dict = check_and_organize_data(all_neurons)

percentile_dict = distribution_plot(cre_dict,
                                    2,
                                    3,
                                    xlabel=r'$\delta \Theta_s (mV)$',