def exclude_via_spike_comp_of_thr(folder_path):
    '''Return a list of specimen ids that should be excluded from the directory
    based on the spike component of threshold.  These specimen ids should be
    removed from all levels with reset rules.
    input:
        folder_path: string
            Path to a structured data directory.
            Inside the path should exist a series of folders, one per neuron, with the
            name format: specimen_id_cre.  Inside those inner folders
            are the neuron configs of the GLIF models and the preprocessor files.
    returns:
        spike_comp_of_thr_exclusion_list: list of strings
            list of specimen ids to be excluded
    '''
    folders=[os.path.join(folder_path, f) for f in os.listdir(folder_path)]
    
    none_exclusion=[]
    spike_comp_of_thr_exclusion_list=[]
    total_neurons_with_stim_for_reset_rules=0
    exclusion_a_greaterthan_p02=[]
    exclusion_a_lessthan_0=[]
    exclusion_1_over_b_greaterthan_p1=[]
        
    for folder in folders:
        specimen_ID=os.path.basename(folder)[:9]
        pp_file=get_pp_path(folder)
        pp_dict=ju.read(pp_file)
        if pp_dict['threshold_adaptation']['a_spike_component_of_threshold'] is not None and pp_dict['threshold_adaptation']['b_spike_component_of_threshold'] is not None:
            #get overall idea of exclusion reasons
            total_neurons_with_stim_for_reset_rules=total_neurons_with_stim_for_reset_rules+1
            if pp_dict['threshold_adaptation']['a_spike_component_of_threshold']>.02:
                exclusion_a_greaterthan_p02.append(specimen_ID)
            if pp_dict['threshold_adaptation']['a_spike_component_of_threshold']<=0:
                exclusion_a_lessthan_0.append(specimen_ID)
            if 1./pp_dict['threshold_adaptation']['b_spike_component_of_threshold']>.1:
                exclusion_1_over_b_greaterthan_p1.append(specimen_ID)
        else:
            none_exclusion.append(specimen_ID)
    
    spike_comp_of_thr_exclusion_list=list(set(exclusion_a_greaterthan_p02+exclusion_a_lessthan_0+exclusion_1_over_b_greaterthan_p1+none_exclusion))
    print('of', len(folders), 'neurons,', total_neurons_with_stim_for_reset_rules,
          'neurons have the stimuli necessary for reset rules. Of those,',
          len(set(exclusion_a_greaterthan_p02 + exclusion_a_lessthan_0 + exclusion_1_over_b_greaterthan_p1)),
          'are excluded due to a bad a_spike or b_spike')
    print('\tthis leaves a total of', len(spike_comp_of_thr_exclusion_list),
          'excluded due to a_spike or b_spike and a total of',
          len(folders) - len(spike_comp_of_thr_exclusion_list),
          'for spike component of threshold analysis')
    return spike_comp_of_thr_exclusion_list
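
# A minimal usage sketch (hedged: 'mouse_struc_data_dir' is a hypothetical
# directory name; any structured data directory with the layout described in the
# docstring above should work):
#
#     bad_ids = exclude_via_spike_comp_of_thr('mouse_struc_data_dir')
#     kept_folders = [f for f in os.listdir('mouse_struc_data_dir')
#                     if f[0:9] not in bad_ids]
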
def exclude_via_v_comp_of_th(folder_path):
    '''Return a list of specimen ids that should be excluded from the directory
    based on the voltage component of threshold (threshold adaptation).  These
    specimen ids should be removed from the level 5 GLIF models.
    input:
        folder_path: string
            Path to a structured data directory.   
            Inside the path should exist a series of folders with a
            name format: specimen id_cre for each neuron.  Inside those inner folders
            are the neuron configs of the GLIF models and preprocessor files. 
    returns:
        exclusion_list: list of strings
            list of specimen ids to exclude
    '''
    folders=[os.path.join(folder_path, f) for f in os.listdir(folder_path)]
    none_exclusion=[]
    exclusion_a_lessthan_neg50=[]
    exclusion_b_lessthan_p1=[]
    strange_pp_exclusion=[] #exclusion for preprocessor files that do not have the correct format
    
    for folder in folders:
        specimen_ID=os.path.basename(folder)[:9]
        pp_file=get_pp_path(folder)
        pp_dict=ju.read(pp_file)
        try:
            if pp_dict['threshold_adaptation']['a_voltage_comp_of_thr_from_fitab'] is not None and pp_dict['threshold_adaptation']['b_voltage_comp_of_thr_from_fitab'] is not None:
                if pp_dict['threshold_adaptation']['a_voltage_comp_of_thr_from_fitab']<-50.:
                    exclusion_a_lessthan_neg50.append(specimen_ID)
                if pp_dict['threshold_adaptation']['b_voltage_comp_of_thr_from_fitab']<.1 :
                    exclusion_b_lessthan_p1.append(specimen_ID)
            else: 
                none_exclusion.append(specimen_ID)
        except Exception:
            print(folder, 'DOES NOT LOOK LIKE A NORMAL PREPROCESSOR FILE')
            strange_pp_exclusion.append(specimen_ID)
            

    print(len(set(exclusion_a_lessthan_neg50 + exclusion_b_lessthan_p1)), 'models of', len(folders),
          'have a bad a_voltage or b_voltage. Note that many of these do not have reset rules either.')
    print('\t', len(none_exclusion), 'of', len(folders), 'total models do not have threshold adaptation')
    print(set(strange_pp_exclusion), 'specimen ids have a strange preprocessor file')
    exclusion_list=list(set(exclusion_a_lessthan_neg50+exclusion_b_lessthan_p1+none_exclusion+strange_pp_exclusion))  

    return exclusion_list
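
# Similarly, a hedged usage sketch: since the voltage-component exclusions apply to
# the level 5 GLIF models, one might (as an illustration, not taken from this file)
# combine them with the spike-component exclusions, which apply to all levels with
# reset rules ('mouse_struc_data_dir' is a hypothetical path):
#
#     lvl5_excluded = set(exclude_via_spike_comp_of_thr('mouse_struc_data_dir')) | \
#                     set(exclude_via_v_comp_of_th('mouse_struc_data_dir'))
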
all_neurons = []
std_error_list = []
spike_length_list = []
reciprocal_num_sp_list = []
ev_LIFASC_list = []
for folder in folders:
    specimen_id = os.path.basename(folder)[:9]
    cre = os.path.basename(folder)[10:]

    #get standard error from fitting spike reset rules
    std_err = df[df['specimen_id'] == int(
        specimen_id)]['standard_err'].values[0]
    std_error_list.append(std_err)

    #get spike cut length
    pp_file = get_pp_path(folder)
    pp_dict = json_utilities.read(pp_file)
    length = (pp_dict['spike_cut_length']['no deltaV shift']['length'] * 1000. *
              pp_dict['dt_used_for_preprocessor_calculations'])  # note: converted to ms as opposed to seconds
    spike_length_list.append(length)

    #get number of spikes in noise_1
    if specimen_id == '580895033':
        # note that one could copy the data from the ephys_sweeps.json file from the archive
        # at http://download.alleninstitute.org/informatics-archive/september-2017/mouse_cell_types/glif/
        # to a directory named 'cell_data' (where the other data is automatically downloaded if one
        # is reprocessing data from the Allen Institute Cell Types Database) and then comment
        # in the relevant line below to get the values. However, here I save you from that necessity.
        #        all_sweeps=ctc.get_ephys_sweeps(580895033, file_name=os.path.join(relative_path,'mouse_nwb/specimen_580895033/ephys_sweeps.json'))
        #        num_of_spikes=np.mean([s['num_spikes'] for s in all_sweeps if s['stimulus_name'] == 'Noise 1' ])
        pass  # no-op so the block parses; enable the commented-out lookup above if reprocessing
def general_exclusions(folder_path, 
                       n_in_cre=5, 
                       resist=True, 
                       th_inf_bad=True, 
                       spike_cut=True, 
                       ev=.2,
                       accidental_exclusion=True): 
    '''Returns a list of specimen ids that will be excluded from all levels and analysis.
    Note that some of these exclusions may be irrelevant for the curated data available via the
    Allen Institute Cell Types Database. Nonetheless, I leave these exclusions here for my own use on internal data.
    Inputs:
        folder_path: string
            path to the structured data directory 
        n_in_cre: integer
            specifies the minimum number of neurons that must exist in a cre line for those neurons to be included in the analysis
        resist: boolean
            if True, exclude neurons which have a calculated resistance over 1000 MOhms
        th_inf_bad: boolean
            if True, exclude neurons which have a calculated threshold less than -60 mV
        spike_cut: boolean
            if True, exclude neurons which have an intercept larger than 30 mV after fitting the spike cut length 
        ev: float
            exclude neurons that have a GLIF1 explained variance on noise 1 less than the provided value.
            Note that noise 1 is used because exclusion criteria are only applied to training data
        accidental_exclusion: boolean
            one neuron was either accidentally excluded from the analysis or removed for an unknown reason.  
            If True, exclude this neuron. 
    Returns: 
        exclude_me_sp_ids: list of strings
            list of neuron by specimen ids to be eliminated from the structured data directory
    '''
    initial_sp_ids=[f[0:9] for f in os.listdir(folder_path)]
    print('GENERAL EXCLUSIONS: there will be overlap in numbers below, i.e. some models will be excluded for more than one reason')
    print('\tTotal number of preprocessed files:', len(initial_sp_ids))
    folders=[os.path.join(folder_path, f) for f in os.listdir(folder_path)]
    
    strange_pp_exclusion=[] #exclusion for preprocessor files that do not have the correct format

    # exclude via slope and intercept from spike cutting results
    spike_cutting_exclusions=[]
    if spike_cut:
        for folder in folders:
            specimen_ID=os.path.basename(folder)[:9]
            pp_file=get_pp_path(folder)
            pp_dict=ju.read(pp_file)
            try:
                if pp_dict['spike_cutting']['NOdeltaV']['intercept'] > .03:
                    spike_cutting_exclusions.append(specimen_ID)
            except Exception:
                print(folder, 'DOES NOT LOOK LIKE A NORMAL PREPROCESSOR FILE')
                strange_pp_exclusion.append(specimen_ID)
        print('\t', len(set(spike_cutting_exclusions)), 'neurons were excluded for having an intercept larger than .03 V (30 mV)')
    
    # exclude based on the measured experimental threshold
    # note that the experimental threshold is the same for all models of the same neuron so just look at GLIF1 file.
    th_inf_exclusion_list=[]
    if th_inf_bad:
        for folder in folders:
            specimen_ID=os.path.basename(folder)[:9]
            pp_file=get_pp_path(folder)
            pp_dict=ju.read(pp_file)
            try:
                if pp_dict['th_inf']['via_Vmeasure']['value'] < -.06:
                    th_inf_exclusion_list.append(specimen_ID)
            except Exception:
                print(folder, 'DOES NOT LOOK LIKE A NORMAL PREPROCESSOR FILE')
                strange_pp_exclusion.append(specimen_ID)

        print('\t', len(set(th_inf_exclusion_list)), 'neurons have a th_inf less than -60 mV')

    # exclude based on resistance
    resistance_exclusion_list=[]
    if resist:
        for folder in folders:
            specimen_ID=os.path.basename(folder)[:9]
            pp_file=get_pp_path(folder)
            pp_dict=ju.read(pp_file)
            try:
                if pp_dict['resistance']['R_test_list']['mean'] > 1000.e6:
                    resistance_exclusion_list.append(specimen_ID)
            except Exception:
                print(folder, 'DOES NOT LOOK LIKE A NORMAL PREPROCESSOR FILE')
                strange_pp_exclusion.append(specimen_ID)
        print('\t', len(set(resistance_exclusion_list)), 'neurons have a resistance fit WITHOUT ASC larger than 1000 MOhms.')

    # report strange preprocessor files regardless of which checks ran above
    print('\t', len(set(strange_pp_exclusion)), 'neurons have a strange looking preprocessor file.')

    # exclude based on explained variance on training data
    exp_var_exclusion_no_file=[]
    if ev:
        for folder in folders:
            specimen_ID=os.path.basename(folder)[:9]
            try:
                get_file_path_endswith(folder, 'GLIF1_exp_var_ratio_10ms.json')  # raises if the file does not exist
            except Exception:
                exp_var_exclusion_no_file.append(specimen_ID)
        print('\t', len(set(exp_var_exclusion_no_file)), 'neurons have no explained variance file, which means they probably had an empty array in noise 1.  See the model_GLIF1_n1_after variable in calc_all_explained_variance.py')

    exp_var_exclusion_below=[]
    
    # the following mouse neuron was either accidentally excluded from the analysis or removed for a reason that eludes me now.
    accidental_exclusions=[]
    if accidental_exclusion:
        if os.path.isdir(os.path.join(folder_path, '569739534'+'_Chrna2-Cre_OE25')):  # if this directory exists, get rid of it
            accidental_exclusions=['569739534']
        print('\t', len(set(accidental_exclusions)), 'neurons were excluded from the analysis by accident. Set the accidental_exclusion flag to False to include this neuron when reprocessing all data.')
 
    
    def check_ev_value(folder, ew):
        '''Checks whether the explained variance on the training data (noise 1) is below the
        value `ev` from the enclosing scope.
        inputs:
            folder: string
                path to the folder where the files are located
            ew: string
                the unique ending of the file name to search for
        returns:
            Nothing.  Appends specimen IDs to be excluded to the 'exp_var_exclusion_below' list
        '''
        specimen_ID=os.path.basename(folder)[:9]
        try:
            file_path=get_file_path_endswith(folder, ew)  # raises if the file does not exist
            dictionary=ju.read(file_path)
            if dictionary['after_opt']['noise_1'] < ev:
                exp_var_exclusion_below.append(specimen_ID)
        except Exception:
            print('cannot find a file for', specimen_ID, 'this should not happen if check_sweeps_and_rm_folders.py was run!')
        
        
    if ev:
        for folder in folders:
            check_ev_value(folder, 'GLIF1_exp_var_ratio_10ms.json')
        print('\t', len(set(exp_var_exclusion_below)), 'neurons have a GLIF1 explained variance on noise 1 training data of less than', ev)
                    

    # get the set of all neurons that are still included in analysis after the above exclusions
    init_excluded_id_list=list(set(spike_cutting_exclusions+
                              resistance_exclusion_list+
                              th_inf_exclusion_list+
                              strange_pp_exclusion+
                              exp_var_exclusion_no_file+
                              exp_var_exclusion_below+
                              accidental_exclusions))
    reduced_sp_ids=list(set(initial_sp_ids)-set(init_excluded_id_list)) # specimen ids remaining after above exclusions
    
    # remove data that does not have at least a specified number (n_in_cre) of neurons in a cre line
    small_cre_line_exclusion=np.array([])  # initialized here so it exists even when n_in_cre is False
    if n_in_cre is not False:
        cre_list=[]
        for folder in folders:
            specimen_ID=os.path.basename(folder)[:9]
            if specimen_ID in reduced_sp_ids:
                cre_list.append({'sp': specimen_ID, 'cre': os.path.basename(folder)[10:]})

        df=pd.DataFrame(cre_list)
        for cre in df['cre'].unique():
            if len(df[df['cre']==cre]) < n_in_cre:
                small_cre_line_exclusion=np.append(small_cre_line_exclusion, df[df['cre']==cre]['sp'].values)

    # create list of specimen IDs whose folder should be completely eliminated
    exclude_me_sp_ids=list(set(small_cre_line_exclusion.tolist()+
                               init_excluded_id_list))

    print('A total of', len(exclude_me_sp_ids), 'out of', len(folders), 'neurons are excluded via general exclusion criteria, leaving', len(folders)-len(exclude_me_sp_ids), 'for this analysis')
    
    return exclude_me_sp_ids
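
# An end-to-end sketch of how the returned list might be applied (hedged: the
# directory name is hypothetical, and removal via shutil is only one way to
# "eliminate" a folder; the author's actual cleanup step is not shown in this file):
#
#     import shutil
#     data_dir = 'mouse_struc_data_dir'
#     exclude_ids = set(general_exclusions(data_dir))
#     for f in os.listdir(data_dir):
#         if f[0:9] in exclude_ids:
#             shutil.rmtree(os.path.join(data_dir, f))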