示例#1
0
def get_polygon_regular(**kw):
    """Return the vertices of a regular polygon as an [edge_num, 2] ndarray.

    Keyword Args:
        point_num / edge_num: number of vertices.
        box_width / edge_length / box_size / square_max_size / square_size:
            side length of the bounding square; the circumradius is half of it.
        direct / direct_offset: starting angle (radians); if None, the start
            angle is either random or 0 depending on `random_direct`.
        random_direct: when direct_offset is None, randomize the start angle
            within one angular step (default False).
        center / center_coord: [x, y] center of the polygon (default [0, 0]).
    """
    edge_num = search_dict(kw, ["point_num", "edge_num"])
    square_max_size = search_dict(kw, [
        "box_width", "edge_length", "box_size", "square_max_size",
        "square_size"
    ])
    direct_offset = search_dict(kw, ["direct", "direct_offset"])
    center_coord = search_dict(kw, ["center", "center_coord"],
                               default=[0.0, 0.0])
    center_x, center_y = center_coord[0], center_coord[1]

    r = square_max_size / 2  # circumradius
    theta_delta = math.pi * 2 / edge_num  # angular step between vertices
    points = np.zeros([edge_num, 2])
    if direct_offset is None:
        # bugfix: `random_direct` was an undefined free variable (NameError
        # whenever direct_offset was omitted); it is now read from kw.
        random_direct = kw.get("random_direct", False)
        if random_direct:
            # random.random() generates a random float in [0, 1)
            theta_now = random.random() * theta_delta
        else:
            theta_now = 0.0
    else:
        theta_now = direct_offset
    for num in range(edge_num):
        x, y = polar2xy(r, theta_now)
        points[num, 0], points[num, 1] = x + center_x, y + center_y
        theta_now += theta_delta
    return points
示例#2
0
文件: Trainers.py 项目: wwf194/EINet
    def __init__(self, dict_, load=False, options=None):
        """Initialize trainer state from a settings dict.

        Args:
            dict_: settings dict; defaults are written back into it by
                search_dict / get_from_dict when write_default=True.
            load: unused here; kept for interface compatibility.
            options: unused here; kept for interface compatibility.
        """
        self.dict = dict_

        # epoch bookkeeping: epochs run from 0 to epoch_num - 1
        self.epoch_now = 0
        self.epoch_num = self.dict['epoch_num']
        self.epoch_end = self.epoch_num - 1

        # save directory setting
        self.save_model_path = search_dict(self.dict, ['save_model_path', 'save_dir_model', 'save_path_model'], 
            default='./SavedModels/', write_default=True, write_default_key='save_model_path')
        ensure_path(self.save_model_path)

        self.save_model = get_from_dict(self.dict, 'save_model', default=True, write_default=True)
        self.save_after_train = get_from_dict(self.dict, 'save_after_train', default=True, write_default=True)
        self.save_before_train = get_from_dict(self.dict, 'save_before_train', default=True, write_default=True)

        if self.save_model:
            # bugfix: the default used to be the boolean True, which is not a
            # usable save interval; default to ~10 saves per run, at least 1.
            self.save_interval = get_from_dict(self.dict, 'save_model_interval',
                default=max(1, self.epoch_num // 10), write_default=True)

        self.anal_path = search_dict(self.dict, ['anal_path'], default='./', write_default=True)
        ensure_path(self.anal_path)
示例#3
0
    def __init__(self, dict_, load=False, options=None):
        """Initialize trainer state from a settings dict.

        Args:
            dict_: settings dict; defaults are written back into it by
                search_dict / get_from_dict when write_default=True.
            load: unused here; kept for interface compatibility.
            options: optional options object forwarded to receive_options.
        """
        # bugfix: `options` was referenced but was not a parameter (NameError
        # on every call); it is now an optional argument so old callers that
        # pass only (dict_, load) keep working.
        if options is not None:
            self.receive_options(options)

        self.dict = dict_
        self.epoch_num = self.dict['epoch_num']
        self.batch_num = self.dict['batch_num']
        self.batch_size = self.dict['batch_size']

        if not hasattr(self, 'anal_path'):
            self.anal_path = self.dict.setdefault('anal_path', './anal/')

        # epoch bookkeeping: epochs run from 0 to epoch_num - 1
        self.epoch_index = 0
        self.epoch_end = self.epoch_num - 1

        # save directory setting
        self.save_path = search_dict(
            self.dict, ['save_path', 'save_model_path', 'save_dir_model'],
            default='./saved_models/',
            write_default=True,
            write_default_key='save_path')
        ensure_path(self.save_path)

        self.save = search_dict(self.dict, ['save', 'save_model'],
                                default=True,
                                write_default=True)
        self.save_after_train = get_from_dict(self.dict,
                                              'save_after_train',
                                              default=True,
                                              write_default=True)
        self.save_before_train = get_from_dict(self.dict,
                                               'save_before_train',
                                               default=True,
                                               write_default=True)
        self.anal_before_train = get_from_dict(self.dict,
                                               'anal_before_train',
                                               default=True,
                                               write_default=True)

        if self.save:
            self.save_interval = search_dict(
                self.dict, ['save_interval', 'save_model_interval'],
                default=int(self.epoch_num / 10),
                write_default=True)

        # per-run performance records, shared with callers via self.dict
        self.test_performs = self.dict['test_performs'] = {}
        self.train_performs = self.dict['train_performs'] = {}

        self.anal_model = self.dict.setdefault('anal_model', True)
示例#4
0
def get_scaler(name, **params):
    """Instantiate a preprocessing scaler/transform by name.

    Parameters:
        name: str
            one of 'StandardScaler', 'RobustScaler', 'MinMaxScaler',
            'MaxAbs', 'LogTransform'.
        params: keyword arguments
            extra parameters; only those relevant to the chosen scaler
            are forwarded (filtered via search_dict).

    Returns:
        a scaler/transform instance.

    Raises:
        ValueError: if `name` is not a known scaler.
    """
    if name == 'StandardScaler':
        return StandardScaler(
            **search_dict(params, ('with_mean', 'with_std', 'copy')))
    elif name == 'RobustScaler':
        return RobustScaler(
            **search_dict(params, ('with_centering', 'with_scaling',
                                   'quantile_range', 'copy')))
    elif name == 'MinMaxScaler':
        return MinMaxScaler(**search_dict(params, ('feature_range', 'copy')))
    elif name == 'MaxAbs':
        return MaxAbsScaler(**search_dict(params, ('copy', )))
    elif name == 'LogTransform':
        return LogTransform(**search_dict(params, ('base', 'pseudo_count')))
    else:
        # bugfix: previously fell through and silently returned None for an
        # unknown name; raise like the sibling get_* factories do.
        raise ValueError('unknown scaler: {}'.format(name))
示例#5
0
    def bind_arenas(self, arenas, index=None):
        """Attach an arenas container, pick one arena, and set up place-cell
        coordinates and activation parameters from self.dict.

        Args:
            arenas: container providing get_arena(index).
            index: arena index; defaults to self.dict['arena_index'] (0 if absent).
        """
        self.arenas = arenas
        if index is None:
            index = self.dict.setdefault('arena_index', 0)
        self.arena = self.arenas.get_arena(index)

        if self.load:
            # restore previously saved coordinates onto the current device
            self.coords = self.dict['coords'].to(self.device)
            self.coords_np = self.coords.detach().cpu().numpy()
        else:
            # sample fresh random positions inside the arena and store them
            # back into self.dict so they are saved with the model
            self.coords_np = self.arena.get_random_xy(self.N_num) # [N_num, (x,y)]
            self.coords = self.dict['coords'] = torch.from_numpy(self.coords_np).to(self.device)
            '''
            x = torch.zeros((self.N_num)).to(device)
            torch.nn.init.uniform_(x, a=-self.box_width/2, b=self.box_width/2)
            y = torch.zeros((self.N_num)).to(device)
            torch.nn.init.uniform_(y, a=-self.box_height/2, b=self.box_height/2)
            self.coords = torch.stack([x, y], dim=1) #[pc_num, 2]
            self.dict['coords'] = self.coords
            '''
        # aliases for the coordinates
        self.xy = self.coords
        self.xy_np = self.coords_np
        self.type = self.dict['type']
        # act_decay ("sigma"): width parameter of the activation curve;
        # act_center ("peak"): peak activation value
        self.act_decay = search_dict(self.dict, ['act_decay', 'sigma'])
        self.act_decay_2 = self.act_decay ** 2
        self.act_center = search_dict(self.dict, ['act_center', 'peak'])
        self.norm_local = search_dict(self.dict, ['norm_local'], default=True, write_default=True)

        if self.type in ['diff_gaussian', 'diff_gauss']:
            # difference-of-Gaussians activation
            self.get_act = self.get_act_dual_
            self.act_ratio = self.dict['act_ratio']
            self.act_positive = self.dict['act_positive']
            self.act_ratio_2 = self.act_ratio ** 2
            self.act_ratio_4 = self.act_ratio ** 4
            # minimum of difference gaussian curve is (ratio^4 ** (ratio^2/(1-ratio^4)) - 1/ratio^2 * ratio^4 ** (1/(1-ratio^4)))
            self.minimum = self.act_ratio_4 ** ( self.act_ratio_2 / (1 - self.act_ratio_2) ) - ( 1 / self.act_ratio_2 ) * ( self.act_ratio_4 ** ( 1 / (1 - self.act_ratio_2)) )
            self.separate_softmax = search_dict(self.dict, ['separate_softmax'], default=False, write_default=True)
        else:
            # single-Gaussian activation
            self.get_act = self.get_activation = self.get_act_single
        if self.verbose:
            # NOTE(review): self.separate_softmax is only set in the
            # diff_gaussian branch above; this print will raise
            # AttributeError for other types when verbose — confirm.
            print('Place_Cells: type:%s act_decay:%f act_center:%f norm_local:%s separate_softmax:%s'% \
                (self.type, self.act_decay, self.act_center, self.norm_local, self.separate_softmax))
示例#6
0
    def update_lr_init(self):
        """Create self.scheduler per self.dict['lr_decay'] and return the
        matching per-epoch update method.

        Returns:
            A bound update_lr* method to be called each epoch.

        Raises:
            Exception: if the configured decay method is unknown.
        """
        lr_decay = self.lr_decay = self.dict['lr_decay']
        lr_decay_method = lr_decay.get('method')
        if lr_decay_method in ['None', 'none'] or lr_decay_method is None:
            return self.update_lr_none
        elif lr_decay_method in ['exp']:
            # bugfix: keyword was misspelled write_default_dict; search_dict
            # uses write_default_key (as the sibling branches do).
            decay = search_dict(lr_decay, ['decay', 'coeff'],
                                default=0.98,
                                write_default=True,
                                write_default_key='decay')
            self.scheduler = torch.optim.lr_scheduler.ExponentialLR(
                self.optimizer, gamma=decay)
            return self.update_lr
        elif lr_decay_method in ['stepLR', 'exp_interval']:
            decay = search_dict(lr_decay, ['decay', 'coeff'],
                                default=0.98,
                                write_default=True,
                                write_default_key='decay')
            # bugfix: step_size defaulted to 0.98 (must be a positive int)
            # and was written back under the wrong key 'decay'.
            step_size = search_dict(lr_decay, ['interval', 'step_size'],
                                    default=1,
                                    write_default=True,
                                    write_default_key='interval')
            # bugfix: ExponentialLR has no step_size argument; StepLR decays
            # the lr by gamma every step_size epochs, which is what this
            # branch intends.
            self.scheduler = torch.optim.lr_scheduler.StepLR(
                self.optimizer, step_size=step_size, gamma=decay)
            return self.update_lr
        elif lr_decay_method in ['Linear', 'linear']:
            milestones = search_dict(lr_decay, ['milestones'],
                                     throw_none_error=True)
            self.scheduler = LinearLR(self.optimizer,
                                      milestones=milestones,
                                      epoch_num=self.trainer.epoch_num)
            return self.update_lr
        else:
            raise Exception('Invalid lr decay method: ' + str(lr_decay_method))
示例#7
0
    def update_before_train(self):
        """Run the one-off model updates listed under
        self.dict['update_before_train'] (default: none).

        Raises:
            Exception: on an unrecognized update item.
        """
        # bugfix: read the list via search_dict first; the old
        # print(self.dict['update_before_train']) raised KeyError when the
        # key was absent even though a default of [] exists.
        self.update_before_train_items = search_dict(self.dict,
                                                     ['update_before_train'],
                                                     default=[],
                                                     write_default=True)
        print(self.update_before_train_items)

        for item in self.update_before_train_items:
            if item in ['alt_pc_act_strength', 'alt_pc_strength']:
                # resample a random walk and recalibrate place-cell
                # activation strength on it
                path = self.trainer.agent.walk_random(
                    num=self.trainer.batch_size)
                self.model.alt_pc_act_strength(path)
            else:
                raise Exception('Invalid update_before_train item: %s' %
                                str(item))
示例#8
0
def get_splitter(random_state=None, **params):
    '''Get a cross-validation index generator by name.

    Parameters:
        random_state: int or RandomState object
            seed for the random number generator

        params: keyword arguments
            must contain 'splitter' (name of the splitter) plus any extra
            parameters accepted by that splitter; with no 'splitter' key,
            params are forwarded to check_cv

    Returns:
        splitter: object
            a scikit-learn splitter object
    '''
    # bugfix: LeaveOneOut was imported twice here, and every branch below
    # redundantly re-imported a class this single import already provides.
    from sklearn.model_selection import KFold, StratifiedKFold, ShuffleSplit, \
        RepeatedKFold, RepeatedStratifiedKFold, LeaveOneOut, StratifiedShuffleSplit

    splitter = params.get('splitter')
    if splitter is None:
        return check_cv(**params)
    if splitter == 'KFold':
        return KFold(random_state=random_state,
                     **search_dict(params, ('n_splits', 'shuffle')))
    elif splitter == 'StratifiedKFold':
        return StratifiedKFold(random_state=random_state,
                               **search_dict(params, ('n_splits', 'shuffle')))
    elif splitter == 'RepeatedStratifiedKFold':
        return RepeatedStratifiedKFold(random_state=random_state,
                                       **search_dict(
                                           params, ('n_splits', 'n_repeats')))
    elif splitter == 'ShuffleSplit':
        return ShuffleSplit(
            random_state=random_state,
            **search_dict(params, ('n_splits', 'test_size', 'train_size')))
    elif splitter == 'StratifiedShuffleSplit':
        return StratifiedShuffleSplit(
            random_state=random_state,
            **search_dict(params, ('n_splits', 'test_size', 'train_size')))
    elif splitter == 'LeaveOneOut':
        return LeaveOneOut()
    elif splitter == 'FileSplitter':
        # bugfix: every other call passes a tuple of keys; a bare string
        # would be iterated character-by-character by search_dict.
        return UserFileSplitter(**search_dict(params, ('filename',)))
    else:
        raise ValueError('unknown splitter: {}'.format(splitter))
示例#9
0
    def __init__(self, dict_=None, load=False, f=None):
        """Build the recurrent single-layer network from a settings dict.

        Args:
            dict_: settings dict; weight tensors are stored into / loaded
                from it so the whole model round-trips through the dict.
            load: if True, reuse the tensors already present in dict_
                instead of creating and initializing fresh parameters.
            f: unused here; kept for interface compatibility.
        """
        super(RSLP, self).__init__()
        self.dict = dict_
        self.device = self.dict['device']

        # ---- weights: reuse tensors from self.dict (load) or create fresh ones ----
        if load:
            self.i = self.dict['i'] # input weight
            self.register_parameter('i', self.i)
            self.i_b = self.dict['i_b'] # input bias (tensor, or 0.0 when disabled)
            if isinstance(self.i_b, torch.Tensor):
                self.register_parameter('i_b', self.i_b)
            self.o = self.dict['o'] # output weight
            self.register_parameter('o', self.o)
            self.r = self.dict['r'] # recurrent weight
            self.register_parameter('r', self.r)
            self.r_b = self.dict['r_b'] # recurrent bias
            # bugfix: this used to re-check and re-register i_b (copy-paste);
            # register the recurrent bias r_b instead.
            if isinstance(self.r_b, torch.Tensor):
                self.register_parameter('r_b', self.r_b)
            if self.dict['init_weight'] in ['nonzero']:
                self.h_init = self.dict['h_init'] # learnable initial hidden state
                self.register_parameter('h_init', self.h_init)
        else:
            self.i = torch.nn.Parameter(torch.zeros((self.dict['input_num'], self.dict['N_num']), device=self.device))
            self.dict['i'] = self.i

            if self.dict['bias']:
                self.i_b = torch.nn.Parameter(torch.zeros((self.dict['N_num']), device=self.device))
            else:
                self.i_b = 0.0
            # bugfix: the input bias was stored under key 'b_0', but the load
            # branch above reads it back from 'i_b'.
            self.dict['i_b'] = self.i_b

            self.dict['r_b'] = search_dict(self.dict, ['r_b', 'bias'], default=True, write_default=False)
            if self.dict['r_b']:
                # NOTE(review): shape is input_num here; a recurrent bias is
                # usually sized N_num — confirm against the forward pass.
                self.r_b = self.dict['r_b'] = torch.nn.Parameter(torch.zeros((self.dict['input_num']), device=self.device))
            else:
                self.r_b = self.dict['r_b'] = 0.0
            self.o = self.dict['o'] = nn.Parameter(torch.zeros((self.dict['N_num'], self.dict['output_num']), device=self.device, requires_grad=True))
            self.r = self.dict['r'] = nn.Parameter(torch.zeros((self.dict['N_num'], self.dict['N_num']), device=self.device, requires_grad=True))

            if self.dict.get('init_weight') is None:
                self.dict['init_weight'] = {
                    'r': ['input', 1.0],
                    'o': ['input', 1.0],
                    'i': ['input', 1.0],
                }

            init_weight(self.i, self.dict['init_weight']['i'])
            init_weight(self.r, self.dict['init_weight']['r'])
            init_weight(self.o, self.dict['init_weight']['o'])

        # ---- basic attributes ----
        self.step_num = self.dict['step_num']
        self.N_num = self.dict['N_num']
        if self.dict['separate_ei']:
            # separate excitatory / inhibitory populations
            self.time_const_e = self.dict['time_const_e']
            self.time_const_i = self.dict['time_const_i']
            self.act_func = self.get_act_func_ei()
            self.act_func_e = get_act_func(self.dict['act_func_e'])
            self.act_func_i = get_act_func(self.dict['act_func_i'])
            self.E_num = self.dict['E_num']
            self.I_num = self.dict['I_num']
            self.cal_s = self.cal_s_ei
            self.get_weight = self.get_weight_ei
            self.response = self.response_ei
            self.cache_weight = self.cache_weight_ei
            self.response_keys = ['E.u','E.x','I.u','I.x','E->E','E->I','I->E','I->I','E->Y','I->Y', 'X->E', 'X->I', 'N->Y', 'N->N', 'u']
        else:
            # single homogeneous population
            self.time_const = self.dict['time_const']
            self.act_func = get_act_func(self.dict['act_func'])
            self.cal_s = self.cal_s_uni
            self.get_weight = self.get_weight_uni
            self.response = self.response_uni
            self.cache_weight = self.cache_weight_uni
            self.response_keys = ['o','r','u']

        # ---- input weight accessor ----
        self.get_i = lambda :self.i

        # ---- recurrent weight accessors (optionally zero the diagonal) ----
        if self.dict['noself']:
            self.r_self_mask = torch.ones((self.dict['N_num'], self.dict['N_num']), device=self.device, requires_grad=False)
            for i in range(self.dict['N_num']):
                self.r_self_mask[i][i] = 0.0
            self.get_r_noself = lambda :self.r * self.r_self_mask
        else:
            self.get_r_noself = lambda :self.r
        self.ei_mask = None

        self.cons_func = get_cons_func(self.dict['cons_method'])
        if 'r' in self.dict['Dale']:  # Dale's law constraint on recurrent weight
            self.ei_mask = get_ei_mask(E_num=self.dict['E_num'], N_num=self.dict['N_num']).to(self.device)
            self.get_r_ei = lambda :torch.mm(self.ei_mask, self.cons_func(self.get_r_noself()))
        else:
            self.get_r_ei = self.get_r_noself
        if 'r' in self.dict['mask']:  # connection pruning on recurrent weight
            self.r_mask = get_mask(N_num=self.dict['N_num'], output_num=self.dict['N_num']).to(self.device)
            self.get_r_mask = lambda :self.r_mask * self.get_r_ei()
        else:
            self.get_r_mask = self.get_r_ei

        self.get_r = self.get_r_mask

        # ---- output weight accessors ----
        if 'o' in self.dict['Dale']: # set mask for EI separation
            if(self.ei_mask is None):
                self.ei_mask = get_ei_mask(E_num=self.dict['E_num'], N_num=self.dict['N_num'])
            self.get_o_ei = lambda :torch.mm(self.ei_mask, self.cons_func(self.o))
        else:
            self.get_o_ei = lambda :self.o
        if 'o' in self.dict['mask']: # set mask for connection pruning
            self.o_mask = get_mask(N_num=self.dict['N_num'], output_num=self.dict['output_num'])
            self.get_o_mask = lambda :self.o_mask * self.get_o_ei()
        else:
            self.get_o_mask = self.get_o_ei
        self.get_o = self.get_o_mask

        # ---- method to generate initial s, h ----
        self.init_mode = self.dict.setdefault('init_mode', 'zero')
        if self.init_mode in ['zero']:
            self.get_s_h_init = self.get_s_h_init_zero
        elif self.init_mode in ['learnable', 'fixed']:
            self.get_s_h_init = self.get_s_h_init_fixed
        else:
            raise Exception('Invalid s and h init mode: %s'%self.init_mode)

        # ---- method to generate noise ----
        if self.dict['noise_coeff'] == 0.0:
            self.get_noise = lambda batch_size, N_num:0.0
        else:
            self.get_noise = self.get_noise_gaussian

        # ---- loss settings ----
        self.loss_dict = self.dict['loss']
        self.main_loss_func = get_loss_func(self.loss_dict['main_loss'], truth_is_label=True, num_class=self.loss_dict['num_class'])

        input_mode = get_name(self.dict['input_mode'])
        if input_mode in ['endure'] or input_mode is None: # default
            self.prep_input = self.prep_input_endure
            self.get_input = self.get_input_endure

        self.main_loss_coeff = self.loss_dict['main_loss_coeff']
        self.hebb_coeff = self.loss_dict.setdefault('hebb_coeff', 0.0)
        self.act_coeff = self.loss_dict.setdefault('act_coeff', 0.0)
        self.weight_coeff = self.loss_dict.setdefault('weight_coeff', 0.0)

        # ---- performance log settings ----
        self.perform_list = {'class':0.0, 'act':0.0, 'weight':0.0, 'acc':0.0}
        if self.hebb_coeff != 0.0:
            self.perform_list['hebb'] = 0.0
        self.batch_count = 0
        self.sample_count = 0

        self.cache = {}
示例#10
0
def get_selector(name, estimator=None, n_features_to_select=None, **params):
    """Instantiate a feature-selection step by name.

    Extra keyword arguments are filtered with search_dict so each selector
    only receives the parameters it understands.

    Raises:
        ValueError: if `name` does not match any known selector.
    """
    if name == 'RobustSelector':
        kwargs = search_dict(params, ('cv', 'verbose'))
        return RobustSelector(estimator, n_features_to_select=n_features_to_select, **kwargs)
    elif name == 'MaxFeatures':
        return SelectFromModel(estimator, threshold=-np.inf, max_features=n_features_to_select)
    elif name == 'RandomSubsetSelector':
        kwargs = search_dict(params, ('n_subsets', 'subset_size', 'random_state'))
        return RandomSubsetSelector(estimator, n_features_to_select=n_features_to_select, **kwargs)
    elif name == 'FeatureImportanceThreshold':
        return SelectFromModel(estimator, **search_dict(params, 'threshold'))
    elif name == 'RFE':
        kwargs = search_dict(params, ('step', 'verbose'))
        return RFE(estimator, n_features_to_select=n_features_to_select, **kwargs)
    elif name == 'RFECV':
        kwargs = search_dict(params, ('step', 'cv', 'verbose'))
        return RFECV(estimator, n_features_to_select=n_features_to_select, **kwargs)
    elif name == 'FoldChangeFilter':
        kwargs = search_dict(params, ('threshold', 'direction', 'below', 'pseudo_count'))
        return FoldChangeFilter(**kwargs)
    elif name == 'ZeroFractionFilter':
        return ZeroFractionFilter(**search_dict(params, ('threshold',)))
    elif name == 'RpkmFilter':
        return RpkmFilter(**search_dict(params, ('threshold',)))
    elif name == 'RpmFilter':
        return RpmFilter(**search_dict(params, ('threshold',)))
    elif name == 'DiffExpFilter':
        kwargs = search_dict(params, ('threshold', 'script', 'temp_dir', 'score_type', 'method'))
        return DiffExpFilter(max_features=n_features_to_select, **kwargs)
    elif name == 'ReliefF':
        from skrebate import ReliefF
        kwargs = search_dict(params, ('n_jobs', 'n_neighbors', 'discrete_limit'))
        return ReliefF(n_features_to_select=n_features_to_select, **kwargs)
    elif name == 'SURF':
        from skrebate import SURF
        kwargs = search_dict(params, ('n_jobs', 'discrete_limit'))
        return SURF(n_features_to_select=n_features_to_select, **kwargs)
    elif name == 'MultiSURF':
        from skrebate import MultiSURF
        kwargs = search_dict(params, ('n_jobs', 'discrete_limit'))
        return MultiSURF(n_features_to_select=n_features_to_select, **kwargs)
    elif name == 'SIS':
        kwargs = search_dict(params, ('temp_dir', 'sis_params'))
        return SIS(n_features_to_select=n_features_to_select, **kwargs)
    elif name == 'NullSelector':
        return NullSelector()
    else:
        raise ValueError('unknown selector: {}'.format(name))
示例#11
0
def get_classifier(name, **params):
    '''Build a classifier instance from its string name.

    Parameters:
        name: str
            name of the classifier

        params: keyword arguments
            extra parameters; only those relevant to the chosen classifier
            are forwarded (filtered via search_dict)

    Returns:
        estimator: object
            a BaseEstimator object
    '''
    if name == 'LogisticRegression':
        accepted = search_dict(params, ('penalty', 'dual', 'C', 'tol',
                                        'fit_intercept', 'solver', 'class_weight',
                                        'max_iter', 'n_jobs', 'random_state',
                                        'verbose'))
        return LogisticRegression(**accepted)
    if name == 'LogisticRegressionL1':
        accepted = search_dict(params, ('dual', 'C', 'tol', 'fit_intercept',
                                        'solver', 'class_weight', 'max_iter',
                                        'n_jobs', 'random_state', 'verbose'))
        return LogisticRegression(penalty='l1', **accepted)
    if name == 'LogisticRegressionL2':
        accepted = search_dict(params, ('dual', 'C', 'tol', 'fit_intercept',
                                        'solver', 'class_weight', 'max_iter',
                                        'n_jobs', 'random_state', 'verbose'))
        return LogisticRegression(penalty='l2', **accepted)
    if name == 'RandomForestClassifier':
        accepted = search_dict(params, ('n_estimators', 'criterion', 'max_depth',
                                        'min_samples_split', 'min_samples_leaf',
                                        'min_weight_fraction_leaf', 'max_features',
                                        'max_leaf_nodes', 'min_impurity_decrease',
                                        'min_impurity_split', 'oob_score', 'n_jobs',
                                        'verbose', 'random_state', 'class_weight'))
        return RandomForestClassifier(**accepted)
    if name == 'LinearSVC':
        accepted = search_dict(params, ('penalty', 'loss', 'dual', 'tol', 'C',
                                        'fit_intercept', 'intercept_scaling',
                                        'class_weight', 'verbose', 'random_state',
                                        'max_iter'))
        return LinearSVC(**accepted)
    if name == 'SVC':
        accepted = search_dict(params, ('penalty', 'loss', 'dual', 'tol', 'C',
                                        'fit_intercept', 'gamma',
                                        'intercept_scaling', 'class_weight',
                                        'verbose', 'random_state', 'max_iter'))
        return SVC(**accepted)
    if name == 'DecisionTreeClassifier':
        accepted = search_dict(params, ('criterion', 'splitter', 'max_depth',
                                        'min_samples_split', 'min_samples_leaf',
                                        'min_weight_fraction_leaf', 'max_features',
                                        'max_leaf_nodes', 'min_impurity_decrease',
                                        'min_impurity_split'))
        return DecisionTreeClassifier(**accepted)
    if name == 'ExtraTreesClassifier':
        accepted = search_dict(params, ('n_estimators', 'criterion', 'max_depth',
                                        'min_samples_split', 'min_samples_leaf',
                                        'min_weight_fraction_leaf', 'max_features',
                                        'max_leaf_nodes', 'min_impurity_decrease',
                                        'min_impurity_split', 'oob_score', 'n_jobs',
                                        'verbose', 'random_state', 'class_weight'))
        return ExtraTreesClassifier(**accepted)
    if name == 'MLPClassifier':
        from sklearn.neural_network import MLPClassifier
        accepted = search_dict(params, ('hidden_layer_sizes', 'activation',
                                        'solver', 'alpha', 'batch_size',
                                        'learning_rate', 'max_iter'))
        return MLPClassifier(**accepted)
    if name == 'SGDClassifier':
        from sklearn.linear_model import SGDClassifier
        accepted = search_dict(params, ('loss', 'penalty', 'alpha', 'l1_ratio',
                                        'fit_intercept', 'max_iter', 'tol',
                                        'epsilon'))
        return SGDClassifier(**accepted)
    raise ValueError('unknown classifier: {}'.format(name))
示例#12
0
 def fit(self, X, y=None, sample_weight=None):
     """Fit preprocessing filters, optional grid search and feature
     selection, then the final classifier.

     Args:
         X: feature matrix of shape (n_samples, n_features).
         y: target values.
         sample_weight: optional per-sample weights for classifier fitting.

     Returns:
         self: the fitted estimator, with surviving original feature
             indices in self.features_ and importances in
             self.feature_importances_.
     """
     self.preprocess_steps_ = []
     if self.zero_fraction_filter:
         logger.debug('add zero_fraction_filter with parameters: {}'.format(
             self.zero_fraction_filter_params))
         self.preprocess_steps_.append(
             ('zero_fraction_filter',
              get_selector('zero_fraction_filter',
                           **self.zero_fraction_filter_params)))
     if self.rpm_filter:
         logger.debug('add rpm_filter with parameters: {}'.format(
             self.rpm_filter_params))
         # bugfix: previously passed self.rpkm_filter_params here (copy-paste).
         self.preprocess_steps_.append(
             ('rpm_filter',
              get_selector('rpm_filter', **self.rpm_filter_params)))
     if self.fold_change_filter:
         logger.debug('add fold_change_filter with parameters: {}'.format(
             self.fold_change_filter_params))
         self.preprocess_steps_.append(
             ('fold_change_filter',
              get_selector('fold_change_filter',
                           **self.fold_change_filter_params)))
     if self.diffexp_filter:
         logger.debug('add diffexp_filter with parameters: {}'.format(
             self.diffexp_filter_params))
         self.preprocess_steps_.append(
             ('diffexp_filter',
              get_selector('diffexp_filter', **self.diffexp_filter_params)))
     if self.log_transform:
         logger.debug('add log_transform with parameters: {}'.format(
             self.log_transform_params))
         self.preprocess_steps_.append(
             ('log_transform',
              get_scaler('log_transform', **self.log_transform_params)))
     if self.scaler is not None:
         logger.debug('add scaler "{}" with parameters: {}'.format(
             self.scaler, self.scaler_params))
         self.preprocess_steps_.append(
             ('scaler', get_scaler(self.scaler, **self.scaler_params)))
     # run preprocessing; track which original feature indices survive
     X_new = X
     self.features_ = np.arange(X.shape[1])
     for name, step in self.preprocess_steps_:
         X_new = step.fit_transform(X_new, y)
         setattr(self, name + '_', step)
         if isinstance(step, SelectorMixin):
             self.features_ = self.features_[step.get_support()]
     logger.debug('add classifier "{}" with parameters: {}'.format(
         self.classifier, self.classifier_params))
     self.classifier_ = get_classifier(self.classifier,
                                       **self.classifier_params)
     # grid search for hyper-parameters
     if self.grid_search:
         logger.debug('add grid_search with parameters: {}'.format(
             self.grid_search_params))
         grid_search_params = deepcopy(self.grid_search_params)
         if 'cv' in grid_search_params:
             grid_search_params['cv'] = get_splitter(
                 **grid_search_params['cv'])
         grid_search_params['param_grid'] = grid_search_params[
             'param_grid'][self.classifier]
         self.grid_search_ = GridSearchCV(
             estimator=self.classifier_,
             **search_dict(
                 grid_search_params,
                 ('param_grid', 'scoring', 'cv', 'fit_params', 'verbose',
                  'return_train_score', 'error_score', 'iid')))
         self.grid_search_.fit(X_new, y, sample_weight=sample_weight)
         # bugfix: attribute name was misspelled 'classfier_', so the final
         # refit below silently ignored the grid-search winner.
         self.classifier_ = self.grid_search_.best_estimator_
         self.best_classifier_params_ = self.grid_search_.best_params_
         self.classifier_.set_params(**self.grid_search_.best_params_)
     # feature selection
     if self.selector:
         logger.debug('add selector "{}" with parameters: {}'.format(
             self.selector, self.selector_params))
         logger.debug('number of features to select: {}'.format(
             self.n_features_to_select))
         # classifier used inside wrapper-style selectors (RFE etc.)
         selector_classifier = None
         if 'classifier' in self.selector_params:
             selector_classifier = get_classifier(
                 self.selector_params['classifier'],
                 **self.selector_params['classifier_params'])
         else:
             selector_classifier = self.classifier_
         self.selector_ = get_selector(
             self.selector,
             estimator=selector_classifier,
             n_features_to_select=self.n_features_to_select,
             **self.selector_params)
         X_new = self.selector_.fit_transform(X_new, y)
         self.features_ = self.features_[self.selector_.get_support()]
     # refit the classifier with selected features
     self.classifier_.fit(X_new, y, sample_weight=sample_weight)
     # set feature importances
     self.feature_importances_ = get_feature_importances(self.classifier_)
     return self
示例#13
0
# Fetch YouTube search results for a query and dump the flattened
# records to a CSV file named after the query.
from youtube_api import YoutubeDataApi
import pandas as pd
from utils import API_KEY, search_dict

query = 'Juul'
client = YoutubeDataApi(key=API_KEY)
raw_results = client.search(q=query)
print(len(raw_results))
records = [search_dict(item) for item in raw_results]
pd.DataFrame(records).to_csv('{}_searches.csv'.format(query), index=False)