def get_polygon_regular(**kw):
    """Return the vertex coordinates of a regular polygon.

    Keyword args (aliases resolved via search_dict):
        point_num / edge_num: number of vertices.
        box_width / edge_length / box_size / square_max_size / square_size:
            side length of the bounding square; the circumradius is half of it.
        direct / direct_offset: angle (rad) of the first vertex. If None,
            the offset is randomized within one sector when random_direct is set.
        random_direct: when direct_offset is None, randomize the initial
            angle inside one sector (default True).
        center / center_coord: polygon center, default [0.0, 0.0].

    Returns:
        np.ndarray of shape [edge_num, 2] holding (x, y) vertex coordinates.
    """
    edge_num = search_dict(kw, ["point_num", "edge_num"])
    square_max_size = search_dict(kw, [
        "box_width", "edge_length", "box_size", "square_max_size", "square_size"
    ])
    direct_offset = search_dict(kw, ["direct", "direct_offset"])
    center_coord = search_dict(kw, ["center", "center_coord"], default=[0.0, 0.0])
    # BUG FIX: random_direct was referenced below without ever being defined
    # (NameError); resolve it from the kwargs with a sensible default.
    random_direct = search_dict(kw, ["random_direct"], default=True)
    center_x, center_y = center_coord[0], center_coord[1]
    r = square_max_size / 2  # circumradius
    theta_delta = math.pi * 2 / edge_num  # angular step between vertices
    points = np.zeros([edge_num, 2])
    if direct_offset is None:
        if random_direct:
            # random.random() yields a float in (0, 1); scale to one sector
            theta_now = random.random() * theta_delta
        else:
            theta_now = 0.0
    else:
        theta_now = direct_offset
    for num in range(edge_num):
        x, y = polar2xy(r, theta_now)
        points[num, 0], points[num, 1] = x + center_x, y + center_y
        theta_now += theta_delta
    return points
def __init__(self, dict_, load=False, options=None):
    """Initialize trainer bookkeeping from a configuration dict.

    Args:
        dict_: configuration dict; must contain 'epoch_num'. Missing
            save/analysis settings are written back with defaults.
        load: unused here; kept for interface compatibility.
        options: unused here; kept for interface compatibility.
    """
    self.dict = dict_
    self.epoch_now = 0  # current epoch counter
    self.epoch_num = self.dict['epoch_num']
    self.epoch_end = self.epoch_num - 1  # epochs are 0-indexed
    # save directory setting
    self.save_model_path = search_dict(
        self.dict,
        ['save_model_path', 'save_dir_model', 'save_path_model'],
        default='./SavedModels/',
        write_default=True,
        write_default_key='save_model_path')
    ensure_path(self.save_model_path)
    self.save_model = get_from_dict(self.dict, 'save_model', default=True, write_default=True)
    self.save_after_train = get_from_dict(self.dict, 'save_after_train', default=True, write_default=True)
    self.save_before_train = get_from_dict(self.dict, 'save_before_train', default=True, write_default=True)
    if self.save_model:
        # BUG FIX: the default was True (a bool); an interval must be an
        # epoch count — use epoch_num // 10 like the sibling trainer class.
        self.save_interval = get_from_dict(
            self.dict, 'save_model_interval',
            default=max(1, self.epoch_num // 10), write_default=True)
    self.anal_path = search_dict(self.dict, ['anal_path'], default='./', write_default=True)
    ensure_path(self.anal_path)
def __init__(self, dict_, load=False, options=None):
    """Initialize trainer state from a configuration dict.

    Args:
        dict_: configuration dict; must contain 'epoch_num', 'batch_num'
            and 'batch_size'. Missing save/analysis settings are written
            back with defaults.
        load: unused here; kept for interface compatibility.
        options: optional options object forwarded to receive_options.
    """
    # BUG FIX: `options` was referenced in the body but was not a
    # parameter (NameError on every call); added with a None default so
    # existing callers are unaffected.
    if options is not None:
        self.receive_options(options)
    self.dict = dict_
    self.epoch_num = self.dict['epoch_num']
    self.batch_num = self.dict['batch_num']
    self.batch_size = self.dict['batch_size']
    if not hasattr(self, 'anal_path'):
        self.anal_path = self.dict.setdefault('anal_path', './anal/')
    self.epoch_index = 0  # current epoch counter
    self.epoch_end = self.epoch_num - 1  # epochs are 0-indexed
    # save directory setting
    self.save_path = search_dict(
        self.dict, ['save_path', 'save_model_path', 'save_dir_model'],
        default='./saved_models/', write_default=True,
        write_default_key='save_path')
    ensure_path(self.save_path)
    self.save = search_dict(self.dict, ['save', 'save_model'], default=True, write_default=True)
    self.save_after_train = get_from_dict(self.dict, 'save_after_train', default=True, write_default=True)
    self.save_before_train = get_from_dict(self.dict, 'save_before_train', default=True, write_default=True)
    self.anal_before_train = get_from_dict(self.dict, 'anal_before_train', default=True, write_default=True)
    if self.save:
        self.save_interval = search_dict(
            self.dict, ['save_interval', 'save_model_interval'],
            default=int(self.epoch_num / 10), write_default=True)
    # per-run performance logs, shared with the dict so they get persisted
    self.test_performs = self.dict['test_performs'] = {}
    self.train_performs = self.dict['train_performs'] = {}
    self.anal_model = self.dict.setdefault('anal_model', True)
def get_scaler(name, **params):
    """Instantiate a feature scaler / transformer by name.

    Parameters:
        name: str
            one of 'StandardScaler', 'RobustScaler', 'MinMaxScaler',
            'MaxAbs', 'LogTransform'
        params: keyword arguments
            extra parameters; only the keys relevant to the chosen scaler
            are forwarded (filtered via search_dict)

    Returns:
        a scaler object exposing fit/transform

    Raises:
        ValueError: if name is not a known scaler
    """
    if name == 'StandardScaler':
        return StandardScaler(
            **search_dict(params, ('with_mean', 'with_std', 'copy')))
    elif name == 'RobustScaler':
        return RobustScaler(
            **search_dict(params, ('with_centering', 'with_scaling',
                                   'quantile_range', 'copy')))
    elif name == 'MinMaxScaler':
        return MinMaxScaler(**search_dict(params, ('feature_range', 'copy')))
    elif name == 'MaxAbs':
        return MaxAbsScaler(**search_dict(params, ('copy', )))
    elif name == 'LogTransform':
        return LogTransform(**search_dict(params, ('base', 'pseudo_count')))
    else:
        # BUG FIX: previously fell through and silently returned None for
        # unknown names; raise like get_splitter/get_classifier do.
        raise ValueError('unknown scaler: {}'.format(name))
def bind_arenas(self, arenas, index=None):
    """Bind an arenas container and initialize place-cell coordinates.

    Args:
        arenas: container exposing get_arena(index).
        index: arena index; defaults to self.dict['arena_index'] (0 if unset).

    Side effects: sets self.coords / self.coords_np (loaded from self.dict
    or sampled from the arena), activation parameters, and the activation
    function dispatch (self.get_act).
    """
    self.arenas = arenas
    if index is None:
        index = self.dict.setdefault('arena_index', 0)
    self.arena = self.arenas.get_arena(index)
    if self.load:
        self.coords = self.dict['coords'].to(self.device)
        self.coords_np = self.coords.detach().cpu().numpy()
    else:
        self.coords_np = self.arena.get_random_xy(self.N_num)  # [N_num, (x, y)]
        self.coords = self.dict['coords'] = torch.from_numpy(self.coords_np).to(self.device)
    # aliases
    self.xy = self.coords
    self.xy_np = self.coords_np
    self.type = self.dict['type']
    self.act_decay = search_dict(self.dict, ['act_decay', 'sigma'])
    self.act_decay_2 = self.act_decay ** 2
    self.act_center = search_dict(self.dict, ['act_center', 'peak'])
    self.norm_local = search_dict(self.dict, ['norm_local'], default=True, write_default=True)
    # BUG FIX: separate_softmax is read unconditionally by the verbose
    # print below, but was only set inside the diff-gaussian branch —
    # any other cell type raised AttributeError. Resolve it up front.
    self.separate_softmax = search_dict(self.dict, ['separate_softmax'], default=False, write_default=True)
    if self.type in ['diff_gaussian', 'diff_gauss']:
        self.get_act = self.get_act_dual_
        self.act_ratio = self.dict['act_ratio']
        self.act_positive = self.dict['act_positive']
        self.act_ratio_2 = self.act_ratio ** 2
        self.act_ratio_4 = self.act_ratio ** 4
        # minimum of the difference-of-gaussians curve
        # (NOTE(review): the original comment stated exponents in terms of
        # 1 - ratio^4, but the code uses 1 - ratio^2 — confirm which is intended)
        self.minimum = self.act_ratio_4 ** (self.act_ratio_2 / (1 - self.act_ratio_2)) \
            - (1 / self.act_ratio_2) * (self.act_ratio_4 ** (1 / (1 - self.act_ratio_2)))
    else:
        self.get_act = self.get_activation = self.get_act_single
    if self.verbose:
        print('Place_Cells: type:%s act_decay:%f act_center:%f norm_local:%s separate_softmax:%s' %
              (self.type, self.act_decay, self.act_center, self.norm_local, self.separate_softmax))
def update_lr_init(self):
    """Create self.scheduler from self.dict['lr_decay'] settings and
    return the matching update_lr method.

    Returns:
        bound method to call once per epoch to step the lr schedule.

    Raises:
        Exception: if the configured decay method is unknown.
    """
    lr_decay = self.lr_decay = self.dict['lr_decay']
    lr_decay_method = lr_decay.get('method')
    print(lr_decay_method)
    if lr_decay_method in ['None', 'none'] or lr_decay_method is None:
        return self.update_lr_none
    elif lr_decay_method in ['exp']:
        # BUG FIX: keyword was write_default_dict, which is not a
        # search_dict parameter (every other call uses write_default_key).
        decay = search_dict(lr_decay, ['decay', 'coeff'], default=0.98,
                            write_default=True, write_default_key='decay')
        self.scheduler = torch.optim.lr_scheduler.ExponentialLR(
            self.optimizer, gamma=decay)
        return self.update_lr
    elif lr_decay_method in ['stepLR', 'exp_interval']:
        decay = search_dict(lr_decay, ['decay', 'coeff'], default=0.98,
                            write_default=True, write_default_key='decay')
        # BUG FIX: step_size defaulted to 0.98 and was written back under
        # the 'decay' key; it is an epoch count for StepLR.
        step_size = search_dict(lr_decay, ['interval', 'step_size'], default=1,
                                write_default=True, write_default_key='step_size')
        # BUG FIX: ExponentialLR does not accept step_size (TypeError at
        # runtime); StepLR is the scheduler that decays every step_size epochs.
        self.scheduler = torch.optim.lr_scheduler.StepLR(
            self.optimizer, step_size=step_size, gamma=decay)
        return self.update_lr
    elif lr_decay_method in ['Linear', 'linear']:
        milestones = search_dict(lr_decay, ['milestones'], throw_none_error=True)
        self.scheduler = LinearLR(self.optimizer,
                                  milestones=milestones,
                                  epoch_num=self.trainer.epoch_num)
        return self.update_lr
    else:
        raise Exception('Invalid lr decay method: ' + str(lr_decay_method))
def update_before_train(self):
    """Run the pre-training update hooks listed in self.dict['update_before_train'].

    Currently only the place-cell activation-strength alteration is
    supported; any other item raises.
    """
    print(self.dict['update_before_train'])
    items = search_dict(self.dict, ['update_before_train'],
                        default=[], write_default=True)
    self.update_before_train_items = items
    for item in items:
        if item not in ('alt_pc_act_strength', 'alt_pc_strength'):
            raise Exception('Invalid update_before_train item: %s' % str(item))
        # calibrate place-cell activation strength on a random walk batch
        path = self.trainer.agent.walk_random(num=self.trainer.batch_size)
        self.model.alt_pc_act_strength(path)
def get_splitter(random_state=None, **params):
    '''Get a cross-validation index generator.

    Parameters:
        random_state: int or RandomState object
            seed for the random number generator
        params: keyword arguments
            must contain key 'splitter' naming the splitter (if absent,
            falls back to check_cv); remaining keys are forwarded to the
            chosen splitter (filtered via search_dict)

    Returns:
        a cross-validation splitter object

    Raises:
        ValueError: if the splitter name is unknown
    '''
    # BUG FIX: removed the redundant eager import that listed LeaveOneOut
    # twice and defeated the lazy per-branch imports below.
    splitter = params.get('splitter')
    if splitter is None:
        return check_cv(**params)
    if splitter == 'KFold':
        from sklearn.model_selection import KFold
        return KFold(random_state=random_state,
                     **search_dict(params, ('n_splits', 'shuffle')))
    elif splitter == 'StratifiedKFold':
        from sklearn.model_selection import StratifiedKFold
        return StratifiedKFold(random_state=random_state,
                               **search_dict(params, ('n_splits', 'shuffle')))
    elif splitter == 'RepeatedStratifiedKFold':
        from sklearn.model_selection import RepeatedStratifiedKFold
        return RepeatedStratifiedKFold(random_state=random_state,
                                       **search_dict(params, ('n_splits', 'n_repeats')))
    elif splitter == 'ShuffleSplit':
        from sklearn.model_selection import ShuffleSplit
        return ShuffleSplit(
            random_state=random_state,
            **search_dict(params, ('n_splits', 'test_size', 'train_size')))
    elif splitter == 'StratifiedShuffleSplit':
        from sklearn.model_selection import StratifiedShuffleSplit
        return StratifiedShuffleSplit(
            random_state=random_state,
            **search_dict(params, ('n_splits', 'test_size', 'train_size')))
    elif splitter == 'LeaveOneOut':
        from sklearn.model_selection import LeaveOneOut
        return LeaveOneOut()
    elif splitter == 'FileSplitter':
        return UserFileSplitter(**search_dict(params, 'filename'))
    else:
        raise ValueError('unknown splitter: {}'.format(splitter))
def __init__(self, dict_=None, load=False, f=None):
    """Build (or reload) a recurrent single-layer perceptron.

    Args:
        dict_: configuration/state dict. Required keys include 'device',
            'N_num', 'input_num', 'output_num', 'step_num', 'bias',
            'noself', 'Dale', 'mask', 'cons_method', 'noise_coeff',
            'loss', 'input_mode' (plus EI-specific keys when
            'separate_ei' is set). When load=True it also carries the
            saved weights.
        load: if True, restore weights from dict_ instead of creating
            fresh zero-initialized parameters.
        f: unused here; kept for interface compatibility with callers
            that pass a file handle.
    """
    super(RSLP, self).__init__()
    self.dict = dict_
    self.device = self.dict['device']
    # ---- weight settings ----
    if load:
        self.i = self.dict['i']  # input weight
        self.register_parameter('i', self.i)
        self.i_b = self.dict['i_b']  # input bias (tensor, or 0.0 when disabled)
        if isinstance(self.i_b, torch.Tensor):
            self.register_parameter('i_b', self.i_b)
        self.o = self.dict['o']  # output weight
        self.register_parameter('o', self.o)
        self.r = self.dict['r']  # recurrent weight
        self.register_parameter('r', self.r)
        self.r_b = self.dict['r_b']  # recurrent bias
        # BUG FIX: this guard re-registered 'i_b' a second time instead of
        # registering the recurrent bias 'r_b'.
        if isinstance(self.r_b, torch.Tensor):
            self.register_parameter('r_b', self.r_b)
        if self.dict['init_weight'] in ['nonzero']:
            self.h_init = self.dict['h_init']  # learnable initial hidden state
            self.register_parameter('h_init', self.h_init)
    else:
        self.i = torch.nn.Parameter(torch.zeros(
            (self.dict['input_num'], self.dict['N_num']), device=self.device))
        self.dict['i'] = self.i
        if self.dict['bias']:
            self.i_b = torch.nn.Parameter(torch.zeros((self.dict['N_num']), device=self.device))
        else:
            self.i_b = 0.0
        # BUG FIX: the input bias was saved under key 'b_0', but the load
        # branch above reads self.dict['i_b'] — reloading would have failed.
        self.dict['i_b'] = self.i_b
        self.dict['r_b'] = search_dict(self.dict, ['r_b', 'bias'],
                                       default=True, write_default=False)
        if self.dict['r_b']:
            # BUG FIX: the recurrent bias is added to the N_num recurrent
            # units; it was sized with input_num.
            self.r_b = self.dict['r_b'] = torch.nn.Parameter(
                torch.zeros((self.dict['N_num']), device=self.device))
        else:
            self.r_b = self.dict['r_b'] = 0.0
        self.o = self.dict['o'] = nn.Parameter(torch.zeros(
            (self.dict['N_num'], self.dict['output_num']),
            device=self.device, requires_grad=True))
        self.r = self.dict['r'] = nn.Parameter(torch.zeros(
            (self.dict['N_num'], self.dict['N_num']),
            device=self.device, requires_grad=True))
        if self.dict.get('init_weight') is None:
            self.dict['init_weight'] = {
                'r': ['input', 1.0],
                'o': ['input', 1.0],
                'i': ['input', 1.0],
            }
        init_weight(self.i, self.dict['init_weight']['i'])
        init_weight(self.r, self.dict['init_weight']['r'])
        init_weight(self.o, self.dict['init_weight']['o'])
    # ---- basic attributes ----
    self.step_num = self.dict['step_num']
    self.N_num = self.dict['N_num']
    if self.dict['separate_ei']:
        # separate excitatory / inhibitory populations
        self.time_const_e = self.dict['time_const_e']
        self.time_const_i = self.dict['time_const_i']
        self.act_func = self.get_act_func_ei()
        self.act_func_e = get_act_func(self.dict['act_func_e'])
        self.act_func_i = get_act_func(self.dict['act_func_i'])
        self.E_num = self.dict['E_num']
        self.I_num = self.dict['I_num']
        self.cal_s = self.cal_s_ei
        self.get_weight = self.get_weight_ei
        self.response = self.response_ei
        self.cache_weight = self.cache_weight_ei
        self.response_keys = ['E.u', 'E.x', 'I.u', 'I.x', 'E->E', 'E->I', 'I->E', 'I->I',
                              'E->Y', 'I->Y', 'X->E', 'X->I', 'N->Y', 'N->N', 'u']
    else:
        self.time_const = self.dict['time_const']
        self.act_func = get_act_func(self.dict['act_func'])
        self.cal_s = self.cal_s_uni
        self.get_weight = self.get_weight_uni
        self.response = self.response_uni
        self.cache_weight = self.cache_weight_uni
        self.response_keys = ['o', 'r', 'u']
    # ---- input weight ----
    self.get_i = lambda: self.i
    # ---- recurrent weight ----
    if self.dict['noself']:
        # mask that zeroes the diagonal, removing self-connections
        self.r_self_mask = torch.ones((self.dict['N_num'], self.dict['N_num']),
                                      device=self.device, requires_grad=False)
        for idx in range(self.dict['N_num']):
            self.r_self_mask[idx][idx] = 0.0
        self.get_r_noself = lambda: self.r * self.r_self_mask
    else:
        self.get_r_noself = lambda: self.r
    self.ei_mask = None
    self.cons_func = get_cons_func(self.dict['cons_method'])
    if 'r' in self.dict['Dale']:
        # enforce Dale's law on recurrent weights via sign mask + constraint
        self.ei_mask = get_ei_mask(E_num=self.dict['E_num'],
                                   N_num=self.dict['N_num']).to(self.device)
        self.get_r_ei = lambda: torch.mm(self.ei_mask, self.cons_func(self.get_r_noself()))
    else:
        self.get_r_ei = self.get_r_noself
    if 'r' in self.dict['mask']:
        self.r_mask = get_mask(N_num=self.dict['N_num'],
                               output_num=self.dict['N_num']).to(self.device)
        self.get_r_mask = lambda: self.r_mask * self.get_r_ei()
    else:
        self.get_r_mask = self.get_r_ei
    self.get_r = self.get_r_mask
    # ---- forward (output) weight ----
    if 'o' in self.dict['Dale']:  # mask for EI separation
        if self.ei_mask is None:
            # BUG FIX: this mask was not moved to self.device, unlike the
            # one created for the recurrent weights above.
            self.ei_mask = get_ei_mask(E_num=self.dict['E_num'],
                                       N_num=self.dict['N_num']).to(self.device)
        self.get_o_ei = lambda: torch.mm(self.ei_mask, self.cons_func(self.o))
    else:
        self.get_o_ei = lambda: self.o
    if 'o' in self.dict['mask']:  # mask for connection pruning
        # BUG FIX: moved to self.device for consistency with r_mask above.
        self.o_mask = get_mask(N_num=self.dict['N_num'],
                               output_num=self.dict['output_num']).to(self.device)
        self.get_o_mask = lambda: self.o_mask * self.get_o_ei()
    else:
        self.get_o_mask = self.get_o_ei
    self.get_o = self.get_o_mask
    # ---- method to generate initial s, h ----
    self.init_mode = self.dict.setdefault('init_mode', 'zero')
    if self.init_mode in ['zero']:
        self.get_s_h_init = self.get_s_h_init_zero
    elif self.init_mode in ['learnable', 'fixed']:
        self.get_s_h_init = self.get_s_h_init_fixed
    else:
        raise Exception('Invalid s and h init mode: %s' % self.init_mode)
    # ---- method to generate noise ----
    if self.dict['noise_coeff'] == 0.0:
        self.get_noise = lambda batch_size, N_num: 0.0
    else:
        self.get_noise = self.get_noise_gaussian
    # ---- loss settings ----
    self.loss_dict = self.dict['loss']
    self.main_loss_func = get_loss_func(self.loss_dict['main_loss'],
                                        truth_is_label=True,
                                        num_class=self.loss_dict['num_class'])
    input_mode = get_name(self.dict['input_mode'])
    if input_mode in ['endure'] or input_mode is None:  # default
        self.prep_input = self.prep_input_endure
        self.get_input = self.get_input_endure
    # loss coefficients (NOTE(review): the original collapsed formatting
    # made it ambiguous whether these sat inside the branch above; they
    # are needed regardless of input mode, so they are set unconditionally)
    self.main_loss_coeff = self.loss_dict['main_loss_coeff']
    self.hebb_coeff = self.loss_dict.setdefault('hebb_coeff', 0.0)
    self.act_coeff = self.loss_dict.setdefault('act_coeff', 0.0)
    self.weight_coeff = self.loss_dict.setdefault('weight_coeff', 0.0)
    # ---- performance log settings ----
    self.perform_list = {'class': 0.0, 'act': 0.0, 'weight': 0.0, 'acc': 0.0}
    if self.hebb_coeff != 0.0:
        self.perform_list['hebb'] = 0.0
    self.batch_count = 0
    self.sample_count = 0
    self.cache = {}
def get_selector(name, estimator=None, n_features_to_select=None, **params):
    """Instantiate a feature-selection transformer by name.

    Parameters:
        name: str
            selector identifier (e.g. 'RobustSelector', 'RFE', 'ReliefF')
        estimator: object
            wrapped estimator, for selectors that need one
        n_features_to_select: int
            target number of features, for selectors that support it
        params: keyword arguments
            extra parameters; only the keys relevant to the chosen
            selector are forwarded (filtered via search_dict)

    Returns:
        a selector/transformer object

    Raises:
        ValueError: if name is not a known selector
    """
    if name == 'RobustSelector':
        extra = search_dict(params, ('cv', 'verbose'))
        return RobustSelector(estimator, n_features_to_select=n_features_to_select, **extra)
    if name == 'MaxFeatures':
        return SelectFromModel(estimator, threshold=-np.inf,
                               max_features=n_features_to_select)
    if name == 'RandomSubsetSelector':
        extra = search_dict(params, ('n_subsets', 'subset_size', 'random_state'))
        return RandomSubsetSelector(estimator, n_features_to_select=n_features_to_select, **extra)
    if name == 'FeatureImportanceThreshold':
        return SelectFromModel(estimator, **search_dict(params, 'threshold'))
    if name == 'RFE':
        extra = search_dict(params, ('step', 'verbose'))
        return RFE(estimator, n_features_to_select=n_features_to_select, **extra)
    if name == 'RFECV':
        extra = search_dict(params, ('step', 'cv', 'verbose'))
        return RFECV(estimator, n_features_to_select=n_features_to_select, **extra)
    if name == 'FoldChangeFilter':
        extra = search_dict(params, ('threshold', 'direction', 'below', 'pseudo_count'))
        return FoldChangeFilter(**extra)
    if name == 'ZeroFractionFilter':
        return ZeroFractionFilter(**search_dict(params, ('threshold',)))
    if name == 'RpkmFilter':
        return RpkmFilter(**search_dict(params, ('threshold',)))
    if name == 'RpmFilter':
        return RpmFilter(**search_dict(params, ('threshold',)))
    if name == 'DiffExpFilter':
        extra = search_dict(params, ('threshold', 'script', 'temp_dir',
                                     'score_type', 'method'))
        return DiffExpFilter(max_features=n_features_to_select, **extra)
    # skrebate selectors are imported lazily so the dependency stays optional
    if name == 'ReliefF':
        from skrebate import ReliefF
        extra = search_dict(params, ('n_jobs', 'n_neighbors', 'discrete_limit'))
        return ReliefF(n_features_to_select=n_features_to_select, **extra)
    if name == 'SURF':
        from skrebate import SURF
        extra = search_dict(params, ('n_jobs', 'discrete_limit'))
        return SURF(n_features_to_select=n_features_to_select, **extra)
    if name == 'MultiSURF':
        from skrebate import MultiSURF
        extra = search_dict(params, ('n_jobs', 'discrete_limit'))
        return MultiSURF(n_features_to_select=n_features_to_select, **extra)
    if name == 'SIS':
        return SIS(n_features_to_select=n_features_to_select,
                   **search_dict(params, ('temp_dir', 'sis_params')))
    if name == 'NullSelector':
        return NullSelector()
    raise ValueError('unknown selector: {}'.format(name))
def get_classifier(name, **params):
    '''Instantiate a classifier by name.

    Parameters:
        name: str
            name of the classifier
        params: keyword arguments
            extra parameters; only the keys relevant to the chosen
            classifier are forwarded (filtered via search_dict)

    Returns:
        estimator: object
            a BaseEstimator object

    Raises:
        ValueError: if name is not a known classifier
    '''
    # key tuples shared by several branches
    logreg_keys = ('dual', 'C', 'tol', 'fit_intercept', 'solver',
                   'class_weight', 'max_iter', 'n_jobs', 'random_state',
                   'verbose')
    tree_ensemble_keys = ('n_estimators', 'criterion', 'max_depth',
                          'min_samples_split', 'min_samples_leaf',
                          'min_weight_fraction_leaf', 'max_features',
                          'max_leaf_nodes', 'min_impurity_decrease',
                          'min_impurity_split', 'oob_score', 'n_jobs',
                          'verbose', 'random_state', 'class_weight')
    if name == 'LogisticRegression':
        return LogisticRegression(**search_dict(params, ('penalty',) + logreg_keys))
    if name == 'LogisticRegressionL1':
        return LogisticRegression(penalty='l1', **search_dict(params, logreg_keys))
    if name == 'LogisticRegressionL2':
        return LogisticRegression(penalty='l2', **search_dict(params, logreg_keys))
    if name == 'RandomForestClassifier':
        return RandomForestClassifier(**search_dict(params, tree_ensemble_keys))
    if name == 'LinearSVC':
        return LinearSVC(**search_dict(params, (
            'penalty', 'loss', 'dual', 'tol', 'C', 'fit_intercept',
            'intercept_scaling', 'class_weight', 'verbose', 'random_state',
            'max_iter')))
    if name == 'SVC':
        return SVC(**search_dict(params, (
            'penalty', 'loss', 'dual', 'tol', 'C', 'fit_intercept', 'gamma',
            'intercept_scaling', 'class_weight', 'verbose', 'random_state',
            'max_iter')))
    if name == 'DecisionTreeClassifier':
        return DecisionTreeClassifier(**search_dict(params, (
            'criterion', 'splitter', 'max_depth', 'min_samples_split',
            'min_samples_leaf', 'min_weight_fraction_leaf', 'max_features',
            'max_leaf_nodes', 'min_impurity_decrease', 'min_impurity_split')))
    if name == 'ExtraTreesClassifier':
        return ExtraTreesClassifier(**search_dict(params, tree_ensemble_keys))
    # the following are imported lazily to keep startup cheap
    if name == 'MLPClassifier':
        from sklearn.neural_network import MLPClassifier
        return MLPClassifier(**search_dict(params, (
            'hidden_layer_sizes', 'activation', 'solver', 'alpha',
            'batch_size', 'learning_rate', 'max_iter')))
    if name == 'SGDClassifier':
        from sklearn.linear_model import SGDClassifier
        return SGDClassifier(**search_dict(params, (
            'loss', 'penalty', 'alpha', 'l1_ratio', 'fit_intercept',
            'max_iter', 'tol', 'epsilon')))
    raise ValueError('unknown classifier: {}'.format(name))
def fit(self, X, y=None, sample_weight=None):
    '''Fit the full pipeline on X, y.

    Stages: optional preprocessing filters/scalers, classifier creation,
    optional hyper-parameter grid search, optional feature selection,
    then a final classifier fit on the surviving features.

    Parameters:
        X: array of shape (n_samples, n_features)
        y: array of labels, shape (n_samples,)
        sample_weight: optional per-sample weights forwarded to fit

    Returns:
        self
    '''
    self.preprocess_steps_ = []
    if self.zero_fraction_filter:
        logger.debug('add zero_fraction_filter with parameters: {}'.format(
            self.zero_fraction_filter_params))
        self.preprocess_steps_.append(
            ('zero_fraction_filter',
             get_selector('zero_fraction_filter',
                          **self.zero_fraction_filter_params)))
    # TODO(review): rpkm_filter support is currently disabled; it needs
    # gene lengths derived from feature_names before it can be enabled.
    if self.rpm_filter:
        logger.debug('add rpm_filter with parameters: {}'.format(
            self.rpm_filter_params))
        # BUG FIX: previously passed self.rpkm_filter_params here.
        self.preprocess_steps_.append(
            ('rpm_filter', get_selector('rpm_filter',
                                        **self.rpm_filter_params)))
    if self.fold_change_filter:
        logger.debug('add fold_change_filter with parameters: {}'.format(
            self.fold_change_filter_params))
        self.preprocess_steps_.append(
            ('fold_change_filter',
             get_selector('fold_change_filter',
                          **self.fold_change_filter_params)))
    if self.diffexp_filter:
        logger.debug('add diffexp_filter with parameters: {}'.format(
            self.diffexp_filter_params))
        self.preprocess_steps_.append(
            ('diffexp_filter', get_selector('diffexp_filter',
                                            **self.diffexp_filter_params)))
    if self.log_transform:
        logger.debug('add log_transform with parameters: {}'.format(
            self.log_transform_params))
        self.preprocess_steps_.append(
            ('log_transform', get_scaler('log_transform',
                                         **self.log_transform_params)))
    if self.scaler is not None:
        logger.debug('add scaler "{}" with parameters: {}'.format(
            self.scaler, self.scaler_params))
        self.preprocess_steps_.append(
            ('scaler', get_scaler(self.scaler, **self.scaler_params)))
    # preprocess features, tracking which original columns survive
    X_new = X
    self.features_ = np.arange(X.shape[1])
    for name, step in self.preprocess_steps_:
        X_new = step.fit_transform(X_new, y)
        setattr(self, name + '_', step)
        if isinstance(step, SelectorMixin):
            self.features_ = self.features_[step.get_support()]
    logger.debug('add classifier "{}" with parameters: {}'.format(
        self.classifier, self.classifier_params))
    self.classifier_ = get_classifier(self.classifier, **self.classifier_params)
    # grid search for hyper-parameters
    if self.grid_search:
        logger.debug('add grid_search with parameters: {}'.format(
            self.grid_search_params))
        grid_search_params = deepcopy(self.grid_search_params)
        if 'cv' in grid_search_params:
            grid_search_params['cv'] = get_splitter(**grid_search_params['cv'])
        grid_search_params['param_grid'] = grid_search_params[
            'param_grid'][self.classifier]
        self.grid_search_ = GridSearchCV(
            estimator=self.classifier_,
            **search_dict(grid_search_params,
                          ('param_grid', 'scoring', 'cv', 'fit_params',
                           'verbose', 'return_train_score', 'error_score',
                           'iid')))
        self.grid_search_.fit(X_new, y, sample_weight=sample_weight)
        # BUG FIX: attribute was misspelled 'classfier_', so the selected
        # best estimator was stored under a dead name and never used.
        self.classifier_ = self.grid_search_.best_estimator_
        self.best_classifier_params_ = self.grid_search_.best_params_
    # feature selection
    if self.selector:
        logger.debug('add selector "{}" with parameters: {}'.format(
            self.selector, self.selector_params))
        logger.debug('number of features to select: {}'.format(
            self.n_features_to_select))
        # classifier for feature-selection wrapper (a dedicated one if
        # configured, else the main classifier)
        selector_classifier = None
        if 'classifier' in self.selector_params:
            selector_classifier = get_classifier(
                self.selector_params['classifier'],
                **self.selector_params['classifier_params'])
        else:
            selector_classifier = self.classifier_
        self.selector_ = get_selector(
            self.selector,
            estimator=selector_classifier,
            n_features_to_select=self.n_features_to_select,
            **self.selector_params)
        X_new = self.selector_.fit_transform(X_new, y)
        self.features_ = self.features_[self.selector_.get_support()]
    # refit the classifier with selected features
    self.classifier_.fit(X_new, y, sample_weight=sample_weight)
    # set feature importances
    self.feature_importances_ = get_feature_importances(self.classifier_)
    return self
from youtube_api import YoutubeDataApi
import pandas as pd
from utils import search_dict
from utils import API_KEY

# Fetch YouTube search results for a query and dump them to CSV.
query = 'Juul'
client = YoutubeDataApi(key=API_KEY)
results = client.search(q=query)
print(len(results))
# flatten each result into a plain dict of the fields we keep
rows = [search_dict(result) for result in results]
frame = pd.DataFrame(rows)
frame.to_csv('{}_searches.csv'.format(query), index=False)