def main(c_runtime, c_transformer, c_model, c_trainer, c_log):
    with blocktimer('Preprocess'):
        train, test = Transformer.run(**c_transformer.__dict__)
        X_train, y_train, X_test = split_X_y(train, test)

    with blocktimer('Tune & Train'):
        modelfactory = ModelFactory()

        # tune the model params
        model = modelfactory.create(c_model)
        optimal_c_model = tune_gbdt_params(model, X_train, y_train,
                                           c_trainer.n_splits)

        # train with best params, full data
        model = modelfactory.create(optimal_c_model)
        model = model.train(X_train, y_train)

    with blocktimer('Predict'):
        sub = pd.DataFrame(columns=['TransactionID', 'isFraud'])
        sub['TransactionID'] = test['TransactionID']
        y_test = model.predict(X_test)
        sub['isFraud'] = y_test
        sub.to_csv(c_runtime.out_sub_path, index=False)
        logger.info(f'Saved {c_runtime.out_sub_path}')
def load_model(self, create_raw_model=False):
    if not self.config["LOAD_MODEL"]:
        raise ValueError('LOAD_MODEL config must be set to true for inference')
    if create_raw_model:
        self.config["LOAD_MODEL"] = False
    model_factory = ModelFactory(self.config)
    model = model_factory.create_model(self.model_name)
    return model
def test_invalid_id_of_device_mapping(self):
    train_strategy = ipu.ipu_strategy.IPUStrategy()
    with train_strategy.scope():
        model = ModelFactory.create_model(model_name='toy_model',
                                          weights=None,
                                          input_shape=(28, 28, 1),
                                          classes=10)
        with self.assertRaises(DimensionError):
            model = ModelFactory.configure_model(
                model=model,
                gradient_accumulation_count=1,
                pipeline_splits=['conv2d_1', 'flatten'],
                device_mapping=[1, 2, 3],
                pipeline_schedule='Grouped',
                available_memory_proportion=[])
def execute(self, model_simi: BaseModel):
    model = ModelFactory.get_model(self.model_type)
    if os.path.isfile(self.model_path):
        model.load_model(self.model_path)
        return model
    sal_data = None
    for i, path in enumerate(self.img_path):
        im_data = self.parse_data(self.img_data[i], self.img_data_level0[i],
                                  model_simi)
        for j, rlist in enumerate(im_data.rlists):
            data = Region2Csv.generate_seg_csv(rlist, im_data.feature93s[j],
                                               self.seg_path[i])
            if data is None:
                continue
            if sal_data is None:
                sal_data = data
            else:
                sal_data = np.vstack((sal_data, data))
    y_train, x_train = self.prepare_data(sal_data)
    model.train(x_train, y_train)
    model.save_model(self.model_path)
    return model
def create_training_parts(batch_size, imshape, anchors_per_scale, ckpt_path,
                          learning_rate, loss_weights, valid_category,
                          weight_suffix='latest'):
    model = ModelFactory(batch_size, imshape, anchors_per_scale).get_model()
    model = try_load_weights(ckpt_path, model, weight_suffix)
    loss_object = IntegratedLoss(loss_weights, valid_category)
    optimizer = tf.optimizers.Adam(lr=learning_rate)
    return model, loss_object, optimizer
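# A minimal sketch (not from the original repo) of how the parts returned by
# create_training_parts might be consumed in a custom training step. The call
# signatures of model(features) and loss_object(features, predictions) are
# assumptions for illustration; the real project's interfaces may differ.
import tensorflow as tf


@tf.function
def train_step(model, loss_object, optimizer, features):
    with tf.GradientTape() as tape:
        predictions = model(features, training=True)
        loss = loss_object(features, predictions)
    # Backpropagate and apply one optimizer update.
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    return loss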
def execute(self):
    model = ModelFactory.get_model(model_name=self.model_type)
    if os.path.isfile(self.model_path):
        model.load_model(self.model_path)
        return model
    simi_data = None
    for i, path in enumerate(self.img_path):
        im_data = self.check_exist(img_path=self.img_path[i],
                                   img_path_level0=self.img_data_level0[i])
        data = Region2Csv.generate_similar_csv(im_data.rlist,
                                               im_data.comb_features,
                                               self.seg_path[i])
        if simi_data is None:
            simi_data = data
        else:
            simi_data = np.vstack((simi_data, data))
        logging.info("Finished simi {}".format(i))
    y_train, x_train = self.prepare_data(simi_data)
    model.train(x_train, y_train)
    model.save_model(self.model_path)
    return model
def test_pipeline_split(self):
    def initial_model_1():
        model_input = keras.Input(shape=(32, 32, 3))
        model_output = keras.layers.MaxPooling2D(
            name='test_pipeline_split_layer1')(model_input)
        model_output_1 = keras.layers.Conv2D(
            filters=32, kernel_size=3,
            name='test_pipeline_split_layer2')(model_output)
        model_output_2 = keras.layers.Conv2D(
            filters=32, kernel_size=3,
            name='test_pipeline_split_layer3')(model_output)
        model_output = keras.layers.Add(name='test_pipeline_split_layer4')(
            [model_output_1, model_output_2])
        model_output = keras.layers.Flatten(
            name='test_pipeline_split_layer5')(model_output)
        return keras.Model(model_input, model_output)

    def expected_model_1():
        model_input = keras.Input(shape=(32, 32, 3))
        with ipu.keras.PipelineStage(0):
            model_output = keras.layers.MaxPooling2D()(model_input)
            model_output_1 = keras.layers.Conv2D(filters=32,
                                                 kernel_size=3)(model_output)
        with ipu.keras.PipelineStage(1):
            model_output_2 = keras.layers.Conv2D(filters=32,
                                                 kernel_size=3)(model_output)
            model_output = keras.layers.Add()(
                [model_output_1, model_output_2])
        with ipu.keras.PipelineStage(2):
            model_output = keras.layers.Flatten()(model_output)
        return keras.Model(model_input, model_output)

    train_strategy = ipu.ipu_strategy.IPUStrategy()
    with train_strategy.scope():
        model = initial_model_1()
        pipelined_model = ModelFactory.configure_model(
            model=model,
            gradient_accumulation_count=1,
            pipeline_splits=[
                'test_pipeline_split_layer3', 'test_pipeline_split_layer5'
            ],
            device_mapping=[],
            pipeline_schedule='Grouped',
            available_memory_proportion=[])

        expected_assignments = expected_model_1().get_pipeline_stage_assignment()
        pipelined_assignments = pipelined_model.get_pipeline_stage_assignment()

        for expected_assignment, pipelined_assignment in zip(
                expected_assignments, pipelined_assignments):
            # Compare layer class names and assigned pipeline stages.
            assert (expected_assignment.layer.__class__.__name__ ==
                    pipelined_assignment.layer.__class__.__name__)
            assert (expected_assignment.pipeline_stage ==
                    pipelined_assignment.pipeline_stage)
def main(c):
    with blocktimer('Preprocess'):
        train, test = Transformer.run(**c.transformer.__dict__)
        X_train, y_train, X_test = split_X_y(train, test)
        test = test.sort_values('TransactionDT')

    with blocktimer('Tune & Train'):
        modelfactory = ModelFactory()

        # tune the model params
        model = modelfactory.create(c.model)
        optimal_c_model = tune_gbdt_params(model, X_train, y_train,
                                           c.trainer.n_splits)

        # train with best params, full data
        model = modelfactory.create(optimal_c_model)
        model = model.train(X_train, y_train)

        # save results
        model.save(c.model.dir / f'model_{c.runtime.VERSION}_{c.model.TYPE}.pkl')
        importance = pd.DataFrame(model.feature_importance,
                                  index=X_train.columns,
                                  columns=['importance'])
        importance_path = (c.runtime.ROOTDIR / 'feature/importance' /
                           f'importance_{c.runtime.VERSION}.csv')
        importance.to_csv(importance_path)
        logger.info(f'Saved {str(importance_path)}')

    with blocktimer('Predict'):
        sub = pd.DataFrame(columns=['TransactionID', 'isFraud'])
        sub['TransactionID'] = test['TransactionID']
        y_test = model.predict(X_test)
        sub['isFraud'] = y_test
        sub.to_csv(c.runtime.out_sub_path, index=False)
        logger.debug(f'Saved {c.runtime.out_sub_path}')
def test_toy_model_factory_prediction(self):
    tf.random.set_seed(1)
    model = ModelFactory.create_model(model_name='toy_model',
                                      weights=None,
                                      input_shape=(32, 32, 3),
                                      classes=10)
    image_1 = np.ones((1, 32, 32, 3)) * 10
    assert (np.allclose(
        model.predict(image_1)[0], [
            0.08292384, 0.05735856, 0.27028584, 0.2666999, 0.02177826,
            0.01853362, 0.06498592, 0.04272136, 0.15957771, 0.015135
        ]))
    tf.random.set_seed(None)
def get_predictions_for_model(self, model_name: str):
    tf.random.set_seed(1)
    np.random.seed(0)
    image0 = np.zeros((1, 32, 32, 3))
    image1 = np.ones((1, 32, 32, 3)) * 10
    model = ModelFactory.create_model(model_name=model_name,
                                      input_shape=(32, 32, 3),
                                      classes=2)
    image0_preds = model.predict(image0)[0]
    image1_preds = model.predict(image1)[0]
    tf.random.set_seed(None)
    np.random.seed(None)
    return (image0_preds, image1_preds)
def execute(self, model_sal):
    model = ModelFactory.get_model(self.model_type)
    if os.path.isfile(self.model_path):
        model.load_model(self.model_path)
        return model
    ground_truths = None
    salience_maps = None
    for i, path in enumerate(self.img_data):
        im_data = pickle.load(open(path, "rb+"))
        seg_num = len(im_data.rlists)
        if seg_num < len(self.C_LIST) + 1:
            continue
        height = im_data.rmat.shape[0]
        width = im_data.rmat.shape[1]
        salience_map = np.zeros([seg_num, height, width])
        for j, rlist in enumerate(im_data.rlists):
            Y = model_sal.predict(im_data.feature93s[j])[:, 1]
            for k, r in enumerate(rlist):
                salience_map[j][r] = Y[k]
        ground_truth = cv2.imread(self.seg_path[i])[:, :, 0]
        ground_truth[ground_truth == 255] = 1
        if salience_maps is None:
            salience_maps = salience_map.reshape([-1, height * width]).T
        else:
            salience_maps = np.append(
                salience_maps,
                salience_map.reshape([-1, height * width]).T,
                axis=0)
        if ground_truths is None:
            ground_truths = ground_truth.reshape(-1)
        else:
            ground_truths = np.append(ground_truths,
                                      ground_truth.reshape(-1),
                                      axis=0)
    x_train = salience_maps
    y_train = ground_truths
    model.train(x_train, y_train)
    model.save_model(self.model_path)
def objective(trial, X_train, y_train, X_test, cols, c):
    '''
    Define objectives for optuna
    '''
    modelfactory = ModelFactory()
    if c.model.type == 'lightgbm':
        max_depth = trial.suggest_int('max_depth', 3, 12)
        params_to_tune = {
            # num_leaves should be smaller than approximately 2^max_depth*0.75
            'num_leaves': 2**max_depth * 3 // 4,
            'max_depth': max_depth,
            'min_child_weight': trial.suggest_loguniform('min_child_weight', 1e-3, 1e0),
            'reg_alpha': trial.suggest_loguniform('reg_alpha', 1e-2, 1e0),
            'reg_lambda': trial.suggest_loguniform('reg_lambda', 1e-2, 1e0),
            'min_data_in_leaf': trial.suggest_int('min_data_in_leaf', 50, 200),
            'feature_fraction': trial.suggest_uniform('feature_fraction', 0, 1),
            'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0, 1)
        }
    elif c.model.type == 'xgboost':
        params_to_tune = {
            'min_split_loss': trial.suggest_loguniform('min_split_loss', 1e-3, 1e0),
            'max_depth': trial.suggest_int('max_depth', 3, 12),
            'min_child_weight': trial.suggest_loguniform('min_child_weight', 1e-3, 1e0),
            'subsample': trial.suggest_uniform('subsample', 0, 1),
            'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.0, 1.0),
            'reg_alpha': trial.suggest_loguniform('reg_alpha', 1e-3, 1e0),
            'reg_lambda': trial.suggest_loguniform('reg_lambda', 1e-3, 1e0)
        }
    elif c.model.type == 'catboost':
        max_depth = trial.suggest_int('max_depth', 3, 12)
        params_to_tune = {
            # num_leaves should be smaller than approximately 2^max_depth*0.75
            # 'num_leaves': 2 ** max_depth * 3 // 4,
            'max_depth': max_depth,
            'reg_lambda': trial.suggest_loguniform('reg_lambda', 1e-2, 1e0)
        }

    # apply suggested params
    params = c.model.params.copy()
    params.update(params_to_tune)

    # Train by 6-fold CV
    oof = np.zeros(len(X_train))
    preds = np.zeros(len(X_test))
    skf = GroupKFold(n_splits=6)
    for i, (idxT, idxV) in enumerate(
            skf.split(X_train, y_train, groups=X_train['DT_M'])):
        fold = i + 1
        month = X_train.iloc[idxV]['DT_M'].iloc[0]
        model_fold_path = f'data/model/model_{c.runtime.version}_opt_fold{fold}{c.runtime.dsize}.pkl'
        model = modelfactory.create(c.model)
        logger.info(f'Fold {fold} withholding month {month}')
        logger.info(f'rows of train= {len(idxT)}, rows of holdout= {len(idxV)}')
        model = model.train(X_train[cols].iloc[idxT],
                            y_train.iloc[idxT],
                            X_train[cols].iloc[idxV],
                            y_train.iloc[idxV],
                            params=params,
                            num_boost_round=c.train.num_boost_round,
                            early_stopping_rounds=c.train.early_stopping_rounds,
                            fold=i + 1)
        oof[idxV] = model.predict(X_train[cols].iloc[idxV])
        preds += model.predict(X_test[cols]) / skf.n_splits
        r.paths.update({f'model_fold_{fold}_path': model_fold_path})
        model.save(r.paths[f'model_fold_{fold}_path'])
        del model

    score = roc_auc_score(y_train, oof)
    logger.info(f'Fold {fold} OOF cv= {score}')
    mlflow.log_metric('oof_cv_score', score, step=trial.number)
    return score
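# A hedged sketch (not from the original repo) of how an objective with extra
# arguments like the one above is typically handed to Optuna: bind the data and
# config with functools.partial so Optuna only passes the trial argument.
# The names X_train, y_train, X_test, cols and c are assumed to already exist.
import functools
import optuna

study = optuna.create_study(direction='maximize')
study.optimize(functools.partial(objective,
                                 X_train=X_train,
                                 y_train=y_train,
                                 X_test=X_test,
                                 cols=cols,
                                 c=c),
               n_trials=20)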
# -*- coding: utf-8 -*-
import sys

from configuration import Configuration
from dataset import Dataset
from model.model_factory import ModelFactory

if __name__ == '__main__':
    if len(sys.argv) != 4:
        raise Exception(
            "Invalid number of arguments. Got {} arguments, expected 4.".format(
                len(sys.argv)))
    train_path = sys.argv[1]
    test_path = sys.argv[2]
    config_path = sys.argv[3]

    config = Configuration.from_path(config_path)
    train = Dataset.from_path(train_path)
    test = Dataset.from_path(test_path)

    model = ModelFactory.get(config.model)(config)
    model.fit(train)
    model.prediction(test)
def get_model(self):
    model = ModelFactory.get_model(self.model_type)
    model.load_model(self.model_path)
    return model
def test_unsupported_model(self):
    with self.assertRaises(NameError):
        ModelFactory.create_model(model_name='foo',
                                  input_shape=(32, 32, 3),
                                  classes=2)
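# An illustrative sketch (not the project's actual ModelFactory) of the kind of
# name-based dispatch the tests above rely on: look the model name up in a
# registry and raise NameError for anything unsupported. The registry entry and
# the tiny Keras model it builds are assumptions for illustration only.
from tensorflow import keras


class SketchModelFactory:
    @staticmethod
    def create_model(model_name, input_shape, classes, weights=None):
        # weights is accepted for signature compatibility but ignored here.
        builders = {
            # Stand-in 'toy_model'; the real project's architecture differs.
            'toy_model': lambda: keras.Sequential([
                keras.layers.Flatten(input_shape=input_shape),
                keras.layers.Dense(classes, activation='softmax'),
            ]),
        }
        if model_name not in builders:
            raise NameError(f'Model {model_name} is not supported')
        return builders[model_name]()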
            f'steps_per_execution {steps_per_execution} should divide micro_batches_per_epoch = {micro_batches_per_epoch}')

    time_to_train_timer = time_to_train.TimeToTrain()

    # Create an IPU distribution strategy
    train_strategy = (PopDistStrategy()
                      if distributed_training else ipu.ipu_strategy.IPUStrategy())

    with train_strategy.scope():
        # Create an instance of the model
        model = ModelFactory.create_model(
            model_name=model_name,
            input_shape=img_shape,
            classes=num_classes,
            accelerator_side_preprocessing_fn=accelerator_side_preprocess_train_fn,
            eight_bit_transfer=eight_bit_transfer)

        model = ModelFactory.configure_model(
            model=model,
            gradient_accumulation_count=batch_config.gradient_accumulation_count,
            pipeline_splits=pipeline_splits,
            device_mapping=device_mapping,
            pipeline_schedule=pipeline_schedule,
            available_memory_proportion=available_memory_proportion,
            optimizer_state_offloading=optimizer_state_offloading)

        if training:
def main(c):
    dsize = '.small' if c.runtime.use_small_data is True else ''
    paths = EasyDict()
    scores = EasyDict()
    modelfactory = ModelFactory()

    with blocktimer('Preprocess', level=INFO):
        paths.in_train_path = f'data/feature/{c.features[0]}_train.pkl'
        paths.in_test_path = f'data/feature/{c.features[0]}_test.pkl'
        train = pd.read_pickle(paths.in_train_path)
        test = pd.read_pickle(paths.in_test_path)
        logger.debug(f'Loaded feature {c.features[0]}')

        if c.runtime.use_small_data:
            frac = 0.001
            train = train.sample(frac=frac, random_state=42)
            test = test.sample(frac=frac, random_state=42)
        logger.debug(f'train.shape: {train.shape}, test.shape: {test.shape}')

        # Split into X, y
        X_train = train.drop('isFraud', axis=1)
        X_test = test
        y_train = train['isFraud'].copy(deep=True)
        del train, test

    with blocktimer('Optimize', level=INFO):
        if c.train.optimize_num_boost_round is True:
            # tune the model params
            model = modelfactory.create(c.model)
            best_iteration = optimize_num_boost_round(model, X_train[c.cols],
                                                      y_train, c.train.n_splits,
                                                      dsize, paths, scores)
        else:
            logger.debug('Skip optimization')
            best_iteration = c.train.num_boost_round

    with blocktimer('Train', level=INFO):
        logger.debug(f'Now using the following {len(c.cols)} features.')
        logger.debug(f'{np.array(c.cols)}')

        # CHRIS - TRAIN 75% PREDICT 25%
        idxT = X_train.index[:3 * len(X_train) // 4]
        idxV = X_train.index[3 * len(X_train) // 4:]
        '''
        model = modelfactory.create(c.model)
        model = model.train(X_train.loc[idxT, :], y_train[idxT],
                            X_train.loc[idxV, :], y_train[idxV],
                            num_boost_round=best_iteration)
        importance = pd.DataFrame(model.feature_importance,
                                  index=X_train.columns,
                                  columns=['importance'])

        # save results
        paths.out_model_dir = f'data/model/model_{c.runtime.version}_{c.model.type}{dsize}.pkl'
        paths.importance_path = f'feature/importance/importance_{c.runtime.version}{dsize}.csv'
        model.save(paths.out_model_dir)
        importance.to_csv(paths.importance_path)
        '''
        from sklearn.model_selection import GroupKFold
        from sklearn.metrics import roc_auc_score
        oof = np.zeros(len(X_train))
        preds = np.zeros(len(X_test))
        skf = GroupKFold(n_splits=6)
        for i, (idxT, idxV) in enumerate(
                skf.split(X_train, y_train, groups=X_train['DT_M'])):
            month = X_train.iloc[idxV]['DT_M'].iloc[0]
            logger.info(f'Fold {i+1} withholding month {month}')
            logger.info(
                f'rows of train ={len(idxT)}, rows of holdout ={len(idxV)}')
            categorical_features = [
                'ProductCD', 'M4', 'card1', 'card2', 'card3', 'card5',
                'card6', 'addr1', 'addr2', 'dist1', 'dist2',
                'P_emaildomain', 'R_emaildomain',
            ]
            model = modelfactory.create(c.model)
            model = model.train(
                X_train[c.cols].iloc[idxT],
                y_train.iloc[idxT],
                X_train[c.cols].iloc[idxV],
                y_train.iloc[idxV],
                num_boost_round=best_iteration,
                early_stopping_rounds=c.train.early_stopping_rounds,
                # categorical_features=categorical_features,
                fold=i + 1)
            oof[idxV] += model.predict(X_train[c.cols].iloc[idxV])
            preds += model.predict(X_test[c.cols]) / skf.n_splits
            del model

        logger.info(f'OOF cv= {roc_auc_score(y_train, oof)}')
        paths.importance_path = f'feature/importance/importance_{c.runtime.version}{dsize}.csv'
        # model.save(paths.out_model_dir)
        '''
        importance = pd.DataFrame(model.feature_importance,
                                  index=X_train.columns,
                                  columns=['importance'])
        importance.to_csv(paths.importance_path)
        '''

    with blocktimer('Predict', level=INFO):
        # y_test = model.predict(X_test)
        sub = pd.DataFrame(columns=['TransactionID', 'isFraud'])
        sub['TransactionID'] = X_test.reset_index()['TransactionID']
        # sub['isFraud'] = y_test
        sub['isFraud'] = preds
        paths.out_sub_path = f'data/submission/submission_{c.runtime.version}{dsize}.csv'
        sub.to_csv(paths.out_sub_path, index=False)

    result = EasyDict()
    result.update(c)
    result.scores = scores
    result.paths = paths
    return result
Y_valid_augmented = None
synt_sufix = ''
datagen = None
if args.synthetic_data:
    synt_sufix = '_synt_' + args.noise
    datagen = create_data_generator(args.noise)
    print(datagen.__dict__)

if args.dev:
    models.clear()
    models.append(TrainingParameters('dev', 128))
    models.append(TrainingParameters('dev1', 128))

mf = ModelFactory()
scores = []
for m in models:
    # Create directory for model
    model_dir = os.path.join('models',
                             str(m) + ed_sufix + all_sufix + synt_sufix)
    if os.path.exists(model_dir):
        shutil.rmtree(model_dir)
    os.makedirs(model_dir)

    # JPG only
    keras_model = mf.create_model(m.model_name,
                                  in_shape=(m.in_size, m.in_size, 3),
                                  parameters=m.parameters)
    print("Model created successfully. Compiling")
    keras_model.model.compile(loss=m.loss,
                              optimizer=m.optimizer,
                              metrics=['accuracy'])
    print("Compilation done")
    print(keras_model.model.summary())

    if args.multi_gpu:
def main(c):
    dsize = '.small' if c.runtime.use_small_data is True else ''
    paths = EasyDict()
    scores = EasyDict()
    result = EasyDict()
    result.update(c)
    modelfactory = ModelFactory()

    with blocktimer('Preprocess', level=INFO):
        paths.in_train_path = f'data/feature/{c.features[0]}_train.pkl'
        paths.in_test_path = f'data/feature/{c.features[0]}_test.pkl'
        train = pd.read_pickle(paths.in_train_path)
        test = pd.read_pickle(paths.in_test_path)
        logger.debug(f'Loaded feature {c.features[0]}')

        if c.runtime.use_small_data:
            frac = 0.001
            train = train.sample(frac=frac, random_state=42)
            test = test.sample(frac=frac, random_state=42)
        logger.debug(f'train.shape: {train.shape}, test.shape: {test.shape}')

        # Split into X, y
        X_train = train.drop('isFraud', axis=1)
        X_test = test
        y_train = train['isFraud'].copy(deep=True)
        del train, test

    with blocktimer('Optimize num_boost_round', level=INFO):
        if c.train.optimize_num_boost_round is True:
            # tune the model params
            model = modelfactory.create(c.model)
            best_iteration = optimize_num_boost_round(model, X_train[c.cols],
                                                      y_train, c.train.n_splits,
                                                      dsize, paths, scores)
        else:
            logger.debug('Skip optimization')
            best_iteration = c.train.num_boost_round

    with blocktimer('Optimize model params', level=INFO):
        if c.train.optimize_model_params is True:

            # define objective for optuna
            def objectives(trial):
                max_depth = trial.suggest_int('max_depth', 3, 12)
                params = {
                    'boosting_type': 'gbdt',
                    # num_leaves should be smaller than approximately 2^max_depth*0.75
                    'num_leaves': 2**max_depth * 3 // 4,
                    'max_depth': max_depth,
                    'learning_rate': 0.05,
                    'objective': 'binary',
                    'min_child_weight':
                    trial.suggest_loguniform('min_child_weight', 1e-3, 1e0),  # 0.03454472573214212,
                    'reg_alpha':
                    trial.suggest_loguniform('reg_alpha', 1e-2, 1e0),  # 0.3899927210061127,
                    'reg_lambda':
                    trial.suggest_loguniform('reg_lambda', 1e-2, 1e0),  # 0.6485237330340494,
                    'random_state': 42,
                    'min_data_in_leaf':
                    trial.suggest_int('min_data_in_leaf', 50, 200),  # 106,
                    'metric': 'auc',
                    'max_bin': 255
                }
                c.model.params = params

                # Train by 6-fold CV
                oof = np.zeros(len(X_train))
                preds = np.zeros(len(X_test))
                skf = GroupKFold(n_splits=6)
                for i, (idxT, idxV) in enumerate(
                        skf.split(X_train, y_train, groups=X_train['DT_M'])):
                    fold = i + 1
                    month = X_train.iloc[idxV]['DT_M'].iloc[0]
                    model_fold_path = f'data/model/model_{c.runtime.version}_{c.model.type}_opt_fold{fold}{dsize}.pkl'
                    model = modelfactory.create(c.model)
                    logger.info(f'Fold {fold} withholding month {month}')
                    logger.info(
                        f'rows of train= {len(idxT)}, rows of holdout= {len(idxV)}')
                    model = model.train(
                        X_train[c.cols].iloc[idxT],
                        y_train.iloc[idxT],
                        X_train[c.cols].iloc[idxV],
                        y_train.iloc[idxV],
                        num_boost_round=best_iteration,
                        early_stopping_rounds=c.train.early_stopping_rounds,
                        # categorical_features=categorical_features,
                        fold=i + 1)
                    oof[idxV] = model.predict(X_train[c.cols].iloc[idxV])
                    preds += model.predict(X_test[c.cols]) / skf.n_splits
                    paths.update({f'model_fold_{fold}_path': model_fold_path})
                    model.save(paths[f'model_fold_{fold}_path'])
                    del model

                score = roc_auc_score(y_train, oof)
                logger.info(f'Fold {fold} OOF cv= {score}')
                return score

            # run optimization
            opt = optuna.create_study(
                direction='maximize',
                study_name=f'parameter_study_0016{dsize}',
                storage=f'sqlite:///data/optimization/parameter_study_0016{dsize}.db',
                load_if_exists=True)
            opt.optimize(objectives, n_trials=20)
            trial = opt.best_trial
            logger.debug(f'Best trial: {trial.value}')
            logger.debug(f'Best params: {trial.params}')
            scores.best_trial = trial.value
            result.optimize = {}
            result.optimize.best_params = trial.params
        else:
            logger.debug('Skip optimization')

    with blocktimer('Train', level=INFO):
        if c.train.train_model:
            logger.debug(f'Now using the following {len(c.cols)} features.')
            logger.debug(f'{np.array(c.cols)}')

            oof = np.zeros(len(X_train))
            preds = np.zeros(len(X_test))
            skf = GroupKFold(n_splits=6)
            for i, (idxT, idxV) in enumerate(
                    skf.split(X_train, y_train, groups=X_train['DT_M'])):
                month = X_train.iloc[idxV]['DT_M'].iloc[0]
                logger.info(f'Fold {i+1} withholding month {month}')
                logger.info(
                    f'rows of train ={len(idxT)}, rows of holdout ={len(idxV)}')
                '''
                categorical_features = ['ProductCD', 'M4', 'card1', 'card2',
                                        'card3', 'card5', 'card6', 'addr1',
                                        'addr2', 'dist1', 'dist2',
                                        'P_emaildomain', 'R_emaildomain',
                                        ]
                '''
                model = modelfactory.create(c.model)
                model = model.train(
                    X_train[c.cols].iloc[idxT],
                    y_train.iloc[idxT],
                    X_train[c.cols].iloc[idxV],
                    y_train.iloc[idxV],
                    num_boost_round=best_iteration,
                    early_stopping_rounds=c.train.early_stopping_rounds,
                    # categorical_features=categorical_features,
                    fold=i + 1)
                oof[idxV] = model.predict(X_train[c.cols].iloc[idxV])
                preds += model.predict(X_test[c.cols]) / skf.n_splits
                del model

            logger.info(f'OOF cv= {roc_auc_score(y_train, oof)}')
            paths.importance_path = f'feature/importance/importance_{c.runtime.version}{dsize}.csv'
            # model.save(paths.out_model_dir)
            '''
            importance = pd.DataFrame(model.feature_importance,
                                      index=X_train.columns,
                                      columns=['importance'])
            importance.to_csv(paths.importance_path)
            '''

    with blocktimer('Predict', level=INFO):
        if c.train.predict:
            sub = pd.DataFrame(columns=['TransactionID', 'isFraud'])
            sub['TransactionID'] = X_test.reset_index()['TransactionID']
            sub['isFraud'] = preds
            paths.out_sub_path = f'data/submission/submission_{c.runtime.version}{dsize}.csv'
            sub.to_csv(paths.out_sub_path, index=False)

    result.scores = scores
    result.paths = paths
    return result
def generate_model(self):
    model_factory = ModelFactory(self.config)
    model = model_factory.create_model(self.model_name)
    compile_para = self.model_compile_para()
    model.compile_model(**compile_para)
    return model
def build_model(problem_name, observation_shape, num_actions, model_params):
    # Model
    model_manager = ModelFactory(problem_name, observation_shape, num_actions,
                                 model_params)
    model = model_manager.get_model()
    return model
import os

from werkzeug.utils import secure_filename
from flask import Flask, render_template, Response, url_for, request

from model.model_factory import ModelFactory

app = Flask(__name__)
model_factory = ModelFactory()


@app.route('/')
def index():
    return render_template('index.html')


@app.route('/predict', methods=['POST', 'GET'])
def prediction():
    model = model_factory.get_resnet34()
    if request.method == 'POST':
        f = request.files['file']
        basepath = os.path.dirname(__file__)
        file_path = os.path.join(basepath, 'uploads',
                                 secure_filename(f.filename))
        f.save(file_path)
        pred_class = model.predict(file_path)
        return pred_class
    return None
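# A hedged usage sketch (not part of the original app): posting an image to the
# /predict endpoint with the requests library. The host and port assume the
# Flask development server's defaults, and 'sample.jpg' is a placeholder name.
import requests

with open('sample.jpg', 'rb') as image_file:
    response = requests.post('http://127.0.0.1:5000/predict',
                             files={'file': image_file})
print(response.text)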