def main(args):
    model_storage_type = args.model_storage_type
    if model_storage_type in ("local", "oss"):
        print("The storage type is " + model_storage_type)
    else:
        raise Exception("Only supports storage types 'local' and 'oss'")
    if args.job_type == "Predict":
        logging.info("starting the predict job")
        predict(args)
    elif args.job_type == "Train":
        logging.info("starting the train job")
        model = train(args)
        if model is not None:
            logging.info("finish the model training, and start to dump model")
            model_path = args.model_path
            dump_model(model, model_storage_type, model_path, args)
    elif args.job_type == "All":
        logging.info("starting the train and predict job")
        # the combined train-and-predict flow is not implemented in this snippet
    logging.info("Finish distributed XGBoost job")
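# A minimal sketch of how main(args) above might be wired up with argparse.
# The flag names mirror the attributes main() reads (job_type,
# model_storage_type, model_path); this entrypoint is an assumption, not
# part of the original snippet.
import argparse
import logging

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Distributed XGBoost job")
    parser.add_argument("--job_type", choices=["Train", "Predict", "All"],
                        required=True)
    parser.add_argument("--model_storage_type", default="local",
                        help="'local' or 'oss'")
    parser.add_argument("--model_path", default="/tmp/xgboost_model")
    logging.basicConfig(level=logging.INFO)
    main(parser.parse_args())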
def train(self, model, feature_name, model_dir, name):
    model.train()
    model_directory = os.path.join(model_dir)
    if not os.path.exists(model_directory):
        os.makedirs(model_directory)
    filename = os.path.join(
        model_directory, name + '_' + 'train' + '_' + feature_name + '.pkl')
    dump_model(model, filename)
    return model
def dump_models(name, f1_score, time_mark,
                data_pipeline=DataPipeline, learn_pipeline=LearningPipeline):
    # keep a timestamped copy in the history folder ...
    path = '../dumps/history/%s__%s__%s__' % (name, time_mark, f1_score)
    dump_model(path + 'data.bin', data_pipeline)
    dump_model(path + 'learn.bin', learn_pipeline)
    # ... and overwrite the latest dump for this name
    path = '../dumps/%s__' % name
    dump_model(path + 'data.bin', data_pipeline)
    dump_model(path + 'learn.bin', learn_pipeline)
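# Hypothetical call site for dump_models() above. The time_mark format, the
# f1_score value, and the fitted pipeline objects are placeholders assumed
# for illustration, not taken from the original snippet.
import time

time_mark = time.strftime('%Y-%m-%d_%H-%M-%S')
dump_models('baseline', f1_score=0.87, time_mark=time_mark,
            data_pipeline=fitted_data_pipeline,
            learn_pipeline=fitted_learn_pipeline)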
def main(args):
    if args.job_type == "Predict":
        logging.info("starting the predict job")
        predict(args)
    elif args.job_type == "Train":
        logging.info("starting the train job")
        model = train(args)
        if model is not None:
            logging.info("finish the model training, and start to dump model")
            model_storage_type = args.model_storage_type
            model_path = args.model_path
            dump_model(model, model_storage_type, model_path, args)
    elif args.job_type == "All":
        logging.info("starting the train and predict job")
    logging.info("Finish distributed XGBoost job")
def train(epoch):
    model.train()
    criterion_c = nn.CrossEntropyLoss()
    criterion_a = nn.MultiLabelSoftMarginLoss()
    # the cosine triplet loss was disabled because of a problem with it;
    # fall back to the standard triplet margin loss
    # if cfg.ENABLE_TRIPLET_WITH_COSINE:
    #     criterion_t = cfg.TripletMarginLossCosine()
    # else:
    criterion_t = nn.TripletMarginLoss()
    triplet_loader_iter = iter(triplet_loader)
    triplet_type = 0
    if cfg.ENABLE_INSHOP_DATASET:
        triplet_in_shop_loader_iter = iter(triplet_in_shop_loader)
    for batch_idx, (data, target) in enumerate(train_loader):
        if batch_idx % cfg.TEST_INTERVAL == 0:
            test()
        category = target['category']
        attribute = target['attribute']
        data, category, attribute = data.cpu(), category.cpu(), attribute.cpu()
        data, category, attribute = Variable(data), Variable(category), Variable(attribute)
        optimizer.zero_grad()
        output1 = model(data)[0]
        output2 = model(data)[1]
        attribute = attribute.type(torch.FloatTensor)
        classification_loss = criterion_c(output1, category)
        attribute_loss = criterion_a(output2, attribute)
        if cfg.TRIPLET_WEIGHT:
            if cfg.ENABLE_INSHOP_DATASET and random.random() < cfg.INSHOP_DATASET_PRECENT:
                triplet_type = 1
                try:
                    data_tri_list = next(triplet_in_shop_loader_iter)
                except StopIteration:
                    triplet_in_shop_loader_iter = iter(triplet_in_shop_loader)
                    data_tri_list = next(triplet_in_shop_loader_iter)
            else:
                triplet_type = 0
                try:
                    data_tri_list = next(triplet_loader_iter)
                except StopIteration:
                    triplet_loader_iter = iter(triplet_loader)
                    data_tri_list = next(triplet_loader_iter)
            # anchors, positives and negatives are concatenated into one batch
            triplet_batch_size = data_tri_list[0].shape[0]
            data_tri = torch.cat(data_tri_list, 0)
            data_tri = data_tri.cpu()
            data_tri = Variable(data_tri, requires_grad=True)
            feats = model(data_tri)[1]
            triplet_loss = criterion_t(
                feats[:triplet_batch_size],
                feats[triplet_batch_size:2 * triplet_batch_size],
                feats[2 * triplet_batch_size:])
            loss = classification_loss + triplet_loss * cfg.TRIPLET_WEIGHT + attribute_loss
        else:
            loss = classification_loss + attribute_loss
        loss.backward()
        optimizer.step()
        if batch_idx % cfg.LOG_INTERVAL == 0:
            if cfg.TRIPLET_WEIGHT:
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tAll Loss: {:.4f}\t'
                      'Triple Loss({}): {:.4f}\tClassification Loss: {:.4f}'.format(
                          epoch, batch_idx * len(data), len(train_loader.dataset),
                          100. * batch_idx / len(train_loader), loss.data[0],
                          triplet_type, triplet_loss.data[0],
                          classification_loss.data[0]))
            else:
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tClassification Loss: {:.4f}'.format(
                    epoch, batch_idx * len(data), len(train_loader.dataset),
                    100. * batch_idx / len(train_loader), loss.data[0]))
        if batch_idx and batch_idx % cfg.DUMP_INTERVAL == 0:
            print('Model saved to {}'.format(dump_model(model, epoch, batch_idx)))
    print('Model saved to {}'.format(dump_model(model, epoch)))
def train(epoch):
    model.train()
    criterion_c = nn.CrossEntropyLoss()
    if cfg.ENABLE_TRIPLET_WITH_COSINE:
        criterion_t = cfg.TripletMarginLossCosine()
    else:
        criterion_t = nn.TripletMarginLoss()
    triplet_loader_iter = iter(triplet_loader)
    triplet_type = 0
    if cfg.ENABLE_INSHOP_DATASET:
        triplet_in_shop_loader_iter = iter(triplet_in_shop_loader)
    for batch_idx, (data, target) in enumerate(train_loader):
        if batch_idx % cfg.TEST_INTERVAL == 0:
            test()
        data, target = data.cuda(cfg.GPU_ID), target.cuda(cfg.GPU_ID)
        data, target = Variable(data), Variable(target)
        optimizer.zero_grad()
        outputs = model(data)[0]
        classification_loss = criterion_c(outputs, target)
        if cfg.TRIPLET_WEIGHT:
            if cfg.ENABLE_INSHOP_DATASET and random.random() < cfg.INSHOP_DATASET_PRECENT:
                triplet_type = 1
                try:
                    data_tri_list = next(triplet_in_shop_loader_iter)
                except StopIteration:
                    triplet_in_shop_loader_iter = iter(triplet_in_shop_loader)
                    data_tri_list = next(triplet_in_shop_loader_iter)
            else:
                triplet_type = 0
                try:
                    data_tri_list = next(triplet_loader_iter)
                except StopIteration:
                    triplet_loader_iter = iter(triplet_loader)
                    data_tri_list = next(triplet_loader_iter)
            triplet_batch_size = data_tri_list[0].shape[0]
            data_tri = torch.cat(data_tri_list, 0)
            data_tri = data_tri.cuda(cfg.GPU_ID)
            data_tri = Variable(data_tri, requires_grad=True)
            feats = model(data_tri)[1]
            triplet_loss = criterion_t(
                feats[:triplet_batch_size],
                feats[triplet_batch_size:2 * triplet_batch_size],
                feats[2 * triplet_batch_size:])
            loss = classification_loss + triplet_loss * cfg.TRIPLET_WEIGHT
        else:
            loss = classification_loss
        loss.backward()
        optimizer.step()
        if batch_idx % cfg.LOG_INTERVAL == 0:
            if cfg.TRIPLET_WEIGHT:
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tAll Loss: {:.4f}\t'
                      'Triple Loss({}): {:.4f}\tClassification Loss: {:.4f}'.format(
                          epoch, batch_idx * len(data), len(train_loader.dataset),
                          100. * batch_idx / len(train_loader), loss.data[0],
                          triplet_type, triplet_loss.data[0],
                          classification_loss.data[0]))
            else:
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tClassification Loss: {:.4f}'.format(
                    epoch, batch_idx * len(data), len(train_loader.dataset),
                    100. * batch_idx / len(train_loader), loss.data[0]))
        if batch_idx and batch_idx % cfg.DUMP_INTERVAL == 0:
            print('Model saved to {}'.format(dump_model(model, epoch, batch_idx)))
    print('Model saved to {}'.format(dump_model(model, epoch)))
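# Both train(epoch) variants above rely on module-level globals (model,
# optimizer, cfg, train_loader, triplet_loader, ...). A minimal sketch of the
# assumed epoch driver; cfg.EPOCHS is a hypothetical config field, not shown
# in the original snippets:
for epoch in range(1, cfg.EPOCHS + 1):
    train(epoch)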
def test_upload_model(model, model_path, args):
    return dump_model(model, type="local", model_path=model_path, args=args)
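# dump_model() itself is not defined in this section. Below is a minimal
# sketch consistent with the call sites in the XGBoost main() snippets and
# test_upload_model() above (positional model/type/model_path/args, storage
# types "local" and "oss"); the other snippets use differently-shaped
# dump_model helpers from their own projects. The parameter name `type`
# shadows the builtin but matches the keyword used at the call site; the OSS
# branch is left unimplemented because the original helper is not shown.
import os
import pickle


def dump_model(model, type, model_path, args):
    if type == "local":
        # persist the trained model to the local filesystem
        os.makedirs(os.path.dirname(model_path) or ".", exist_ok=True)
        with open(model_path, "wb") as f:
            pickle.dump(model, f)
    elif type == "oss":
        # uploading to OSS would need an OSS client and credentials from args;
        # omitted here as an assumption
        raise NotImplementedError("OSS upload not shown in this snippet")
    return model_path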