Example #1
    def build_model(self):
        # pick the distribution strategy: mirror across all visible GPUs,
        # or pin everything to a single GPU
        if self.params['multi_gpus']:
            self.strategy = tf.distribute.MirroredStrategy(devices=None)
        else:
            self.strategy = tf.distribute.OneDeviceStrategy(device="/gpu:0")

        with self.strategy.scope():
            # build the detector and read anchor/stride/class metadata
            # from its detection head (the last module)
            self.model = Yolo(yaml_dir=self.params['yaml_dir'])
            self.anchors = self.model.module_list[-1].anchors
            self.stride = self.model.module_list[-1].stride
            self.num_classes = self.model.module_list[-1].num_classes

            self.loss_fn = YoloLoss(self.model.module_list[-1].anchors,
                                    ignore_iou_threshold=0.3,
                                    num_classes=self.num_classes,
                                    label_smoothing=self.params['label_smoothing'],
                                    img_size=self.params['img_size'])
            self.optimizer = Optimizer('adam')()
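(This fragment is the build_model method of the Trainer class shown in full in Example #4.)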
Example #2
import pickle

import boto3
import numpy as np
import pandas as pd


def get_lineup(df, sport, site):
    s3 = boto3.resource('s3')
    bucket = s3.Bucket('my-dfs-data')

    if sport == 'nba':
        constants = ['name', 'pos', 'event_id']
        model_variables = ['pp', 'ppg', 'salary', 'lovecount', 'hatecount']

        #need to replace this
        #df = df[df.oteam.isin(['DAL', 'DEN', 'ATL', 'HOU'])]
        #df = df[df.oteam.isin(['CHI', 'NO', 'NY', 'UTA', 'MIL', 'GS'])]
        #df = df[df.oteam.isin(['NY', 'UTA', 'MIL', 'GS'])]

    elif sport == 'pga':
        constants = ['name', 'event_id']
        model_variables = [
            'pp', 'ppg', 'salary', 'vegas_odds_0', 'vegas_value_0'
        ]
    else:
        raise ValueError("unsupported sport: {}".format(sport))

    for col in model_variables:
        df[col] = pd.to_numeric(df[col])

    df_tmp = df[constants + model_variables].dropna()
    preds = df_tmp[df_tmp.event_id == df.event_id.max()]

    obj = pickle.loads(
        bucket.Object("{}/modeling/model_{}.pkl".format(
            sport, site)).get()['Body'].read())
    trace, scaler, player_ids = obj[0], obj[1], obj[2]

    preds = preds.merge(player_ids, how='left', on='name')
    tmp_preds = scaler.transform(preds[model_variables])

    preds['posterior'] = None

    # players missing a player_idx get a NaN posterior and are dropped below
    for i, name in enumerate(preds.name):
        idx = preds.loc[preds.name == name, 'player_idx'].values[0]
        if not np.isnan(idx):
            preds.loc[preds.name == name, 'posterior'] = [[
                get_post_preds(i, trace, tmp_preds, idx)
            ]]
        else:
            print(name)  # report the player with no model index
            preds.loc[preds.name == name, 'posterior'] = np.nan

    preds = preds.dropna()
    preds['preds'] = preds['posterior'].apply(lambda x: x[0].mean())

    #use optimizer for lineups
    opt = Optimizer(preds, sport, site)
    opt.solve()
    opt.get_lineup()

    return opt.lineup
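A hypothetical call, assuming a projections DataFrame that already carries the columns referenced above (the site string and S3 layout are this project's own):

# hypothetical usage; df is this project's scraped projections frame
lineup = get_lineup(df, sport='nba', site='dk')
print(lineup)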
Example #3
import gc
import random
import sys
import time

import torch


def train(data):
    print("Training model...")
    data.show_data_summary()
    save_data_name = data.model_dir +".dset"
    data.save(save_data_name)
    if data.sentence_classification:
        model = SentClassifier(data)
    else:
        model = SeqLabel(data)

    optimizer = Optimizer('sgd', 'adam', model, 'gcn', lr=data.HP_lr,
                          lr_gcn=data.HP_lr_gcn, momentum=data.HP_momentum,
                          lr_decay=data.HP_lr_decay)
    best_dev = -10
    for idx in range(data.HP_iteration):
        epoch_start = time.time()
        temp_start = epoch_start
        print("Epoch: %s/%s" %(idx,data.HP_iteration))
        instance_count = 0
        sample_id = 0
        sample_loss = 0
        sample_loss_flat = 0
        sample_loss_graph = 0
        total_loss = 0
        right_token = 0
        whole_token = 0
        random.shuffle(data.train_Ids)
        ## set model in train mode
        model.train()
        model.zero_grad()
        batch_size = data.HP_batch_size
        batch_id = 0
        train_num = len(data.train_Ids)
        total_batch = train_num//batch_size+1
        for batch_id in range(total_batch):
            start = batch_id*batch_size
            end = (batch_id+1)*batch_size
            if end > train_num:
                end = train_num
            instance = data.train_Ids[start:end]
            if not instance:
                continue
            batch_word, batch_features, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask, ans_matrix, wgt_matrix  = batchify_with_label(data, instance, data.HP_gpu, True, data.sentence_classification)
            instance_count += 1
            loss_flat, loss_graph, loss, tag_seq = model.calculate_loss(idx, batch_word, batch_features, batch_wordlen, batch_char, batch_charlen, batch_charrecover, batch_label, mask, ans_matrix, wgt_matrix)
            right, whole = predict_check(tag_seq, batch_label, mask)
            right_token += right
            whole_token += whole
            sample_loss += loss.item()
            sample_loss_flat += loss_flat.item()
            sample_loss_graph += loss_graph.item()
            total_loss += loss.item()
            if end%500 == 0:
                temp_time = time.time()
                temp_cost = temp_time - temp_start
                temp_start = temp_time
                print("     Instance: %s; Time: %.2fs; loss_flat: %.4f; loss_graph: %.4f; loss: %.4f; acc: %.4f"%(end, temp_cost, sample_loss_flat, sample_loss_graph, sample_loss, (right_token+0.)/whole_token))
                if sample_loss > 1e8 or str(sample_loss) == "nan":
                    print("ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT....")
                    exit(1)
                sys.stdout.flush()
                sample_loss = 0
                sample_loss_flat = 0
                sample_loss_graph = 0
            loss.backward()

            if data.HP_clip is not None:
                torch.nn.utils.clip_grad_norm_(model.parameters(), data.HP_clip)
                
            optimizer.step()
            model.zero_grad()

        optimizer.update(idx+1, batch_id+1, total_batch)

        temp_time = time.time()
        temp_cost = temp_time - temp_start
        print("     Instance: %s; Time: %.2fs; loss_flat: %.4f; loss_graph: %.4f; loss: %.4f; acc: %.4f"%(end, temp_cost, sample_loss_flat, sample_loss_graph, sample_loss, (right_token+0.)/whole_token))

        epoch_finish = time.time()
        epoch_cost = epoch_finish - epoch_start
        print("Epoch: %s training finished. Time: %.2fs, speed: %.2fst/s,  total loss: %s"%(idx, epoch_cost, train_num/epoch_cost, total_loss))
        print("totalloss:", total_loss)
        if total_loss > 1e8 or str(total_loss) == "nan":
            print("ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT....")
            exit(1)
        # continue
        speed, p, r, f, _,_ = evaluate(data, model, "dev", idx)
        dev_finish = time.time()
        dev_cost = dev_finish - epoch_finish

        if data.seg:
            current_score = f
            print("Dev: time: %.2fs, speed: %.2fst/s; [p: %.4f, r: %.4f, f: %.4f]"%(dev_cost, speed, p, r, f))
        else:
            current_score = acc
            print("Dev: time: %.2fs, speed: %.2fst/s; acc: %.4f"%(dev_cost, speed, acc))

        if current_score > best_dev:
            if data.seg:
                print("Exceed previous best f score:", best_dev)
            else:
                print("Exceed previous best acc score:", best_dev)
            model_name = data.model_dir +'.'+ str(idx) + ".model"
            print("Save current best model in file:", model_name)
            torch.save(model.state_dict(), model_name)
            best_dev = current_score

        # ## decode test
        speed, p, r, f, _,_ = evaluate(data, model, "test", idx)
        test_finish = time.time()
        test_cost = test_finish - dev_finish
        if data.seg:
            print("Test: time: %.2fs, speed: %.2fst/s; [p: %.4f, r: %.4f, f: %.4f]"%(test_cost, speed, p, r, f))
        else:
            print("Test: time: %.2fs, speed: %.2fst/s; acc: %.4f"%(test_cost, speed, acc))

        gc.collect()
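The Optimizer wrapper in this example apparently drives two learning rates: lr for the base model and lr_gcn for the GCN layers. In plain PyTorch the same effect is usually achieved with parameter groups; a minimal sketch, assuming the GCN parameters can be identified by name (the wrapper's actual internals are not shown in the source):

# hypothetical equivalent of the two-rate setup above: parameters whose
# names contain 'gcn' get their own learning rate via parameter groups
gcn_params = [p for n, p in model.named_parameters() if 'gcn' in n]
base_params = [p for n, p in model.named_parameters() if 'gcn' not in n]
optimizer = torch.optim.SGD([
    {'params': base_params},                       # uses the default lr
    {'params': gcn_params, 'lr': data.HP_lr_gcn},  # GCN-specific lr
], lr=data.HP_lr, momentum=data.HP_momentum)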
Example #4
class Trainer(object):
    """ Trainer class that uses the dataset and model to train
    # Usage
    data_loader = tf.data.Dataset()
    trainer = Trainer(params)
    trainer.train(data_loader)
    """
    def __init__(self, params):
        """ Constructor
        :param params: dict, with dir and training parameters
        """
        self.params = params
        if os.path.exists(self.params['log_dir']):
            shutil.rmtree(self.params['log_dir'])
        self.log_writer = tf.summary.create_file_writer(self.params['log_dir'])
        self.global_step = tf.Variable(0, trainable=False, dtype=tf.int64)
        self.build_model()

    def build_model(self):
        """ Build the model,
        define the training strategy and model, loss, optimizer
        :return:
        """
        if self.params['multi_gpus']:
            self.strategy = tf.distribute.MirroredStrategy(devices=None)
        else:
            self.strategy = tf.distribute.OneDeviceStrategy(device="/gpu:0")

        with self.strategy.scope():
            self.model = Yolo(yaml_dir=self.params['yaml_dir'])
            self.anchors = self.model.module_list[-1].anchors
            self.stride = self.model.module_list[-1].stride
            self.num_classes = self.model.module_list[-1].num_classes

            self.loss_fn = YoloLoss(
                self.model.module_list[-1].anchors,
                ignore_iou_threshold=0.3,
                num_classes=self.num_classes,
                label_smoothing=self.params['label_smoothing'],
                img_size=self.params['img_size'])
            self.optimizer = Optimizer('adam')()

    def train(self, train_dataset, valid_dataset=None, transfer='scratch'):
        """ train function
        :param train_dataset: train dataset built by tf.data
        :param valid_dataset: valid dataset built by tf.data, optional
        :param transfer: one of 'scratch', 'resume' or 'darknet'
        :return:
        """
        steps_per_epoch = train_dataset.len / self.params['batch_size']
        self.total_steps = int(self.params['n_epochs'] * steps_per_epoch)
        self.params['warmup_steps'] = self.params['warmup_epochs'] * steps_per_epoch

        with self.strategy.scope():
            self.lr_scheduler = LrScheduler(self.total_steps, self.params)
            # => tf.keras.Model
            self.model = self.model(self.params['img_size'])

            ckpt = tf.train.Checkpoint(model=self.model,
                                       optimizer=self.optimizer)
            ckpt_manager = tf.train.CheckpointManager(
                ckpt, self.params['checkpoint_dir'], max_to_keep=5)
            if transfer == 'darknet':
                print("Load weights from ")
                # placeholder in the source: the pretrained weight path and
                # the layers to transfer are left unspecified
                model_pretrain = Yolo(self.params['yaml_dir'])()
                model_pretrain.load_weights()
                self.model.get_layer().set_weights()
            elif transfer == 'resume':
                print("Load weights from latest checkpoint")
                ckpt.restore(ckpt_manager.latest_checkpoint)
            elif transfer == 'scratch':
                print("Train from scratch")
                print(self.model.summary())

        train_dataset = self.strategy.experimental_distribute_dataset(
            train_dataset)

        for epoch in range(1, self.params['n_epochs'] + 1):
            for step, (image, target) in enumerate(train_dataset):
                loss = self.dist_train_step(image, target)
                print('=> Epoch {}, Step {}, Loss {:.5f}'.format(
                    epoch, self.global_step.numpy(), loss.numpy()))
                with self.log_writer.as_default():
                    tf.summary.scalar('loss', loss, step=self.global_step)
                    tf.summary.scalar('lr',
                                      self.optimizer.lr,
                                      step=self.global_step)
                self.log_writer.flush()

            if epoch % 3 == 0:
                ckpt_save_path = ckpt_manager.save()
                print('Saving checkpoint for epoch {} at {}'.format(
                    epoch, ckpt_save_path))

        self.export_model()

    # @tf.function
    def train_step(self, image, target):
        with tf.GradientTape() as tape:
            logit = self.model(image, training=True)
            iou_loss, conf_loss, prob_loss = self.loss_fn(target, logit)
            total_loss = iou_loss + conf_loss + prob_loss

        gradients = tape.gradient(total_loss, self.model.trainable_variables)
        self.optimizer.apply_gradients(
            zip(gradients, self.model.trainable_variables))

        lr = self.lr_scheduler.step()
        self.optimizer.lr.assign(lr)
        self.global_step.assign_add(1)
        return total_loss

    @tf.function
    def dist_train_step(self, image, target):
        with self.strategy.scope():
            loss = self.strategy.run(self.train_step, args=(image, target))
            total_loss_mean = self.strategy.reduce(tf.distribute.ReduceOp.MEAN,
                                                   loss,
                                                   axis=None)
            return total_loss_mean

    def validate(self, valid_dataset):
        valid_loss = []
        for step, (image, target) in enumerate(valid_dataset):
            step_valid_loss = self.valid_step(image, target)
            valid_loss.append(step_valid_loss)
        return np.mean(valid_loss)

    def valid_step(self, image, label):
        logit = self.model(image, training=False)
        iou_loss, conf_loss, prob_loss = self.loss_fn(label, logit)
        return iou_loss + conf_loss + prob_loss

    def export_model(self):
        tf.saved_model.save(self.model, self.params['saved_model_dir'])
        print("pb model saved in {}".format(self.params['saved_model_dir']))
Example #5
from torch.utils.data import DataLoader
from tqdm import tqdm

# poses_path, config and device are defined earlier in the original script
images_path = "/home/thuan/Desktop/visual_slam/Data_for_superglue/TUM_images_SuperGlue/sift/"
load_data = CRDataset_train(poses_path, images_path, device)
# load_data_test = CRDataset_test(poses_path, images_path, config, device)
model = md.MainModel(config['main_model']).train().to(device)
superpoint = SuperPoint(config.get('superpoint', {})).eval().to(device)

criterion = PoseNetCriterion().to(device)
#optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
optimizer_configs = {
    'method': 'adam',
    'base_lr': 1e-4,
    'weight_decay': 5e-4,
    'lr_decay': 1,
    'lr_stepvalues': [k / 4 * 400 for k in range(1, 5)]
}
optimizer = Optimizer(model.parameters(), **optimizer_configs)
train_loader = DataLoader(load_data,
                          batch_size=6,
                          num_workers=0,
                          shuffle=False)

# model.eval()
# model(load_data_test[0]["features"])

number_batch = len(train_loader)
his_losses = []
for epoch in range(400):
    optimizer.learner.zero_grad()
    pbar = enumerate(train_loader)
    pbar = tqdm(pbar, total=number_batch)
    count = 0
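The loop body is truncated at this point in the source. Purely as a hypothetical continuation (the batch layout and criterion signature below are guesses, and optimizer.learner is assumed to be the wrapped torch optimizer, as the zero_grad() call above suggests):

    # hypothetical continuation -- not in the original snippet
    for i, (features, poses) in pbar:
        pred = model(features)
        loss = criterion(pred, poses)
        loss.backward()
        optimizer.learner.step()
        optimizer.learner.zero_grad()
        count += loss.item()
    his_losses.append(count / number_batch)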
Example #6
import os

import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader


def main():
    parser = parse()
    args = parser.parse_args()

    # Device
    if torch.cuda.is_available():
        os.environ["CUDA_DEVICE_ORDER"] = 'PCI_BUS_ID'
        os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_ids
        gpu_ids = [i for i in range(len(args.gpu_ids.split(',')))]

    # Data
    download_data = Download()
    download_data.train_data()
    train_dataset = audio_skeleton_dataset(download_data.train_dst, 'train')
    val_dataset = audio_skeleton_dataset(download_data.train_dst, 'val')

    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch,
                              shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=args.batch, shuffle=False)

    # Model
    movement_net = MovementNet(
        args.d_input, args.d_output_body, args.d_output_rh, args.d_model,
        args.n_block, args.n_unet, args.n_attn, args.n_head, args.max_len,
        args.dropout, args.pre_lnorm,
        args.attn_type).to('cuda:0' if torch.cuda.is_available() else 'cpu')
    if torch.cuda.is_available() and len(args.gpu_ids.split(',')) > 1:
        movement_net = nn.DataParallel(movement_net, device_ids=gpu_ids)
    optimizer = Optimizer(
        torch.optim.Adam(movement_net.parameters(),
                         betas=(0.9, 0.98),
                         eps=1e-09), 1.0, args.d_model, args.warmup_steps)

    #------------------------ START TRAINING ---------------------------------#
    print('Training... \n')
    counter = 0  # early-stopping patience (used only if early_stop_iter > 0)
    min_val_loss = float('inf')

    Epoch_train_loss = []
    Epoch_val_loss = []
    for e in range(args.epoch):
        print("epoch %d" % (e + 1))

        # Training stage
        movement_net.train()

        pose_loss = []
        for X_train, y_train, seq_len in train_loader:

            X_train, lengths = sort_sequences(X_train, seq_len)
            y_train, _ = sort_sequences(y_train, seq_len)
            mask = y_train != 0
            mask = mask.type('torch.FloatTensor').to(
                'cuda:0' if torch.cuda.is_available() else 'cpu')

            full_output = movement_net.forward(X_train, lengths)

            loss = L1_loss(full_output, y_train, mask[:, :, :1])
            optimizer.zero_grad()
            loss.backward()
            nn.utils.clip_grad_norm_(movement_net.parameters(), 1.)
            optimizer.step()

            pose_loss.append(loss.data.cpu().numpy())

        Epoch_train_loss.append(np.mean(pose_loss))
        print('train loss: ' + str(np.mean(pose_loss)))

        # Validation stage
        movement_net.eval()

        pose_loss = []
        with torch.no_grad():
            for X_val, y_val, seq_len in val_loader:

                X_val, lengths = sort_sequences(X_val, seq_len)
                y_val, _ = sort_sequences(y_val, seq_len)
                mask = y_val != 0
                mask = mask.type('torch.FloatTensor').to(
                    'cuda:0' if torch.cuda.is_available() else 'cpu')

                full_output = movement_net.forward(X_val, lengths)

                loss = L1_loss(full_output, y_val, mask[:, :, :1])
                pose_loss.append(loss.data.cpu().numpy())

            Epoch_val_loss.append(np.mean(pose_loss))
            print('val loss: ' + str(np.mean(pose_loss)) + '\n')

            if args.early_stop_iter > 0 and counter == args.early_stop_iter:
                print("------------------early stopping------------------\n")
                break
            else:
                if min_val_loss > np.mean(pose_loss):
                    min_val_loss = np.mean(pose_loss)
                    counter = 0
                    if not os.path.exists('checkpoint'):
                        os.makedirs('checkpoint')
                    if torch.cuda.is_available() and len(
                            args.gpu_ids.split(',')) > 1:
                        state_dict = movement_net.module.state_dict()
                    else:
                        state_dict = movement_net.state_dict()
                    torch.save(
                        {
                            'epoch': e + 1,
                            'model_state_dict': {
                                'movement_net': state_dict
                            },
                            'optimizer_state_dict': optimizer.state_dict(),
                            'loss': min_val_loss
                        }, args.checkpoint)
                else:
                    counter += 1
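The Optimizer here wraps Adam with a factor, the model dimension and a warmup step count, which matches the Transformer-style "Noam" schedule. A minimal sketch of such a wrapper, assuming that interface (the project's actual class may differ):

class NoamOptimizer:
    """Hypothetical wrapper matching Optimizer(adam, factor, d_model, warmup)."""
    def __init__(self, base_optimizer, factor, d_model, warmup_steps):
        self.optimizer = base_optimizer
        self.factor = factor
        self.d_model = d_model
        self.warmup_steps = warmup_steps
        self._step = 0

    def step(self):
        # lr = factor * d_model^-0.5 * min(step^-0.5, step * warmup^-1.5)
        self._step += 1
        lr = self.factor * self.d_model ** -0.5 * min(
            self._step ** -0.5, self._step * self.warmup_steps ** -1.5)
        for group in self.optimizer.param_groups:
            group['lr'] = lr
        self.optimizer.step()

    def zero_grad(self):
        self.optimizer.zero_grad()

    def state_dict(self):
        return self.optimizer.state_dict()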