Example #1
def train(model, trainDataloader, valDataloader, testDataloader, optimizer,
          scaler_y, opt, logger):
    val_losses = []
    test_losses = []
    model_save_path = utils.make_date_dir("./model_save")
    logger.info(f"Model save path : {model_save_path}")
    logger.info(f"Learning Rate : {opt.lr}")

    if opt.model_mode == 'single':
        train_epoch = train_single_epoch
        eval_epoch = eval_single_epoch
        test = test_single
    elif opt.model_mode == 'twice':
        train_epoch = train_twice_epoch
        eval_epoch = eval_twice_epoch
        test = test_twice

    best_loss = float('inf')
    patience = 0

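    # Early stopping: stop if the summed test loss has not improved for 10 consecutive epochs.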
    for epoch in range(int(opt.n_epochs)):
        patience += 1
        logger.info(
            "====================================Train===================================="
        )
        train_loss, _ = train_epoch(model, trainDataloader, optimizer)
        logger.info(f"[Train Epoch {epoch+1}] train Loss : {train_loss}")

        logger.info(
            "====================================Val===================================="
        )
        val_loss, _ = eval_epoch(model, valDataloader)
        logger.info(f"[Eval Epoch {epoch+1}] val Loss : {val_loss}")

        logger.info(
            "====================================Test===================================="
        )
        test_loss, test_mae = test(model, testDataloader, scaler_y)
        logger.info(
            f"[Epoch {epoch+1}] Test_throughput_Loss: {test_loss[0]}, Test_latency_Loss: {test_loss[1]} , Test_throughput_MAE_Loss: {test_mae[0]}, Test_latency_MAE_Loss: {test_mae[1]}"
        )

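        # Checkpoint when the combined throughput + latency test loss improves, and reset patience.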
        if sum(test_loss) < best_loss:
            torch.save(
                model.state_dict(),
                os.path.join(model_save_path,
                             "model_" + str(epoch + 1) + ".pt"))
            best_th_loss, best_la_loss, best_th_mae_loss, best_la_mae_loss = test_loss[
                0], test_loss[1], test_mae[0], test_mae[1]
            best_loss = sum(test_loss)
            patience = 0
            best_epoch = epoch + 1
        if patience == 10:
            break
        val_losses.append(val_loss)
        test_losses.append(test_loss)

    return best_epoch, best_th_loss, best_la_loss, best_th_mae_loss, best_la_mae_loss, model_save_path
Example #2
def main():
    config = Config()

    logger, log_dir = get_logger(os.path.join(config.model, "logs/"))
    logger.info("=======Model Configuration=======")
    logger.info(config.desc)
    logger.info("=================================")

    try:
        _, _, test_x, _, _, test_y, _, _, test_m, test_dt = load_agg_selected_data_mem(data_path=config.data_path, \
            x_len=config.x_len, \
            y_len=config.y_len, \
            foresight=config.foresight, \
            cell_ids=config.test_cell_ids, \
            dev_ratio=config.dev_ratio, \
            test_len=config.test_len, \
            seed=config.seed)

        model = Model(config)
        if config.latest_model:
            model_dir = find_latest_dir(
                os.path.join(config.model, 'model_save/'))
        else:
            if not config.model_dir:
                raise Exception(
                    "model_dir or latest_model=True should be defined in config"
                )
            model_dir = config.model_dir

        model.restore_session(model_dir)
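        # Evaluate in batches only for very large test sets, to bound memory use.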
        if len(test_y) > 100000:
            # Batch mode
            test_data = list(zip(test_x, test_m, test_y))
            test_batches = batch_loader(test_data, config.batch_size)
            total_pred = np.empty(shape=(0, test_y.shape[1]))

            for batch in test_batches:
                batch_x, batch_m, batch_y = zip(*batch)
                pred, _, _, _, _ = model.eval(batch_x, batch_m, batch_y)
                total_pred = np.r_[total_pred, pred]

        else:
            # Not batch mode
            total_pred, test_loss, test_rse, test_smape, test_mae = model.eval(
                test_x, test_m, test_y)

        result_dir = make_date_dir(os.path.join(config.model, 'results/'))
        np.save(os.path.join(result_dir, 'pred.npy'), total_pred)
        np.save(os.path.join(result_dir, 'test_y.npy'), test_y)
        np.save(os.path.join(result_dir, 'test_dt.npy'), test_dt)
        logger.info("Saving results at {}".format(result_dir))
        logger.info("Testing finished, exit program")

    except:
        logger.exception("ERROR")
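
The batch path above relies on a batch_loader helper that is not shown in these examples. A minimal sketch, assuming only what the call sites imply (it takes a list of sample tuples plus a batch size and yields chunks that the loop unpacks with zip(*batch)), might look like the following; the real helper may differ, e.g. in whether it shuffles.

import random

def batch_loader(data, batch_size, shuffle=False):
    # Hypothetical helper: yield successive slices of `data`,
    # each a list of sample tuples such as (x, m, y).
    if shuffle:
        data = list(data)
        random.shuffle(data)
    for start in range(0, len(data), batch_size):
        yield data[start:start + batch_size]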
Example #3
 def _create_big_data_table(self):
     '''Compute all scores, collect in dataframe and store as csv.'''
     data = pd.concat([
         self.get_exp2score(quantifier_properties.Monotonicity),
         self.get_exp2score(quantifier_properties.Quantity),
         self.get_exp2score(quantifier_properties.Conservativity),
         self.get_exp2score(quantifier_properties.LempelZiv),
         self.get_exp2score(quantifier_properties.Uniformity)
     ],
                      axis=1).reset_index()
     data.columns = [
         "expression", "monotonicity", "quantity", "conservativity",
         "lempel_ziv", "uniformity"
     ]
     # When using lambda on pd.DataFrame, axis 1, it gives as input
     # to lambda function, each row, transposed, as a series.
     data.insert(
         loc=0,
         column="expr_length",
         value=data.apply(
             lambda row_series: utils.get_exp_len(row_series.expression),
             axis=1))
     # Add extra "admin properties" to identify from which language
     # generator this data stems.
     data["max_model_size"] = self.max_model_size
     data["lot"] = self.language_name
     data["subsets"] = self.subset_description
     current_date = datetime.datetime.now().strftime("%d-%m-%Y")
     data["date"] = current_date
     data.sort_values(by=["expr_length"], inplace=True)
     data.reset_index(drop=True, inplace=True)
     max_expr_len = data["expr_length"].max()
     filename = utils.make_csv_filename(self.max_model_size, max_expr_len,
                                        self.language_name)
     file_loc = utils.make_date_dir(self.csv_dir)
     data.to_csv(file_loc / filename, index=False)
     self.big_data_table = data
     return self.big_data_table
Example #4
def main():
    config = Config()

    logger, log_dir = get_logger(os.path.join(config.model, "logs/"))
    logger.setLevel(30)  # set loglevel WARNING(30)
    logger.info("=======Model Configuration=======")
    logger.info(config.desc)
    logger.info("=================================")

    try:
        _, _, test_x, _, _, test_y, _, _, test_m, test_dt = load_agg_selected_data_mem(data_path=config.data_path, \
            x_len=config.x_len, \
            y_len=config.y_len, \
            foresight=config.foresight, \
            cell_ids=config.test_cell_ids, \
            dev_ratio=config.dev_ratio, \
            test_len=config.test_len, \
            seed=config.seed)

        # Duplicate the test set 10x (dummy data) to stress-test batched inference
        test_x = np.concatenate([test_x] * 10, axis=0)
        test_m = np.concatenate([test_m] * 10, axis=0)
        test_y = np.concatenate([test_y] * 10, axis=0)

        print("Size of x,m,y : {}, {}, {} bytes, total {} GB".format(
            test_x.nbytes, test_m.nbytes, test_y.nbytes,
            (test_x.nbytes + test_m.nbytes + test_y.nbytes) / 1024 / 1024 /
            1024))
        print("Batch Size : {}".format(config.batch_size))

        model = Model(config)
        if config.latest_model:
            model_dir = find_latest_dir(
                os.path.join(config.model, 'model_save/'))
        else:
            if not config.model_dir:
                raise Exception(
                    "model_dir or latest_model=True should be defined in config"
                )
            model_dir = config.model_dir

        model.restore_session(model_dir)

        # always run as batch mode
        test_data = list(zip(test_x, test_m, test_y))
        test_batches = batch_loader(test_data, config.batch_size)
        total_pred = np.empty(shape=(0, test_y.shape[1]))

        time_start = time()
        for idx, batch in enumerate(test_batches):
            batch_x, batch_m, batch_y = zip(*batch)
            pred, _, _, _, _ = model.eval(batch_x, batch_m, batch_y)
            total_pred = np.r_[total_pred, pred]
        print("Batch looped {} times".format(idx + 1))
        time_end = time()

        print("Elapsed Time in Inferencing: {}".format(time_end - time_start))

        result_dir = make_date_dir(os.path.join(config.model, 'results/'))
        np.save(os.path.join(result_dir, 'pred.npy'), total_pred)
        np.save(os.path.join(result_dir, 'test_y.npy'), test_y)
        np.save(os.path.join(result_dir, 'test_dt.npy'), test_dt)
        logger.info("Saving results at {}".format(result_dir))
        logger.info("Testing finished, exit program")

    except:
        logger.exception("ERROR")
Example #5
def main():
    config = Config()

    logger, log_dir = get_logger(os.path.join(config.model, "logs/"))
    logger.info("=======Model Configuration=======")
    logger.info(config.desc)
    logger.info("=================================")
    
    try:       
        train_x, dev_x, test_x, train_y, dev_y, test_y, test_dt = load_agg_data(data_path=config.data_path, \
            x_len=config.x_len, \
            y_len=config.y_len, \
            ncells=config.ncells, \
            foresight=config.foresight, \
            dev_ratio=config.dev_ratio,\
            test_len=config.test_len, \
            seed=config.seed)
            
        model = Model(config)
        train_data = list(zip(train_x,train_y))
        no_improv = 0 
        best_loss = 100
        model_dir = make_date_dir(os.path.join(config.model, 'model_save/'))
        result_dir = make_date_dir(os.path.join(config.model, 'results/'))
        logger.info("Start training")
        dev_x = np.asarray(dev_x)
        dev_y = np.asarray(dev_y)
                
        start_time = time()
        for i in range(config.num_epochs):
            train_batches = batch_loader(train_data, config.batch_size)
            epoch = i+1
            
            for batch in train_batches:
                batch_x, batch_y = zip(*batch)
                batch_x = np.asarray(batch_x)
                batch_y = np.asarray(batch_y)
                loss, rse, smape, mae, step = model.train(batch_x, batch_y)

                if step % 100 == 0:
                    logger.info("epoch: %d, step: %d, loss: %.4f, rse: %.4f, smape: %.4f, mae: %.4f" %
                                (epoch, step, loss, rse, smape, mae))
                    
            # dev score for each epoch (no mini batch)
            _, dev_loss, dev_rse, dev_smape, dev_mae = model.eval(dev_x, dev_y)
        
            if dev_loss < best_loss:
                best_loss = dev_loss
                no_improv = 0
                logger.info("New score! : dev_loss: %.4f, dev_rse: %.4f, dev_smape: %.4f, dev_mae: %.4f" % 
                            (dev_loss, dev_rse, dev_smape, dev_mae))
                logger.info("Saving model at {}".format(model_dir))
                model.save_session(os.path.join(model_dir, config.model))
            else: 
                no_improv += 1
                if no_improv == config.nepoch_no_improv:
                    logger.info("No improvement for %d epochs" % no_improv)
                #    break
        # model.save_session(os.path.join(model_dir, config.model))
        elapsed = time() - start_time
        # generating results (no mini batch)
        model.restore_session(model_dir)
        pred, test_loss, test_rse, test_smape, test_mae = model.eval(test_x, test_y)
        logger.info("test_loss: %.4f, test_rse: %.4f, test_smape: %.4f, test_mae: %.4f" % 
                    (test_loss, test_rse, test_smape, test_mae))
        
        # save results
        # np.save(os.path.join(result_dir, 'pred.npy'), pred)
        # np.save(os.path.join(result_dir, 'test_y.npy'), test_y)
        # np.save(os.path.join(result_dir, 'test_dt.npy'), test_dt)
        # logger.info("Saving results at {}".format(result_dir))
        logger.info("Elapsed training time {0:0.2f} mins".format(elapsed/60))
        logger.info("Training finished, exit program")
        
    except:
        logger.exception("ERROR")
Example #6
        # Fragment (assumes an active tf.Session `sess`, a built `model`,
        # a SparkContext `sc`, and loaded test_x / test_m arrays).
        saver = tf.train.Saver()
        saver.restore(sess, os.path.join(model_dir, config.model))

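        # Wrap the restored TensorFlow session as a TFNet so inference can run distributed on Spark.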
        tfnet = TFNet.from_session(
            sess,
            inputs=[model.input_x, model.memories],  # dropout is never used
            outputs=[model.predictions])

    data_x_rdd = sc.parallelize(test_x, PARALLELISM)
    data_m_rdd = sc.parallelize(test_m, PARALLELISM)

    # create a RDD of Sample
    sample_rdd = data_x_rdd.zip(data_m_rdd).map(
        lambda x: Sample.from_ndarray(features=x, labels=np.zeros([1])))

    # distributed inference on Spark and return an RDD
    outputs = tfnet.predict(sample_rdd,
                            batch_per_thread=config.batch_size,
                            distributed=True)

    # check time when trigger actions
    time_start = time.time()
    outputs.collect()
    time_end = time.time()

    print("Elapsed Time in Inferencing: {}".format(time_end - time_start))

    result_dir = make_date_dir(os.path.join(config.model, 'zoo_results/'))

    # outputs.saveAsTextFile(os.path.join(result_dir, "result.txt"))
Example #7
def main(opt: argparse.Namespace, logger: logging.Logger, log_dir: str) -> Config:
    # Target workload loading
    logger.info("====================== {} mode ====================\n".format(
        opt.persistence))
    logger.info("Target workload name is {}".format(opt.target))

    """
        load knob data and IM datas, EM datas.
    """
    ### data load ###
    knob_data, aggregated_IM_data, aggregated_ops_data, aggregated_latency_data, target_knob_data, ops_target_external_data, latency_target_external_data = data_preprocessing(
        opt.target, opt.persistence, logger)

    ### clustering ###
    logger.info(
        "====================== Metrics_Simplification ====================\n")
    pruned_metrics = metric_simplification(aggregated_IM_data, logger, opt)
    logger.info("Done pruning metrics for workload {} (# of pruned metrics: {}).\n\n""Pruned metrics: {}\n".format(
        opt.persistence, len(pruned_metrics), pruned_metrics))
    metric_idxs = [i for i, metric_name in enumerate(
        aggregated_IM_data['columnlabels']) if metric_name in pruned_metrics]
    ranked_metric_data = {
        'data': aggregated_IM_data['data'][:, metric_idxs],
        'rowlabels': copy.deepcopy(aggregated_IM_data['rowlabels']),
        'columnlabels': [aggregated_IM_data['columnlabels'][i] for i in metric_idxs]
    }
    """
        For example,
            pruned_metrics : ['allocator_rss_bytes', 'rss_overhead_bytes', 'used_memory_dataset', 'rdb_last_cow_size']
    """

    ### KNOBS RANKING STAGE ###
    rank_knob_data = copy.deepcopy(knob_data)
    logger.info(
        "====================== Run_Knobs_Ranking ====================\n")
    logger.info("use mode = {}".format(opt.rki))
    ranked_knobs = knobs_ranking(knob_data=rank_knob_data,
                                 metric_data=ranked_metric_data,
                                 mode=opt.rki,
                                 logger=logger)
    logger.info("Done ranking knobs for workload {} (# ranked knobs: {}).\n\n"
                "Ranked knobs: {}\n".format(opt.persistence, len(ranked_knobs), ranked_knobs))

    top_k: int = opt.topk
    top_k_knobs = utils.get_ranked_knob_data(ranked_knobs, knob_data, top_k)
    target_knobs = utils.get_ranked_knob_data(
        ranked_knobs, target_knob_data, top_k)
    knob_save_path = utils.make_date_dir('./save_knobs')
    logger.info("Knob save path : {}".format(knob_save_path))
    logger.info("Choose Top {} knobs : {}".format(
        top_k, top_k_knobs['columnlabels']))
    np.save(os.path.join(knob_save_path, 'knobs_{}.npy'.format(top_k)),
            np.array(top_k_knobs['columnlabels']))

    # Double version: handle the two targets (Totals_Ops/sec and Totals_p99_Latency) separately
    aggregated_data = [aggregated_ops_data, aggregated_latency_data]
    target_external_data = [
        ops_target_external_data, latency_target_external_data]
    if not opt.atr:
        model, optimizer = set_model(opt)
        model_save_path = utils.make_date_dir("./model_save")
        logger.info("Model save path : {}".format(model_save_path))
        logger.info("Learning Rate : {}".format(opt.lr))
        best_epoch, best_loss, best_mae = defaultdict(
            int), defaultdict(float), defaultdict(float)
        columns = ['Totals_Ops/sec', 'Totals_p99_Latency']

        ### train dnn ###
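        # Train one predictor per target metric: i == 0 -> Totals_Ops/sec, i == 1 -> Totals_p99_Latency.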
        for i in range(2):
            trainDataloader, valDataloader, testDataloader, scaler_y = prepareForTraining(
                opt, top_k_knobs, target_knobs, aggregated_data[i], target_external_data[i], i)
            logger.info(
                "====================== {} Pre-training Stage ====================\n".format(opt.model_mode))

            best_epoch[columns[i]], best_loss[columns[i]], best_mae[columns[i]] = train(
                model, trainDataloader, valDataloader, testDataloader, optimizer, scaler_y, opt, logger, model_save_path, i)

        for name in best_epoch.keys():
            logger.info("\n\n[{} Best Epoch {}] Best_Loss : {} Best_MAE : {}".format(
                name, best_epoch[name], best_loss[name], best_mae[name]))

        config = Config(opt.persistence, opt.db, opt.cluster, opt.rki,
                        opt.topk, opt.model_mode, opt.n_epochs, opt.lr)
        config.save_double_results(opt.target, best_epoch['Totals_Ops/sec'], best_epoch['Totals_p99_Latency'],
                                   best_loss['Totals_Ops/sec'], best_loss['Totals_p99_Latency'],
                                   best_mae['Totals_Ops/sec'], best_mae['Totals_p99_Latency'],
                                   model_save_path, log_dir, knob_save_path)
        return config
    else:
        models = set_rf_model()
        for i in range(2):
            X_tr, y_train = prepare_ATR_learning(
                    opt, top_k_knobs, target_knobs, aggregated_data[i], target_external_data[i], i)        
            models[i].fit(X_tr, y_train)
        
        pruned_configs, external_datas, defaults, scaler_X, scaler_ys = double_prepareForGA(opt, top_k_knobs['columnlabels'])
        current_solution_pools, targets = make_solution_pool(opt, pruned_configs, external_datas, defaults)
        fitness_function = RF_fitness

        n_configs = top_k_knobs['columnlabels'].shape[0]
        # keep half of the pool as survivors
        n_pool_half = opt.n_pool // 2
        # mutate half of the knob configuration
        mutation = int(n_configs * 0.5)
        GA_options = [n_configs, n_pool_half, mutation]

        top_k_config_path, name, connect = ATR_GA(opt, models, targets, top_k_knobs, current_solution_pools, fitness_function, GA_options, scaler_X, scaler_ys, logger)

        if connect:
            server_connection(opt, top_k_config_path, name)
        else:
            logger.info("Because appednfsync is 'always', Fin GA")
            return 0
    
        import datetime
        #save results
        i = 0
        today = datetime.datetime.now()
        name = 'result_'+opt.persistence+'-'+today.strftime('%Y%m%d')+'-'+'%02d'%i+'.csv'
        while os.path.exists(os.path.join('./GA_config/', name)):
            i += 1
            name = 'result_'+opt.persistence+'-'+today.strftime('%Y%m%d')+'-'+'%02d'%i+'.csv'
        os.rename(f'./GA_config/result_{opt.persistence.lower()}_external_GA.csv', './GA_config/'+name)
        logger.info(name)
        df = pd.read_csv('./GA_config/'+name)
        logger.info(df["Totals_Ops/sec"])
        logger.info(df["Totals_p99_Latency"])
Example #8
def main():
    parser = argparse.ArgumentParser()

    # Required parameters
    parser.add_argument("-m", "--model", type=str, default="Bin_normal",
                        required=True, choices=["Bin_normal", "Bin-uniform", "Bin_3d"],
                        help="Model selected in the list: Bin_normal, Bin-uniform, Bin_3d")
    # Optional parameters

    args = parser.parse_args()
    if args.model == "Bin_normal":
        from Bin_normal.config import Config
        from Bin_normal.model import Model
        config = Config()

    elif args.model == "Bin_3d":
        from Bin_3d.config import Config
        from Bin_3d.model import Model
        config = Config()

    else:
        from MANN.config import Config
        from MANN.model import Model
        config = Config()


    logger = get_logger(os.path.join(config.model, "logs/"))
    logger.info("=======Model Configuration======")
    logger.info(config.desc)
    logger.info("================================")

    try:
        train_x, dev_x, test_x, train_y, dev_y, test_y = load_data_from_csv(data_path=config.data_path,
                                                                            x_len=config.x_len,
                                                                            y_len=config.y_len,
                                                                            foresight=config.foresight,
                                                                            dev_ratio=config.dev_ratio,
                                                                            #test_ratio=config.test_ratio,
                                                                            seed=config.seed)
        logger.info("train_x shape: {}, dev_x shape: {}, test_x shape: {}"
                    .format(train_x.shape, dev_x.shape, test_x.shape))
        logger.info("train_y shape: {}, dev_y shape: {}, test_y shape: {}"
                    .format(train_y.shape, dev_y.shape, test_y.shape))
        model = Model(config)
        train_data = list(zip(train_x, train_y))
        no_improv = 0
        best_loss = 100
        model_dir = make_date_dir(os.path.join(config.model, 'model_save/'))
        result_dir = make_date_dir(os.path.join(config.model, 'results/'))

        start_time = time()
        for i in range(config.num_epochs):
            train_batches = batch_loader(train_data, config.batch_size)
            epoch = i+1

            for batch in train_batches:
                batch_x, batch_y = zip(*batch)
                loss, acc, step = model.train(batch_x, batch_y)

                if step % 100 == 0:
                    logger.info("epoch: %d, step: %d, loss: %4f, acc: %4f" %
                                (epoch, step, loss, acc))

            # dev score for each epoch (no mini batch)
            _, dev_loss, dev_acc = model.eval(dev_x, dev_y)

            if dev_loss < best_loss:
                best_loss = dev_loss
                no_improv = 0
                logger.info("New score! : dev_loss: %4f, dev_acc: %4f" %
                            (dev_loss, dev_acc))
                logger.info("Saving model at {}".format(model_dir))
                model.save_session(os.path.join(model_dir, config.model))
            else:
                no_improv += 1
                if no_improv == config.nepoch_no_improv:
                    logger.info("No improvement for %d epochs" % no_improv)
                    break

        elapsed = time()-start_time
        # generating results (no mini batch)
        model.restore_session(model_dir)
        pred, test_loss, test_acc = model.eval(test_x, test_y)
        logger.info("test_loss: %4f, test_acc: %4f" %
                    (test_loss, test_acc, ))

        # save results
        np.save(os.path.join(result_dir, 'pred.npy'), pred)
        np.save(os.path.join(result_dir, 'test_y.npy'), test_y)
        logger.info("Saving results at {}".format(result_dir))
        logger.info("Elapsed training time {0:0.4f}".format(elapsed))
        logger.info("Training finished, exit program")

    except:
        logger.exception("ERROR")
Example #9
def main():
    config = Config()

    logger, log_dir = get_logger(os.path.join(config.model, "logs/"))
    logger.info("=======Model Configuration=======")
    logger.info(config.desc)
    logger.info("=================================")

    try:
        train_x, dev_x, test_x, train_y, dev_y, test_y, train_m, dev_m, test_m, test_dt = load_agg_selected_data_mem(data_path=config.data_path, \
            x_len=config.x_len, \
            y_len=config.y_len, \
            foresight=config.foresight, \
            cell_ids=config.train_cell_ids, \
            dev_ratio=config.dev_ratio, \
            test_len=config.test_len, \
            seed=config.seed)

        model = Model(config)
        if config.allow_gpu:
            model = model.cuda()

        optimizer = torch.optim.Adam(model.parameters(), lr=config.lr)

        # get train data
        TrainDataSet = BatchDataset(train_x, train_m, train_y)
        TrainSampler = tud.RandomSampler(TrainDataSet)
        TrainDataLoader = tud.DataLoader(TrainDataSet,
                                         batch_size=config.batch_size,
                                         sampler=TrainSampler,
                                         num_workers=2)

        # get valid Data
        dev_x, dev_m, dev_y = torch.Tensor(dev_x), torch.Tensor(
            dev_m), torch.Tensor(dev_y)

        if config.allow_gpu:
            dev_x, dev_m, dev_y = dev_x.cuda(), dev_m.cuda(), dev_y.cuda()

        step = 0
        no_improv = 0
        best_loss = 100
        model_dir = make_date_dir(os.path.join(config.model, 'model_save/'))
        logger.info("Start training")

        start_time = time()
        for i in range(config.num_epochs):
            epoch = i + 1

            # train
            model.train()
            for batch_x, batch_m, batch_y in TrainDataLoader:
                step = step + 1

                if config.allow_gpu:
                    batch_x, batch_m, batch_y = batch_x.cuda(), batch_m.cuda(
                    ), batch_y.cuda()

                optimizer.zero_grad()
                prediction, loss, rse, smape, mae = model(
                    batch_x, batch_m, batch_y)

                loss.backward()
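                # Clip the global gradient norm to config.clip before the optimizer step.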
                grad_norm = torch.nn.utils.clip_grad_norm_(
                    model.parameters(), config.clip)
                optimizer.step()

                if step % 100 == 0:
                    logger.info(
                        "epoch: %d, step: %d, loss: %.4f, rse: %.4f, smape: %.4f, mae: %.4f"
                        % (epoch, step, loss, rse, smape, mae))

            # dev score for each epoch (no mini batch)
            with torch.no_grad():
                model.eval()
                prediction, dev_loss, dev_rse, dev_smape, dev_mae = model(
                    dev_x, dev_m, dev_y)

            if dev_loss < best_loss:
                best_loss = dev_loss
                no_improv = 0
                # logger.info("New score! : dev_loss: %.4f, dev_rse: %.4f, dev_smape: %.4f, dev_mae: %.4f" %
                #             (dev_loss, dev_rse, dev_smape, dev_mae))
                # logger.info("Saving model at {}".format(model_dir))
                torch.save(model, model_dir + "/" + config.model + ".pth")
            else:
                no_improv += 1
                if no_improv == config.nepoch_no_improv:
                    logger.info("No improvement for %d epochs" % no_improv)
                    break

        elapsed = time() - start_time
        # generating results (no mini batch)
        logger.info("Saving model at {}".format(model_dir))
        logger.info("Elapsed training time {0:0.2f} mins".format(elapsed / 60))
        logger.info("Training finished, exit program")

    except:
        logger.exception("ERROR")
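
BatchDataset above is consumed by a torch DataLoader but is not defined in this snippet. A minimal sketch, assuming only the usage shown (it wraps the x, m, y arrays and indexing returns an (x, m, y) triple so each batch unpacks into batch_x, batch_m, batch_y), might be:

import torch
import torch.utils.data as tud

class BatchDataset(tud.Dataset):
    # Hypothetical wrapper: store the three arrays as float tensors and
    # return one (x, m, y) triple per index.
    def __init__(self, x, m, y):
        self.x = torch.Tensor(x)
        self.m = torch.Tensor(m)
        self.y = torch.Tensor(y)

    def __len__(self):
        return len(self.x)

    def __getitem__(self, idx):
        return self.x[idx], self.m[idx], self.y[idx]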
Example #10
def main(opt: argparse.Namespace, logger: logging.Logger, log_dir: str) -> Config:
    #Target workload loading
    logger.info(
        f"====================== {opt.persistence} mode ====================\n"
    )

    logger.info(f"Target workload name is {opt.target}")

    knob_data, aggregated_IM_data, aggregated_EM_data, target_knob_data, target_external_data = data_preprocessing(
        opt.target, opt.persistence, logger)

    logger.info(
        "====================== Metrics_Simplification ====================\n")
    pruned_metrics = metric_simplification(aggregated_IM_data, logger, opt)
    logger.info(
        f"Done pruning metrics for workload {opt.persistence} (# of pruned metrics: {len(pruned_metrics)}).\n\n"
        f"Pruned metrics: {pruned_metrics}\n")
    metric_idxs = [
        i for i, metric_name in enumerate(aggregated_IM_data['columnlabels'])
        if metric_name in pruned_metrics
    ]
    ranked_metric_data = {
        'data':
        aggregated_IM_data['data'][:, metric_idxs],
        'rowlabels':
        copy.deepcopy(aggregated_IM_data['rowlabels']),
        'columnlabels':
        [aggregated_IM_data['columnlabels'][i] for i in metric_idxs]
    }

    ### KNOBS RANKING STAGE ###
    rank_knob_data = copy.deepcopy(knob_data)
    logger.info(
        "====================== Run_Knobs_Ranking ====================\n")
    logger.info(f"use mode = {opt.rki}")
    ranked_knobs = knobs_ranking(knob_data=rank_knob_data,
                                 metric_data=ranked_metric_data,
                                 mode=opt.rki,
                                 logger=logger)
    logger.info(
        f"Done ranking knobs for workload {opt.persistence} (# ranked knobs: {len(ranked_knobs)}).\n\n"
        f"Ranked knobs: {ranked_knobs}\n")

    top_k: int = opt.topk
    top_k_knobs = utils.get_ranked_knob_data(ranked_knobs, knob_data, top_k)
    target_knobs = utils.get_ranked_knob_data(ranked_knobs, target_knob_data,
                                              top_k)
    knob_save_path = utils.make_date_dir('./save_knobs')
    logger.info(f"Knob save path : {knob_save_path}")
    logger.info(f"Choose Top {top_k} knobs : {top_k_knobs['columnlabels']}")
    np.save(os.path.join(knob_save_path, f'knobs_{top_k}.npy'),
            np.array(top_k_knobs['columnlabels']))

    model, optimizer, trainDataloader, valDataloader, testDataloader, scaler_y = prepare_for_training(
        opt, top_k_knobs, target_knobs, aggregated_EM_data,
        target_external_data)

    logger.info(
        f"====================== {opt.model_mode} Pre-training Stage ====================\n"
    )

    best_epoch, best_th_loss, best_la_loss, best_th_mae_loss, best_la_mae_loss, model_path = train(
        model, trainDataloader, valDataloader, testDataloader, optimizer,
        scaler_y, opt, logger)
    logger.info(
        f"\n\n[Best Epoch {best_epoch}] Best_th_Loss : {best_th_loss} Best_la_Loss : {best_la_loss} Best_th_MAE : {best_th_mae_loss} Best_la_MAE : {best_la_mae_loss}"
    )

    config = Config(opt.persistence, opt.db, opt.cluster, opt.rki, opt.topk,
                    opt.model_mode, opt.n_epochs, opt.lr)
    config.save_results(opt.target, best_epoch, best_th_loss, best_la_loss,
                        best_th_mae_loss, best_la_mae_loss, model_path,
                        log_dir, knob_save_path)

    return config
Example #11
def train(model, trainDataset, valDataset, testDataset, optimizer, scheduler,
          tokenizer):
    """
    Train using train_epoch, eval_epoch, test_score_model.

    Adopt EarlyStopping checking valid loss.
    """
    val_losses = []
    test_accuracy = []

    model_save_path = utils.make_date_dir("./model_save")
    logger.info("Model save path: {}".format(model_save_path))

    best_loss = float('inf')
    best_acc = 0
    patience = 0

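    # num_labels == 1 means a regression target scored with the MSE-based tester; otherwise use the CE-based tester.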
    if args.num_labels == 1:
        test_score = test_MSE_score_model
    else:
        test_score = test_CE_score_model

    for epoch in range(int(args.n_epochs)):
        patience += 1

        logger.info("=====================Train======================")
        train_loss, text_loss, visual_loss, speech_loss, label_loss = train_epoch(
            model, trainDataset, optimizer, scheduler, tokenizer)
        logger.info(
            "[Train Epoch {}] Joint Loss : {} Text Loss : {} Visual Loss : {} Speech Loss : {} Label Loss : {}"
            .format(epoch + 1, train_loss, text_loss, visual_loss, speech_loss,
                    label_loss))

        logger.info("=====================Valid======================")
        valid_loss, text_loss, visual_loss, speech_loss, label_loss, preds, labels = eval_epoch(
            model, valDataset, optimizer, scheduler, tokenizer)
        logger.info(
            "[Val Epoch {}] Joint Loss : {} Text Loss : {} Visual Loss : {} Speech Loss : {} Label Loss : {}"
            .format(epoch + 1, valid_loss, text_loss, visual_loss, speech_loss,
                    label_loss))

        logger.info("=====================Test======================")
        test_acc, test_mae, test_f_score = test_score(preds, labels)

        logger.info(
            "[Epoch {}] Test_ACC : {}, Test_MAE : {}, Test_F_Score: {}".format(
                epoch + 1, test_acc, test_mae, test_f_score))

        if test_acc > best_acc:
            torch.save(
                model.state_dict(),
                os.path.join(model_save_path,
                             'model_' + str(epoch + 1) + ".pt"))
            best_acc = test_acc
            patience = 0

        if patience == 15:
            break

        val_losses.append(valid_loss)
        test_accuracy.append(test_acc)
Example #12
def main():
    parser = argparse.ArgumentParser()

    # Required parameters
    parser.add_argument(
        "-m",
        "--model",
        type=str,
        default="LSTNet",
        required=True,
        choices=["LSTNet", "MANN", "AR_reg", "AR", "AR_mem"],
        help="Model selected in the list: LSTNet, MANN, AR_reg, AR, AR_mem")

    # Optional parameters

    args = parser.parse_args()
    if args.model == "LSTNet":
        from LSTNet.config import Config
        from LSTNet.model import Model
        config = Config()
    elif args.model == "MANN":
        from MANN.config import Config
        from MANN.model import Model
        config = Config()
    elif args.model == "AR_reg":
        from AR_reg.config import Config
        from AR_reg.model import Model
        config = Config()
    elif args.model == "AR_mem":
        from AR_mem.config import Config
        from AR_mem.model import Model
        config = Config()
    elif args.model == "AR":
        from AR.config import Config
        from AR.model import Model
        config = Config()

    COL_LIST = ['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume']

    logger = get_logger(os.path.join(config.model, "logs/"))
    logger.info("=======Model Configuration======")
    logger.info(config.desc)
    logger.info("================================")

    try:

        train_x, dev_x, test_x, train_y, dev_y, test_y, train_m, dev_m, test_m, test_dt = load_data_mem(
            data_path=config.data_path,
            x_col_list=COL_LIST,
            y_col_list=COL_LIST,
            x_len=config.x_len,
            y_len=config.y_len,
            mem_len=config.mem_len,
            foresight=config.foresight,
            dev_ratio=config.dev_ratio,
            test_len=config.test_len,
            seed=config.seed)

        model = Model(config)
        train_data = list(zip(train_x, train_m, train_y))
        no_improv = 0
        best_loss = 100
        model_dir = make_date_dir(os.path.join(config.model, 'model_save/'))
        result_dir = make_date_dir(os.path.join(config.model, 'results/'))
        logger.info("Start training")
        dev_x = np.asarray(dev_x)
        dev_y = np.asarray(dev_y)

        start_time = time()
        for i in range(config.num_epochs):
            train_batches = batch_loader(train_data, config.batch_size)
            epoch = i + 1

            for batch in train_batches:
                batch_x, batch_m, batch_y = zip(*batch)
                loss, rmse, rse, smape, mae, step = model.train(
                    batch_x, batch_m, batch_y)

                if step % 100 == 0:
                    logger.info(
                        "epoch: %d, step: %d, loss: %4f, rmse: %4f, rse: %4f, smape: %4f, mae: %4f"
                        % (epoch, step, loss, rmse, rse, smape, mae))

            # dev score for each epoch (no mini batch)
            _, dev_loss, dev_rmse, dev_rse, dev_smape, dev_mae = model.eval(
                dev_x, dev_m, dev_y)

            if dev_loss < best_loss:
                best_loss = dev_loss
                no_improv = 0
                logger.info(
                    "New score! : dev_loss: %4f, rmse: %4f, dev_rse: %4f, dev_smape: %4f, dev_mae: %4f"
                    % (dev_loss, dev_rmse, dev_rse, dev_smape, dev_mae))
                logger.info("Saving model at {}".format(model_dir))
                model.save_session(os.path.join(model_dir, config.model))
            else:
                no_improv += 1
                if no_improv == config.nepoch_no_improv:
                    logger.info("No improvement for %d epochs" % no_improv)
                    break

        elapsed = time() - start_time
        # generating results (no mini batch)
        model.restore_session(model_dir)
        pred, test_loss, test_rmse, test_rse, test_smape, test_mae = model.eval(
            test_x, test_m, test_y)
        logger.info(
            "test_loss: %4f, test_rmse: %4f, test_rse: %4f, test_smape: %4f, test_mae: %4f"
            % (test_loss, test_rmse, test_rse, test_smape, test_mae))

        # save results
        np.save(os.path.join(result_dir, 'pred.npy'), pred)
        np.save(os.path.join(result_dir, 'test_y.npy'), test_y)
        np.save(os.path.join(result_dir, 'test_dt.npy'), test_dt)
        logger.info("Saving results at {}".format(result_dir))
        logger.info("Elapsed training time {0:0.2f} mins".format(elapsed / 60))
        logger.info("Training finished, exit program")

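        # Plot predictions against the ground truth and save the figure.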
        t = np.linspace(0, pred.shape[0], num=pred.shape[0])
        mae = np.mean(np.abs(test_y - pred))
        mape = np.mean(np.abs((test_y - pred) / test_y))
        plt.rcParams['figure.figsize'] = [20, 4]
        plt.plot(t, test_y, "r", alpha=0.5)
        #plt.ylim(0.5,1.0)
        plt.plot(t, pred, "b")
        #plt.title("{}, mape:{mape:.5f}, mae:{mae:.5f}".format(raw.columns[1], mape=mape, mae=mae), size=20)
        plt.legend(("actual", "pred"), loc="upper left")
        plt.grid()
        plt.savefig(os.path.join(config.model, "image/figure.png"))
        plt.show()

    except:
        logger.exception("ERROR")