Example #1
    def save_to_file(self, algorithm_name, number_of_elements,
                     number_of_operations, algorithm_time):
        output_filename = self.output_file_name()
        output_path = self.output_path()
        my_writer = Writer(output_path, output_filename)
        my_writer.save_to_file(algorithm_name, self.name, number_of_elements,
                               number_of_operations, algorithm_time)
Example #2
    def convertRam2Xml(self, schema, xml_path):
        """
        Create ram representation of incoming Schema object
        :param schema:
        :param xml_path:
        :return:
        """
        if schema is None:
            raise ParseError("Schema not found", self)
        node = self.create_schema(schema)
        node.appendChild(self.xml.createElement("custom"))
        if schema.domains:
            domains = self.xml.createElement("domains")
            for domain in self.create_domain(schema.domains):
                domains.appendChild(domain)
            node.appendChild(domains)

        tables = self.xml.createElement("tables")
        for table in self.create_table(schema.tables):
            tables.appendChild(table)

        node.appendChild(tables)
        try:
            self.xml.appendChild(node)
            if xml_path:
                Writer.write_xml(xml_path, self.xml)
        except Exception:
            raise ItemNotFoundException("domains, tables", schema.name)
Example #3
    def pack(self):
        packet_data = self.buffer
        Writer.__init__(self)
        self.writeShort(self.id)
        self.writeUInt(len(packet_data), 3)
        self.writeShort(0)  # Version
        self.buffer += packet_data + b'\xff\xff\x00\x00\x00\x00\x00'
Example #4
    def label(self, start: int = 0, end: int = None):
        if end is None:
            end = len(self.dataframe)

        dataframe = self.dataframe.iloc[start:end]

        for index, row in dataframe.iterrows():
            title = row["title"]
            text = row["original_text"]
            persons = list(filter(lambda person: len(person) > 2, row["persons"]))

            if len(persons) > 0:
                personPattern = re.compile("|".join(persons), re.IGNORECASE)
                title = personPattern.sub("<Person>", title)
                text = personPattern.sub("<Person>", text)

            title = self.partyPattern.sub("<Partei>", title)
            text = self.partyPattern.sub("<Partei>", text)

            mediaPattern = re.compile("|".join(["bild", "bildplus", "taz", "tagesschau"]), re.IGNORECASE)
            title = mediaPattern.sub("<Zeitung>", title)
            text = mediaPattern.sub("<Zeitung>", text)

            print("================================================")
            print(title)
            print("++++++++++++++++++++++++++++++++++++++++++++++++")
            print(text)
            print("================================================")

            self.get_polarity_input(dataframe, index)
            self.get_subjectivity_input(dataframe, index)

            Writer.write_dataframe(dataframe, "labeled_paragraphs")
Example #5
    def __init__(self):
        super(Simulator, self).__init__()

        self.model = cfg.model(cfg)
        self.optim = cfg.optim(cfg.learning_rate)
        self.loss = cfg.loss
        self.epoch = tf.Variable(0)

        self.writer = Writer(cfg)
        # Restore if save exists
        if Path('./simulator_saves/best').is_dir():
            self.model, self.optim, self.epoch = self.writer.restore(
                model=self.model, optim=self.optim, epoch=self.epoch)

        self.preprocessing()
Example #6
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-c",
                        "--config",
                        type=str,
                        required=True,
                        help="yaml file for config.")
    parser.add_argument(
        "-n",
        "--name",
        type=str,
        default=None,
        help="Name of the model. Used for both logging and saving chkpt.",
    )
    args = parser.parse_args()
    hp = load_hparam(args.config)

    if args.name is not None:
        hp.log.name = args.name

    # random seed
    if hp.train.random_seed is None:
        hp.train.random_seed = random.randint(1, 10000)
    set_random_seed(hp.train.random_seed)

    # set log/checkpoint dir
    hp.log.chkpt_dir = os.path.join(hp.log.chkpt_dir, hp.log.name)
    hp.log.log_dir = os.path.join(hp.log.log_dir, hp.log.name)
    os.makedirs(hp.log.chkpt_dir, exist_ok=True)
    os.makedirs(hp.log.log_dir, exist_ok=True)

    # set logger
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s - %(levelname)s - %(message)s",
        handlers=[
            logging.FileHandler(
                os.path.join(hp.log.log_dir,
                             "%s-%d.log" % (hp.log.name, time.time()))),
            logging.StreamHandler(),
        ],
    )
    logger = logging.getLogger()

    # set writer (tensorboard / wandb)
    writer = Writer(hp, hp.log.log_dir)

    hp_str = yaml.dump(hp.to_dict())
    logger.info("Config:")
    logger.info(hp_str)

    if hp.data.train_dir == "" or hp.data.test_dir == "":
        logger.error("train or test data directory cannot be empty.")
        raise Exception("Please specify directories of data in %s" %
                        args.config)

    train_loop(hp, logger, writer)
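
A hypothetical invocation of the script above, e.g. python train.py -c config/default.yaml -n baseline_run, where the file name and config path are placeholders; -c/--config is required and -n/--name is optional and overrides hp.log.name.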
Example #7
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-c',
                        '--config',
                        type=str,
                        required=True,
                        help="yaml file for config.")
    parser.add_argument('-p',
                        '--checkpoint_path',
                        type=str,
                        default=None,
                        help="path of checkpoint pt file for resuming")
    parser.add_argument(
        '-n',
        '--name',
        type=str,
        required=True,
        help="Name of the model. Used for both logging and saving chkpt.")
    args = parser.parse_args()

    hp = HParam(args.config)
    hp_str = yaml.dump(hp)
    args_str = yaml.dump(vars(args))

    pt_dir = os.path.join(hp.log.chkpt_dir, args.name)
    log_dir = os.path.join(hp.log.log_dir, args.name)
    os.makedirs(pt_dir, exist_ok=True)
    os.makedirs(log_dir, exist_ok=True)

    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s - %(levelname)s - %(message)s',
                        handlers=[
                            logging.FileHandler(
                                os.path.join(
                                    log_dir,
                                    '%s-%d.log' % (args.name, time.time()))),
                            logging.StreamHandler()
                        ])
    logger = logging.getLogger()

    logger.info('Config by yaml file')
    logger.info(hp_str)
    logger.info('Command Line Config')
    logger.info(args_str)

    if hp.data.train == '' or hp.data.test == '':
        logger.error("train or test data directory cannot be empty.")
        raise Exception("Please specify directories of data in %s" %
                        args.config)

    writer = Writer(hp, log_dir)
    train_loader = create_dataloader(hp, args, DataloaderMode.train)
    test_loader = create_dataloader(hp, args, DataloaderMode.test)

    train(args, pt_dir, train_loader, test_loader, writer, logger, hp, hp_str)
Example #8
def main(DEVICE):

    # define model, optimizer, scheduler
    model = VQVC().to(DEVICE)

    recon_loss = nn.L1Loss().to(DEVICE)
    vocoder = get_vocgan(
        ckpt_path=args.vocoder_pretrained_model_path).to(DEVICE)

    mel_stat = torch.tensor(np.load(args.mel_stat_path)).to(DEVICE)

    optimizer = Adam(model.parameters(), lr=args.init_lr)
    scheduler = WarmupScheduler(optimizer,
                                warmup_epochs=args.warmup_steps,
                                initial_lr=args.init_lr,
                                max_lr=args.max_lr,
                                milestones=args.milestones,
                                gamma=args.gamma)

    global_step = load_checkpoint(checkpoint_path=args.model_checkpoint_path,
                                  model=model,
                                  optimizer=optimizer,
                                  scheduler=scheduler)

    # load dataset & dataloader
    train_dataset = SpeechDataset(mem_mode=args.mem_mode,
                                  meta_dir=args.prepro_meta_train,
                                  dataset_name=args.dataset_name,
                                  mel_stat_path=args.mel_stat_path,
                                  max_frame_length=args.max_frame_length)
    eval_dataset = SpeechDataset(mem_mode=args.mem_mode,
                                 meta_dir=args.prepro_meta_eval,
                                 dataset_name=args.dataset_name,
                                 mel_stat_path=args.mel_stat_path,
                                 max_frame_length=args.max_frame_length)

    train_data_loader = DataLoader(dataset=train_dataset,
                                   batch_size=args.train_batch_size,
                                   shuffle=True,
                                   drop_last=True,
                                   pin_memory=True,
                                   num_workers=args.n_workers)
    eval_data_loader = DataLoader(dataset=eval_dataset,
                                  batch_size=args.train_batch_size,
                                  shuffle=False,
                                  pin_memory=True,
                                  drop_last=True)

    # tensorboard
    writer = Writer(args.model_log_path) if args.log_tensorboard else None

    # train the model!
    train(train_data_loader, eval_data_loader, model, recon_loss, vocoder,
          mel_stat, optimizer, scheduler, global_step, writer, DEVICE)
Example #9
    def encode(self):
        Writer.__init__(self)
        self.writeVint(0)
        self.writeVint(self.dataDB["LowID"])

        self.writeVint(0)
        self.writeVint(0)  # Count brawlers

        self.writeVint(14)  # Count array
        for x in range(14):
            self.writeVint(x + 1)
            self.writeVint(x)

        self.writeString(self.dataDB["name"])  # Name
        self.writeVint(100)
        self.writeVint(28000000)
        self.writeVint(43000000)

        self.writeBool(False)  # Is club
        self.writeVint(0)
Example #10
    def train_threshold(self) -> float:
        """
        Train the threshold with labeled data.

        :return: The best threshold.
        """
        threshold: float = 0
        best_threshold: float = 0
        best_score: float = 0

        thresholds: List[float] = []
        f1_scores: List[Tuple[float, float, float, float]] = []

        self.tfidf_sentiment.get_context_polarity(8)
        self.tfidf_sentiment.calculate_sentiment_score(overwrite=True)

        # Iterate over different thresholds, increase with every loop
        while threshold <= 0.005:
            self.tfidf_sentiment.map_sentiment(threshold=threshold, overwrite=True)

            # Optimize over the sum of all f1 scores for SentiWs
            f1_sentiws, _, _ = self.f1_score(training=True)
            f1_sum = f1_sentiws[0] + f1_sentiws[1] + f1_sentiws[2]

            thresholds.append(threshold)
            f1_scores.append((f1_sum, f1_sentiws[0], f1_sentiws[1], f1_sentiws[2]))

            # Replace best threshold if current one is better
            if f1_sum > best_score:
                best_score = f1_sum
                best_threshold = threshold

            threshold += 0.0000001

        # Visualize the training
        self.visualize_threshold(thresholds, f1_scores, best_threshold, 0.005)

        # Adjust the sentiment with best threshold
        self.tfidf_sentiment.map_sentiment(threshold=best_threshold, overwrite=True)
        Writer.write_dataframe(self.dataframe, "labeled_paragraphs")
        return best_threshold
Example #11
    def __init__(self, opt, tensorboard=True):
        # opt is the global options shared in the coding env
        self.opt = opt
        self.tensorboard = tensorboard
        if tensorboard:
            import threading
            # NOTE: self.t is never assigned (the Thread creation below is commented
            # out), so the original self.t.start() call would raise AttributeError;
            # TensorBoard is launched directly instead.
            # self.t = threading.Thread(target=launchTensorBoard, args=([os.path.join(opt.checkpoints_dir, opt.name), opt.port]))
            # self.t.daemon = True
            launchTensorBoard(os.path.join(opt.checkpoints_dir, opt.name),
                              opt.port)
            # self.t.start()
            # self.t.join(20)
            self.writer = SummaryWriter(
                os.path.join(opt.checkpoints_dir, opt.name))
        else:
            self.writer = Writer(os.path.join(opt.checkpoints_dir, opt.name))
        log_filename = os.path.join(opt.checkpoints_dir,
                                    '%s_log.txt' % opt.name)
        self.log = open(log_filename, 'w')
        self.write_opt(opt)
        self.save_image_root = os.path.join(opt.checkpoints_dir, opt.name)
Example #12
    def __init__(self, dataset, net, device, args):
        self.dataset = dataset
        self.net = net
        self.device = torch.device(
            device if torch.cuda.is_available() else 'cpu')
        self.args = args

        self.forward = self.net.to(self.device)
        self.loss_fn = self._setup_loss_fn()

        # logs are saved as text and visualized in TensorBoard
        self.recoder = Writer(self.args.log_dir)
Example #13
    def _execute_transform(self):
        execution_info = ETLExecutionInfo("TRANSFORM")
        loaded_jobs = JobsLoader.Instance().loaded_jobs
        job_manager = JobsManager(loaded_jobs)
        results = job_manager.run()
        results = PosProcessor.Instance().run(results)
        Writer.Instance().run(results)
        execution_info.end()
        Log.Instance().appendFinalReport(
            "[TRANSFORM executed in: " +
            str(execution_info.execution_data['value']) + " minutes ]")
        return execution_info.execution_data
Example #14
    def __init__(self):
        super(AlphaDoom, self).__init__()

        self.mcts = MCTS(cfg)
        self.replay = Replay(cfg)
        self.autoencoder = Simulator()
        self.autoencoder.train()
        #self.autoencoder = AutoEncoder()
        #tf.train.Checkpoint(model=self.autoencoder).restore(tf.train.latest_checkpoint('./simulator_saves/best'))

        # Load selected model
        self.model = cfg.model(cfg)
        self.loss1 = cfg.loss1
        self.loss2 = cfg.loss2
        self.optim = cfg.optim(cfg.learning_rate)
        self.epoch = tf.Variable(0)

        self.writer = Writer(cfg)
        # Restore if save exists
        if Path('./alphadoom_saves/best').is_dir():
            self.model, self.optim, self.epoch = self.writer.restore(
                self.model, self.optim, self.epoch)

        self.vizdoom = VizDoom(cfg)
Example #15
def run_test(epoch=-1, name="evaluator_pretrained"):
    print('Running Test')
    opt = TestOptions().parse()
    opt.serial_batches = True  # no shuffle
    opt.name = name  # checkpoint dir name, evaluator/gan/vae_pretrained
    dataset = DataLoader(opt)
    model = create_model(opt)
    writer = Writer(opt)
    # test
    writer.reset_counter()

    for i, data in enumerate(dataset):
        model.set_input(data)
        ncorrect, nexamples = model.test()
        writer.update_counter(ncorrect, nexamples)
    writer.print_acc(epoch, writer.acc)
    return writer.acc
Example #16
class ClientHelloMessage():
    def __init__(self):
        self.w = Writer()

    def add_header(self, data, id: int):
        header = b''
        header += self.w.writeShort(id)
        header += len(data).to_bytes(3, 'big')
        header += self.w.writeShort(0)
        header += data
        return header

    def send_client_hello(self, major: int, minor: int):
        message = b''
        message += self.w.writeInt(2)
        message += self.w.writeInt(11)
        message += self.w.writeInt(major)
        message += self.w.writeInt(0)
        message += self.w.writeInt(minor)
        message += self.w.writeInt(0)
        message += self.w.writeInt(2)
        message += self.w.writeInt(2)
        return self.add_header(message, 10100)
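
A minimal usage sketch for ClientHelloMessage (hypothetical, not from the source repo): it assumes the project's Writer.writeShort/writeInt return the encoded bytes, as the concatenation above implies, and uses placeholder host, port and version numbers.

    import socket

    hello = ClientHelloMessage()
    packet = hello.send_client_hello(major=29, minor=258)  # placeholder version numbers

    with socket.create_connection(("server.example.com", 9339)) as sock:  # placeholder address
        sock.sendall(packet)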
Example #17
def train(args):

    os.makedirs(args.checkpoint_dir, exist_ok=True)
    logging = GetLogging(args.logfile)

    train_dataset = CustomerDataset(args.input,
                                    upsample_factor=hop_length,
                                    local_condition=True,
                                    global_condition=False)

    device = torch.device("cuda" if args.use_cuda else "cpu")
    generator, discriminator = create_model(args)

    print(generator)
    print(discriminator)

    num_gpu = torch.cuda.device_count() if args.use_cuda else 1

    global_step = 0

    g_parameters = list(generator.parameters())
    g_optimizer = optim.Adam(g_parameters, lr=args.g_learning_rate)

    d_parameters = list(discriminator.parameters())
    d_optimizer = optim.Adam(d_parameters, lr=args.d_learning_rate)

    writer = Writer(args.checkpoint_dir, sample_rate=sample_rate)

    generator.to(device)
    discriminator.to(device)

    if args.resume is not None:
        restore_step = attempt_to_restore(generator, discriminator,
                                          g_optimizer, d_optimizer,
                                          args.resume, args.use_cuda, logging)
        global_step = restore_step

    customer_g_optimizer = Optimizer(g_optimizer, args.g_learning_rate,
                                     global_step, args.warmup_steps,
                                     args.decay_learning_rate)
    customer_d_optimizer = Optimizer(d_optimizer, args.d_learning_rate,
                                     global_step, args.warmup_steps,
                                     args.decay_learning_rate)

    criterion = nn.MSELoss().to(device)
    stft_criterion = MultiResolutionSTFTLoss()

    for epoch in range(args.epochs):

        collate = CustomerCollate(upsample_factor=hop_length,
                                  condition_window=args.condition_window,
                                  local_condition=True,
                                  global_condition=False)

        train_data_loader = DataLoader(train_dataset,
                                       collate_fn=collate,
                                       batch_size=args.batch_size,
                                       num_workers=args.num_workers,
                                       shuffle=True,
                                       pin_memory=True)

        #train one epoch
        for batch, (samples, conditions) in enumerate(train_data_loader):

            start = time.time()
            batch_size = int(conditions.shape[0] // num_gpu * num_gpu)

            samples = samples[:batch_size, :].to(device)
            conditions = conditions[:batch_size, :, :].to(device)

            losses = {}

            if num_gpu > 1:
                g_outputs = parallel(generator, (conditions, ))
            else:
                g_outputs = generator(conditions)

            sc_loss, mag_loss = stft_criterion(g_outputs.squeeze(1),
                                               samples.squeeze(1))

            g_loss = sc_loss + mag_loss

            losses['sc_loss'] = sc_loss.item()
            losses['mag_loss'] = mag_loss.item()
            losses['g_loss'] = g_loss.item()

            customer_g_optimizer.zero_grad()
            g_loss.backward()
            nn.utils.clip_grad_norm_(g_parameters, max_norm=0.5)
            customer_g_optimizer.step_and_update_lr()

            time_used = time.time() - start

            logging.info(
                "Step: {} --sc_loss: {:.3f} --mag_loss: {:.3f} --Time: {:.2f} seconds"
                .format(global_step, sc_loss, mag_loss, time_used))

            if global_step % args.checkpoint_step == 0:
                save_checkpoint(args, generator, discriminator, g_optimizer,
                                d_optimizer, global_step, logging)

            if global_step % args.summary_step == 0:
                writer.logging_loss(losses, global_step)
                target = samples.cpu().detach()[0, 0].numpy()
                predict = g_outputs.cpu().detach()[0, 0].numpy()
                writer.logging_audio(target, predict, global_step)
                writer.logging_histogram(generator, global_step)
                writer.logging_histogram(discriminator, global_step)

            global_step += 1
Example #18
def train_loop(rank, hp, world_size=1):
    # reload hp
    hp = DotDict(hp)
    if hp.model.device.lower() == "cuda" and world_size != 0:
        setup(hp, rank, world_size)
    if rank != 0:
        logger = None
        writer = None
    else:
        # set logger
        logger = make_logger(hp)
        # set writer (tensorboard / wandb)
        writer = Writer(hp, hp.log.log_dir)
        hp_str = yaml.dump(hp.to_dict())
        logger.info("Config:")
        logger.info(hp_str)
        if hp.data.train_dir == "" or hp.data.test_dir == "":
            logger.error("train or test data directory cannot be empty.")
            raise Exception("Please specify directories of data")
        logger.info("Set up train process")

    if hp.model.device.lower() == "cuda" and world_size != 0:
        hp.model.device = rank
        torch.cuda.set_device(rank)
    else:
        hp.model.device = hp.model.device.lower()

    # make dataloader
    if logger is not None:
        logger.info("Making train dataloader...")
    train_loader = create_dataloader(hp, DataloaderMode.train, rank,
                                     world_size)
    if logger is not None:
        logger.info("Making test dataloader...")
    test_loader = create_dataloader(hp, DataloaderMode.test, rank, world_size)

    # init Model
    net_arch = Net_arch(hp)
    loss_f = torch.nn.MSELoss()
    model = Model(hp, net_arch, loss_f, rank, world_size)

    # load training state
    if hp.load.resume_state_path is not None:
        model.load_training_state(logger)
    else:
        if logger is not None:
            logger.info("Starting new training run.")

    try:
        epoch_step = 1 if hp.data.divide_dataset_per_gpu else world_size
        for model.epoch in itertools.count(model.epoch + 1, epoch_step):
            if model.epoch > hp.train.num_iter:
                break
            train_model(hp, model, train_loader, writer, logger)
            if model.epoch % hp.log.chkpt_interval == 0:
                model.save_network(logger)
                model.save_training_state(logger)
            test_model(hp, model, test_loader, writer)
        cleanup()
        if logger is not None:
            logger.info("End of Train")
    except Exception as e:
        if logger is not None:
            logger.info("Exiting due to exception: %s" % e)
        traceback.print_exc()
        cleanup()
Example #19
    def encode(self):
        Writer.__init__(self)
Example #20
    best_val_loss = 1e100
    best_do = -1
    l2 = 0.0001
    do_layers = 1
    bn = True

    for lr in [0.0005, 0.005]:
        for l2 in [0.00001, 0.0001, 0.0005]:
            for do_layers in [1, 2]:
                for bn in [True, False]:
                    args.checkpoints_dir = out_dir_ii = out_dir_i + '/lr_{}_l2_{}_do_{}_bn_{}/checkpoints'.format(
                        lr, l2, do_layers, bn)
                    utils.makedirs(args.checkpoints_dir)
                    out_dir_ii = out_dir_i + '/lr_{}_l2_{}_do_{}_bn_{}'.format(
                        lr, l2, do_layers, bn)
                    writer = Writer(args, outdir=out_dir_ii)

                    # Defining model
                    model = MLPModelWPathways(in_channels,
                                              n_classes,
                                              args.num_layers,
                                              args.hidden_gcn,
                                              args.hidden_fc,
                                              pathway_edge_index.to(device),
                                              n_cmt,
                                              mode=args.feature_agg_mode,
                                              batchnorm=bn,
                                              do_layers=do_layers).to(device)

                    args.model_parameters = utils.count_parameters(model)
                    #     writer.save_args()
Example #21
    hit10 = np.sum(ranks <= 10, axis=0) / len(ranks)
    mrr_sum = (1. / ranks).sum(axis=0)
    mrr = np.tile(
        np.array([mrr_sum[0] + mrr_sum[2], mrr_sum[1] + mrr_sum[3]]) /
        (2 * len(ranks)), 2)
    result = pd.DataFrame({
        "mrr": mrr,
        "mean rank": mean_rank,
        "hit10": hit10
    },
                          index=[
                              "tail: raw ranking", "tail: filtered ranking",
                              "head: raw ranking", "head: filtered ranking"
                          ])
    result["hit10"] = result["hit10"].apply(lambda x: "%.2f%%" % (x * 100))
    ranks = pd.DataFrame(
        ranks,
        columns=["tail:raw", "tail:filtered", "head:raw", "head:filtered"])
    return ranks, result


dis.eval()
ranks, result = evaluate()

writer = Writer(configs)
logger = Logger(configs)
writer.write(result)

if configs.log:
    logger.write(ranks)
Example #22
            )
            sys.exit()

        comparison = Comparison(labeled_file)

        # Train the score threshold
        optimal_threshold = comparison.train_threshold()
        print("Optimal threshold: {}\n".format(optimal_threshold))

        # Train the window and the score threshold
        optimal_context_thresholds = comparison.train_context_thresholds()
        print("Optimal context thresholds: {} (window), {} (score)\n".format(
            optimal_context_thresholds[0], optimal_context_thresholds[1]))

    # Save paragraphs to disk
    Writer.write_dataframe(df_paragraphs, "paragraphs")

    # Show GUI
    if args.show_gui:
        gui = SentimentGUI(df_paragraphs)
        gui.show_gui()

    # Compare labeled data with results
    if args.compare:
        labeled_file = Path("src/output/labeled_paragraphs.json")

        if not labeled_file.exists():
            print(
                'You have to provide a labeled file "labeled_paragraphs.json" for comparison in the output folder'
            )
            sys.exit()
Example #23
db_name = args.db_name
ddl_path = args.ddl_path
db_path = args.db_path
url_mssql = args.mssql_url
pg_url = args.pg_url

downloader = MSSQLDownloader(mssql_queries, url_mssql)
print("downloading db from " + url_mssql)
schema = downloader.load('dbo')
converter = Converter()
converter.convertRam2Xml(schema, result_path + 'dbo.xml')

ddl_generator = DBInitialisator()
ddl = ddl_generator.generate_ddl(schema)
Writer.write(ddl_path + schema.name + ".ddl", '\n'.join(ddl))
print("ddl saved to {}".format(ddl_path + schema.name + ".ddl"))

pg_init = DBInitialisator()

conn = postgresql.open(pg_url)
conn.execute(pg_init.drop_database(db_name))
print("creating database {}".format(db_name))
conn.execute(pg_init.create_database_ddl(db_name))
conn.close()
print("connecting to database {}".format(db_name))
conn = postgresql.open(pg_url + '/' + db_name.lower())

conn.execute('\n '.join(ddl))

data_transfer = DataTransfering(db_name, mssql_url,
Example #24
class Simulator(object):
    def __init__(self):
        super(Simulator, self).__init__()

        self.model = cfg.model(cfg)
        self.optim = cfg.optim(cfg.learning_rate)
        self.loss = cfg.loss
        self.epoch = tf.Variable(0)

        self.writer = Writer(cfg)
        # Restore if save exists
        if Path('./simulator_saves/best').is_dir():
            self.model, self.optim, self.epoch = self.writer.restore(
                model=self.model, optim=self.optim, epoch=self.epoch)

        self.preprocessing()

    def preprocessing(self):
        if cfg.package_data or not Path('./data.pkl').is_file():
            vizdoom = VizDoom(cfg)
            memory = []
            for episode in trange(cfg.gather_epochs):
                vizdoom.new_episode()
                s0 = vizdoom.get_preprocessed_state()

                while not vizdoom.is_episode_finished():
                    action = random.choice(cfg.actions)
                    vizdoom.make_action(action)

                    s1 = vizdoom.get_preprocessed_state()
                    action = np.reshape(
                        action, [1, 1, len(cfg.actions)]).astype(np.float32)

                    memory.append([s0, action, s1])
                    s0 = s1

            with open('data.pkl', 'wb') as f:
                pickle.dump(memory, f)

        # Load data
        with open(cfg.data_dir, 'rb') as f:
            s0, action, s1 = zip(*pickle.load(f))

        self.size = len(s0)
        self.data = tf.data.Dataset.from_tensor_slices(
            (np.array(s0), np.array(action), np.array(s1)))

    def update(self, s0, action, s1):
        # Normalize
        s0_n = tf.image.per_image_standardization(s0)
        truth = tf.image.per_image_standardization(s1) - s0_n
        # Construct graph
        with tf.GradientTape() as tape:
            # Approximate next frame
            logits = self.model(s0_n, action)
            # Compare generated transformation matrix with truth
            loss = tf.reduce_mean(self.loss(truth, logits))

        # Log stats, images
        self.writer.log(self.optim, tape, loss)
        self.writer.log_state("logits", logits)
        self.writer.log_state("truth_logits", truth)
        # Compute/apply gradients
        grads = tape.gradient(loss, self.model.trainable_weights)
        grads_and_vars = zip(grads, self.model.trainable_weights)
        self.optim.apply_gradients(grads_and_vars)

        self.writer.global_step.assign_add(1)

    def train(self):
        for epoch in trange(self.epoch.numpy(), cfg.epochs):
            # Uniform shuffle
            batch = self.data.shuffle(self.size).batch(cfg.batch_size)
            for s0, action, s1 in batch:
                self.update(s0, action, s1)
            self.epoch.assign_add(1)
        self.writer.save(self.model, self.optim, self.epoch)

    def predict(self, s0, action):
        s0_n = tf.image.per_image_standardization(s0)
        logits = self.model(s0_n, action[None])
        return logits + s0_n
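
A minimal usage sketch for the Simulator class above (assumes the module-level cfg object, the VizDoom environment, and the data.pkl pipeline shown in the snippet are available):

    sim = Simulator()  # builds model/optimizer, restores a checkpoint if present, loads data
    sim.train()        # trains up to cfg.epochs epochs and saves state through the Writer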
Example #25
DATA_DIR = 'data/books_processed/'
LR = 0.0025

model_param = dict(max_n=MAX_SEQ_LEN,
                   nb_tokens=50256,
                   d_model=768,
                   d_ff=3072,
                   activation=gelu,
                   h=12,
                   dropout_value=0.1,
                   weight_init=0,
                   bias_init=0.02,
                   mask=True,
                   n_block=1)

writer = Writer(experiment_name='GPT-1')

# load data

transformer = Transformer(**model_param).to(device)
transformer.apply(GPT1_weight_init)

#transformer.load_state_dict(torch.load('model_weights/GPT1_3_9000'))
print('Model loaded')

book_dataset = BookCorpusDataset(DATA_DIR, nb_tokens=MAX_SEQ_LEN + 1)
print('Dataset loaded')
dataset_loader = torch.utils.data.DataLoader(book_dataset,
                                             batch_size=BATCH,
                                             shuffle=SHUFFLE,
                                             num_workers=NUM_WORKERS)
Example #26
elif db:
    print("reading database {} ".format(db))
    db_downloader = DBDownloader(sqlite_queries, db, None)
    schemas_from_db = db_downloader.load()
    schemas = schemas_from_db
    arr = db.replace('.db', '').split('\\')
    db_name = arr[arr.__len__() - 1]

pg_init = DBInitialisator()

print("establishing connection...")
url = postgressql_url
conn = postgresql.open(url)
conn.execute(pg_init.drop_database(db_name))
print("creating database {}".format(db_name))
conn.execute(pg_init.create_database_ddl(db_name))
conn.close()
print("connecting to database {}".format(db_name))
conn = postgresql.open(url + '/' + db_name.lower())

for schema in schemas.values():
    print("generating ddl for schema {}".format(schema.name))
    ddl = pg_init.generate_ddl(schema)  # generate ddl instructions
    Writer.write(ddl_path + db_name + ".ddl", '\n '.join(ddl))
    print("ddl saved to {}".format(ddl_path + db_name + ".ddl"))
    conn.execute('\n '.join(ddl))
    print("schema '{}' was created".format(schema.name))

conn.close()
Example #27
def train_loop(rank, cfg):
    logger = get_logger(cfg, os.path.basename(__file__))
    if cfg.device == "cuda" and cfg.dist.gpus != 0:
        cfg.device = rank
        # turn off background generator when distributed run is on
        cfg.data.use_background_generator = False
        setup(cfg, rank)
        torch.cuda.set_device(cfg.device)

    # setup writer
    if is_logging_process():
        # set log/checkpoint dir
        os.makedirs(cfg.log.chkpt_dir, exist_ok=True)
        # set writer (tensorboard / wandb)
        writer = Writer(cfg, "tensorboard")
        cfg_str = OmegaConf.to_yaml(cfg)
        logger.info("Config:\n" + cfg_str)
        if cfg.data.train_dir == "" or cfg.data.test_dir == "":
            logger.error("train or test data directory cannot be empty.")
            raise Exception("Please specify directories of data")
        logger.info("Set up train process")
        logger.info("BackgroundGenerator is turned off when Distributed running is on")

        # download MNIST dataset before making dataloader
        # TODO: This is example code. You should change this part as you need
        _ = torchvision.datasets.MNIST(
            root=hydra.utils.to_absolute_path("dataset/meta"),
            train=True,
            transform=torchvision.transforms.ToTensor(),
            download=True,
        )
        _ = torchvision.datasets.MNIST(
            root=hydra.utils.to_absolute_path("dataset/meta"),
            train=False,
            transform=torchvision.transforms.ToTensor(),
            download=True,
        )
    # Sync dist processes (because of download MNIST Dataset)
    if cfg.dist.gpus != 0:
        dist.barrier()

    # make dataloader
    if is_logging_process():
        logger.info("Making train dataloader...")
    train_loader = create_dataloader(cfg, DataloaderMode.train, rank)
    if is_logging_process():
        logger.info("Making test dataloader...")
    test_loader = create_dataloader(cfg, DataloaderMode.test, rank)

    # init Model
    net_arch = Net_arch(cfg)
    loss_f = torch.nn.CrossEntropyLoss()
    model = Model(cfg, net_arch, loss_f, rank)

    # load training state / network checkpoint
    if cfg.load.resume_state_path is not None:
        model.load_training_state()
    elif cfg.load.network_chkpt_path is not None:
        model.load_network()
    else:
        if is_logging_process():
            logger.info("Starting new training run.")

    try:
        if cfg.dist.gpus == 0 or cfg.data.divide_dataset_per_gpu:
            epoch_step = 1
        else:
            epoch_step = cfg.dist.gpus
        for model.epoch in itertools.count(model.epoch + 1, epoch_step):
            if model.epoch > cfg.num_epoch:
                break
            train_model(cfg, model, train_loader, writer)
            if model.epoch % cfg.log.chkpt_interval == 0:
                model.save_network()
                model.save_training_state()
            test_model(cfg, model, test_loader, writer)
        if is_logging_process():
            logger.info("End of Train")
    except Exception as e:
        if is_logging_process():
            logger.error(traceback.format_exc())
        else:
            traceback.print_exc()
    finally:
        if cfg.dist.gpus != 0:
            cleanup()
Example #28
def main():
    opt = TrainOptions().parse()
    if opt is None:
        return

    dataset = DataLoader(opt)
    dataset_size = len(dataset) * opt.num_grasps_per_object
    model = create_model(opt)
    writer = Writer(opt)
    total_steps = 0
    for epoch in range(opt.epoch_count, opt.niter + opt.niter_decay + 1):
        epoch_start_time = time.time()
        iter_data_time = time.time()
        epoch_iter = 0
        for i, data in enumerate(dataset):
            iter_start_time = time.time()
            if total_steps % opt.print_freq == 0:
                t_data = iter_start_time - iter_data_time
            total_steps += opt.batch_size
            epoch_iter += opt.batch_size
            model.set_input(data)
            model.optimize_parameters()
            if total_steps % opt.print_freq == 0:
                loss_types = []
                if opt.arch == "vae":
                    loss = [
                        model.loss, model.kl_loss, model.reconstruction_loss,
                        model.confidence_loss, model.l2_loss
                    ]
                    loss_types = [
                        "total_loss", "kl_loss", "reconstruction_loss",
                        "confidence loss", "l2_loss"
                    ]
                elif opt.arch == "gan":
                    loss = [
                        model.loss, model.reconstruction_loss,
                        model.confidence_loss
                    ]
                    loss_types = [
                        "total_loss", "reconstruction_loss", "confidence_loss"
                    ]
                else:
                    loss = [
                        model.loss, model.classification_loss,
                        model.confidence_loss
                    ]
                    loss_types = [
                        "total_loss", "classification_loss", "confidence_loss"
                    ]
                t = (time.time() - iter_start_time) / opt.batch_size
                writer.print_current_losses(epoch, epoch_iter, loss, t, t_data,
                                            loss_types)
                writer.plot_loss(loss, epoch, epoch_iter, dataset_size,
                                 loss_types)

            if i % opt.save_latest_freq == 0:
                print('saving the latest model (epoch %d, total_steps %d)' %
                      (epoch, total_steps))
                model.save_network('latest', epoch)

            iter_data_time = time.time()

        if epoch % opt.save_epoch_freq == 0:
            print('saving the model at the end of epoch %d, iters %d' %
                  (epoch, total_steps))
            model.save_network('latest', epoch)
            model.save_network(str(epoch), epoch)

        print('End of epoch %d / %d \t Time Taken: %d sec' %
              (epoch, opt.niter + opt.niter_decay,
               time.time() - epoch_start_time))
        model.update_learning_rate()
        if opt.verbose_plot:
            writer.plot_model_wts(model, epoch)

        if epoch % opt.run_test_freq == 0:
            acc = run_test(epoch, name=opt.name)
            writer.plot_acc(acc, epoch)

    writer.close()
Example #29
class Visualizer:
    def __init__(self, opt, tensorboard=True):
        # opt is the global options shared in the coding env
        self.opt = opt
        self.tensorboard = tensorboard
        if tensorboard:
            import threading
            # NOTE: self.t is never assigned (the Thread creation below is commented
            # out), so the original self.t.start() call would raise AttributeError;
            # TensorBoard is launched directly instead.
            # self.t = threading.Thread(target=launchTensorBoard, args=([os.path.join(opt.checkpoints_dir, opt.name), opt.port]))
            # self.t.daemon = True
            launchTensorBoard(os.path.join(opt.checkpoints_dir, opt.name),
                              opt.port)
            # self.t.start()
            # self.t.join(20)
            self.writer = SummaryWriter(
                os.path.join(opt.checkpoints_dir, opt.name))
        else:
            self.writer = Writer(os.path.join(opt.checkpoints_dir, opt.name))
        log_filename = os.path.join(opt.checkpoints_dir,
                                    '%s_log.txt' % opt.name)
        self.log = open(log_filename, 'w')
        self.write_opt(opt)
        self.save_image_root = os.path.join(opt.checkpoints_dir, opt.name)

    def add_text(self, tag, text):
        self.writer.add_text(tag, text)

    def write_opt(self, opt):
        args = vars(self.opt)

        self.write_log_rightnow('------------ Options -------------\n')
        for k, v in sorted(args.items()):
            self.write_log_rightnow('%s: %s\n' % (str(k), str(v)))
        self.write_log_rightnow('-------------- End ----------------\n')

    def write_acc(self, acc):
        self.write_log_rightnow('Acc: %f\n' % acc)

    def write_log_rightnow(self, str):
        self.log.write(str)
        self.log.flush()
        os.fsync(self.log)

    def write_confmat(self, conf_mat):
        self.write_log_rightnow('Conf Mat:\n')
        for line in conf_mat:
            for e in line:
                self.write_log_rightnow('%f ' % e)
            self.write_log_rightnow('\n')

    def add_log(self, str):
        # print str
        self.write_log_rightnow('%s\n' % str)

    def write_epoch(self, epoch):
        self.write_log_rightnow('Epoch %d\n' % epoch)

    def plot_errors(self, errors, main_fig_title='errors', time_step=None):
        # example:  main_fig_title = 'GAN_depth_to_image', sub_fig_title = 'train_loss'
        # scalars_dict = {'item': value, 'item2':value},
        # when time_step is None, it uses internal global_time_step
        # for k, v in errors.items():
        tag = main_fig_title + '/'
        self.writer.add_scalars(tag, errors, time_step)

    def kill(self):
        # print self.t.getName()
        self.log.close()

    def display_image(self, images, time_step=None):
        # images : {'real': real_im,'fake':fake_im}
        for item in images.keys():
            self.writer.add_image(item, images[item], time_step)

    def save_images(self, images, epoch):
        for item in images.keys():
            cur_image_fullpath = os.path.join(
                self.save_image_root, 'Epoch_%06d_%s.png' % (epoch, item))
            simple_util.save_image(images[item], cur_image_fullpath)

    def print_errors(self, epoch, batch, batch_all, errors):
        message = '(epoch: %d, (%d/%d)) ' % (epoch, batch, batch_all)
        for k, v in errors.items():
            message += '%s: %.3f ' % (k, v)
        print(message)
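
A hypothetical usage sketch for the Visualizer above (opt is assumed to come from the project's option parser and to provide checkpoints_dir, name and port; only the plain-text logging methods are exercised here):

    vis = Visualizer(opt, tensorboard=False)  # falls back to the project's Writer
    vis.add_log("starting training")
    vis.write_epoch(1)
    vis.write_acc(0.875)
    vis.kill()  # closes the log file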
Example #30
def train_loop(rank, hp, world_size=0):
    if hp.model.device == "cuda" and world_size != 0:
        hp.model.device = rank
        # turn off background generator when distributed run is on
        hp.data.use_background_generator = False
        setup(hp, rank, world_size)
        torch.cuda.set_device(hp.model.device)

    # setup logger / writer
    if rank != 0:
        logger = None
        writer = None
    else:
        # set logger
        logger = make_logger(hp)
        # set writer (tensorboard / wandb)
        writer = Writer(hp, os.path.join(hp.log.log_dir, "tensorboard"))
        hp_str = yaml.dump(hp.to_dict())
        logger.info("Config:")
        logger.info(hp_str)
        if hp.data.train_dir == "" or hp.data.test_dir == "":
            logger.error("train or test data directory cannot be empty.")
            raise Exception("Please specify directories of data")
        logger.info("Set up train process")
        logger.info(
            "BackgroundGenerator is turned off when Distributed running is on")

        # download MNIST dataset before making dataloader
        # TODO: This is example code. You should change this part as you need
        _ = torchvision.datasets.MNIST(
            root="dataset/meta",
            train=True,
            transform=torchvision.transforms.ToTensor(),
            download=True,
        )
        _ = torchvision.datasets.MNIST(
            root="dataset/meta",
            train=False,
            transform=torchvision.transforms.ToTensor(),
            download=True,
        )
    # Sync dist processes (because of download MNIST Dataset)
    if world_size != 0:
        dist.barrier()

    # make dataloader
    if logger is not None:
        logger.info("Making train dataloader...")
    train_loader = create_dataloader(hp, DataloaderMode.train, rank,
                                     world_size)
    if logger is not None:
        logger.info("Making test dataloader...")
    test_loader = create_dataloader(hp, DataloaderMode.test, rank, world_size)

    # init Model
    net_arch = Net_arch(hp)
    loss_f = torch.nn.CrossEntropyLoss()
    model = Model(hp, net_arch, loss_f, rank, world_size)

    # load training state / network checkpoint
    if hp.load.resume_state_path is not None:
        model.load_training_state(logger)
    elif hp.load.network_chkpt_path is not None:
        model.load_network(logger=logger)
    else:
        if logger is not None:
            logger.info("Starting new training run.")

    try:
        if world_size == 0 or hp.data.divide_dataset_per_gpu:
            epoch_step = 1
        else:
            epoch_step = world_size
        for model.epoch in itertools.count(model.epoch + 1, epoch_step):
            if model.epoch > hp.train.num_epoch:
                break
            train_model(hp, model, train_loader, writer, logger)
            if model.epoch % hp.log.chkpt_interval == 0:
                model.save_network(logger)
                model.save_training_state(logger)
            test_model(hp, model, test_loader, writer, logger)
        if logger is not None:
            logger.info("End of Train")
    except Exception as e:
        if logger is not None:
            logger.error(traceback.format_exc())
        else:
            traceback.print_exc()
    finally:
        if world_size != 0:
            cleanup()