def main():
    """Build a Transformer from the project data iterator and train it for 25 epochs."""
    # Prefer GPU when available; `device` is forwarded to train() below.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Removed a long commented-out block of dead toy-dataset setup code here;
    # the real pipeline comes from DataLoader(32) below.
    # DataLoader(32) yields train/val/test iterators plus the index dictionary,
    # max sequence lengths, and vocabulary sizes for both source and target.
    traindata, valdata, testdata, indexdict, (src_maxlen, trg_maxlen), (src_voc_size, tgt_voc_size) = \
        DataLoader(32).get_data_iterator()

    model = Transformer(src_vocab_size=src_voc_size,
                        src_max_len=src_maxlen,
                        tgt_vocab_size=tgt_voc_size,
                        tgt_max_len=trg_maxlen)

    # Warmup learning-rate schedule (400 steps) wrapped around Adam;
    # only parameters with requires_grad are handed to the optimizer.
    optimizer = MyOptimizer(model_size=512,
                            factor=1.0,
                            warmup=400,
                            optimizer=optim.Adam(filter(
                                lambda x: x.requires_grad, model.parameters()),
                                                 betas=(0.9, 0.98),
                                                 eps=1e-09))
    train(model, traindata, valdata, optimizer, indexdict, device, 25)
Пример #2
0
    def __init__(self):

        self.df, self.df_log = (DataLoader())()

        parser = argparse.ArgumentParser(
            description='Manager of the project',
            usage='''manage.py <command>'

The most commonly used commands are:
   dataset      Access to the dataset
   landscape    Access to the landscape persistence
   norm         Access to the norm of the persistence
   bottleneck   Access to the bottleneck of the persistence
''')
        parser.add_argument('command', help='Subcommand to run')

        if len(sys.argv) < 2:
            print('No command')
            parser.print_help()
            exit(1)
        # parse_args defaults to [1:] for args, but you need to
        # exclude the rest of the args too, or validation will fail
        args = parser.parse_args(sys.argv[1:2])
        if not hasattr(self, args.command):
            print('Unrecognized command')
            parser.print_help()
            exit(1)

        # use dispatch pattern to invoke method with same name
        getattr(self, args.command)()
Пример #3
0
def random():
    """Restore the latest LSTM checkpoint and print 16 random draw predictions.

    NOTE: the name shadows the stdlib `random` module if it is imported here;
    kept unchanged because callers depend on it.
    """
    # makedirs(exist_ok=True) replaces the racy exists()/mkdir() pair and the
    # unidiomatic `is False` comparison.
    os.makedirs("log", exist_ok=True)
    os.makedirs("model", exist_ok=True)

    loader_test = DataLoader()
    loader_test.load_xls("dlt2.xls")

    rnn = ModelRNN("log",
                   "model",
                   lstm_size=128,
                   num_layers=2,
                   learning_rate=0.001)

    # Batch size 1 with test_mode=True: inference-only graph.
    rnn.build_lstm_model_lstm(1,
                              loader_test.get_seq_len(),
                              1,
                              loader_test.get_classes_count(),
                              test_mode=True)

    # ckp_path=None -> predict() restores the latest checkpoint itself.
    rnn.predict(loader_test, 16, None)
Пример #4
0
def train():
    """Build the LSTM training graph from the xls dataset and train it."""
    print("run...")

    # makedirs(exist_ok=True) replaces the racy exists()/mkdir() pair and the
    # unidiomatic `is False` comparison.
    os.makedirs("log", exist_ok=True)
    os.makedirs("model", exist_ok=True)

    loader = DataLoader()
    loader.load_xls("dlt2.xls")

    rnn = ModelRNN("log",
                   "model",
                   lstm_size=128,
                   num_layers=2,
                   learning_rate=0.001)

    # Batch size 32 with test_mode=False: full training graph.
    rnn.build_lstm_model_lstm(32,
                              loader.get_seq_len(),
                              1,
                              loader.get_classes_count(),
                              test_mode=False)

    rnn.train(loader)
Пример #5
0
def main(argv):
    """Entry point: build or restore a model per `argv`, then train or test it.

    `argv` is an argparse-style namespace. When resuming from a checkpoint,
    the namespace saved inside the checkpoint REPLACES the fresh one (a few
    runtime fields are copied over first), and the model is rebuilt to match.
    """
    print("Torch GPU set:", argv.gpu_id)
    torch.manual_seed(argv.seed)
    print("Torch Seed was:", argv.seed)
    utils.mkdir(os.path.join(argv.res_root, argv.model_folder))

    loader = DataLoader(argv)
    loader.read_data()
    argv.desired_len = loader.desired_len
    # assert loader.max_idx == argv.max_idx

    # 'nb' (presumably naive Bayes) is the only non-neural model; it skips
    # the CUDA placement below.
    is_nn = (argv.model != 'nb')
    model = create_model(argv, loader)

    # load chkpt
    checkpoint = None
    if argv.mode == 'test' or argv.use_checkpoint:

        # Locate the first file matching "<name>_epoch<ver>" in the model folder.
        chkpt_load_path = None
        for file in os.listdir(os.path.join(argv.res_root, argv.model_folder)):
            if f"{argv.checkpoint_name}_epoch{argv.checkpoint_ver}" in file:
                chkpt_load_path = os.path.join(argv.res_root,
                                               argv.model_folder, file)
                break
        if chkpt_load_path is None:
            raise Exception("Can't find checkpoint")

        print(f"\tLoading {chkpt_load_path}")
        checkpoint = torch.load(chkpt_load_path)
        # old argv content that still want to keep
        checkpoint['argv'].output = argv.output
        checkpoint['argv'].mode = argv.mode
        checkpoint['argv'].use_checkpoint = argv.use_checkpoint
        assert checkpoint['epoch'] == argv.checkpoint_ver
        checkpoint['argv'].checkpoint_ver = argv.checkpoint_ver

        # From here on, the run uses the checkpoint's saved configuration.
        argv = checkpoint['argv']
        epoch = checkpoint['epoch'] + 1  # NOTE(review): computed but never used here
        model = create_model(argv, loader)
        loader.desired_len = argv.desired_len
        loader.batch_size = argv.batch_size

    if USE_CUDA and is_nn:
        torch.cuda.set_device(argv.gpu_id)
        model = model.cuda()

    print(f"\n{argv.mode} {type(model).__name__} {'#'*50}")
    if argv.mode == 'test':
        # Test mode always restores weights from the checkpoint loaded above.
        model.load_state_dict(checkpoint['model_state_dict'])
        test_model(model, loader, is_nn)
    else:
        # Training may resume from `checkpoint` (None for a fresh run).
        train_model(model, loader, checkpoint, is_nn)

    print()
Пример #6
0
    def predict(self, loader: DataLoader, seq_count, ckp_path):
        """Restore a checkpoint and sample `seq_count` candidate number sequences.

        loader: data source; only statistics_back() is consulted here.
        seq_count: how many random seeds (sequences) to try.
        ckp_path: explicit checkpoint path, or None to use the latest one
            found in self.model_path.
        """
        if ckp_path is None:
            last_model_path = tf.train.latest_checkpoint(self.model_path)
        else:
            last_model_path = ckp_path

        saver = tf.train.Saver()

        # Cap GPU memory at 70% and let TF grow allocations on demand.
        config = tf.ConfigProto()
        config.gpu_options.per_process_gpu_memory_fraction = 0.7
        config.gpu_options.allow_growth = True

        with tf.Session(config=config) as sess:

            print("last Model : {}".format(last_model_path))
            saver.restore(sess, last_model_path)

            local_prediction = tf.argmax(self.prediction, 1)
            state_in = sess.run(self.state_in)
            # Freeze the graph so the loops below cannot add ops by accident.
            sess.graph.finalize()

            for i in range(seq_count):
                # Random symbol seed, reshaped to rank 3 -- presumably
                # (batch, time, feature) = (1, 1, 1); TODO confirm.
                x = np.array([[np.random.randint(1, self.symbols_len, 1)[0]]])
                x = np.reshape(x, x.shape + (1, ))
                result = []
                for j in range(5):

                    seq_len_ = np.array([x.shape[1]])

                    feed = {
                        self.inputs: x,
                        self.keep_prob: 1.0,  # no dropout at inference time
                        self.state_in: state_in,
                        self.seq_len_to_dynamic_rnn: seq_len_
                    }

                    # Feed each prediction back in as the next input, carrying
                    # the RNN state forward across the 5 steps.
                    preds, state_in = sess.run(
                        [local_prediction, self.state_out], feed_dict=feed)
                    x[0] = preds[0]
                    result.append(preds[0])
                # Only report draws with 5 distinct front numbers and 2
                # distinct back numbers.
                front = list(set(result))
                back = list(set(loader.statistics_back()))
                if len(front) == 5 and len(back) == 2:
                    print("{}: front {} back {}".format(i, front, back))
from LoadData import DataLoader
from argparse import Namespace

# Script: export the loaded dataset into flat text files for an ensemble run.
data_path = "./src_zzc/data/Training.txt"
dest = "./src_zzc/data_ensemble"

# Minimal argv namespace mimicking the command-line configuration DataLoader expects.
argv = Namespace()
argv.data_path = data_path
argv.batch_size = 50
argv.fold = 5
argv.mode = 'train'
argv.desired_len_percent = 0.5
argv.num_label = 20
argv.seed = 1587064891

loader = DataLoader(argv)
loader.read_data()

# Training split: one label per line in the *_Label file, and the matching
# message as comma-separated word tokens in the main file.
train = open(os.path.join(dest, 'Training.txt'), 'w')
train_label = open(os.path.join(dest, 'Training_Label.txt'), 'w')
for gt, msg in loader.data:
    train_label.write(f"{gt}\n")
    train.write(",".join([str(word) for word in msg]))
    train.write("\n")

train.close()
train_label.close()

# Validation split, same format (loop body truncated in this excerpt).
train = open(os.path.join(dest, 'Training_val.txt'), 'w')
train_label = open(os.path.join(dest, 'Training_val_Label.txt'), 'w')
for gt, msg in loader.data_val:
Пример #8
0
    def test(self, samples_loader: DataLoader = None, ckp_path: str = None):
        """Evaluate the restored model over the whole test set, one sample at a time.

        Prints a per-batch error rate and a final overall match percentage
        and average loss.

        samples_loader: source of test batches (next_batch_test / get_test_count).
        ckp_path: explicit checkpoint path, or None to use the latest one
            found in self.model_path.
        """
        if ckp_path is None:
            last_model_path = tf.train.latest_checkpoint(self.model_path)
        else:
            last_model_path = ckp_path

        saver = tf.train.Saver()

        # Cap GPU memory at 70% and let TF grow allocations on demand.
        config = tf.ConfigProto()
        config.gpu_options.per_process_gpu_memory_fraction = 0.7
        config.gpu_options.allow_growth = True

        with tf.Session(config=config) as sess:

            print("last Model : {}".format(last_model_path))
            saver.restore(sess, last_model_path)

            # Evaluation walks the test set one sample per step.
            self.train_size = samples_loader.get_test_count()
            self.BATCH_SIZE = 1
            next_sample_step = 1

            n_batchs_in_epoch = int(
                self.train_size / self.BATCH_SIZE) // next_sample_step

            state_in = sess.run(self.state_in)

            error_count = 0
            amount = 0
            loss_count = 0

            local_prediction = tf.argmax(self.prediction, 1)

            # Freeze the graph so the loop below cannot add ops by accident.
            sess.graph.finalize()

            for i in range(n_batchs_in_epoch):

                x, y = samples_loader.next_batch_test()
                # Append a trailing feature axis of size 1.
                x = np.reshape(x, x.shape + (1, ))
                seq_len_ = np.array([x.shape[1]])

                feed = {
                    self.inputs: x,
                    self.targets: y,
                    self.keep_prob: 1.0,  # no dropout during evaluation
                    self.state_in: state_in,
                    self.seq_len_to_dynamic_rnn: seq_len_
                }

                # RNN state is carried across samples via state_in/state_out.
                preds, loss, state_in = sess.run(
                    [local_prediction, self.loss, self.state_out],
                    feed_dict=feed)

                # Non-zero entries of the difference are misclassifications.
                diff = preds - y

                error_count += np.count_nonzero(diff)
                loss_count += loss

                amount += diff.size
                error_rate = np.count_nonzero(diff) / diff.size
                print("{} / {} errorRate: {} ; loss {}".format(
                    i, n_batchs_in_epoch, error_rate, loss))

            print("result match: {} % , avg loss {}".format(
                (100 * (amount - error_count)) / amount,
                loss_count / n_batchs_in_epoch))
Пример #9
0
    def train(self, samples_loader: DataLoader = None):
        """Train the LSTM model, logging summaries, running periodic validation,
        and checkpointing every `save_every_n` iterations plus once at the end.

        samples_loader: source of train/validation batches
            (next_batch_train / next_batch_validation and their counts).
        """
        print("trainer")

        np.random.seed(int(time.time()))

        epochs = 1000

        # Checkpoint frequency, in training iterations.
        save_every_n = 200

        # One timestamped log directory per run.
        TIMESTAMP = "{0:%Y-%m-%d-%H-%M-%S/}".format(datetime.now())

        train_log_dir = os.path.join(self.log_path, TIMESTAMP)

        saver = tf.train.Saver()

        # Cap GPU memory at 70% and let TF grow allocations on demand.
        config = tf.ConfigProto()
        config.gpu_options.per_process_gpu_memory_fraction = 0.7
        config.gpu_options.allow_growth = True

        with tf.Session(config=config) as sess:

            sess.run(tf.global_variables_initializer())

            writer = tf.summary.FileWriter(train_log_dir, sess.graph)

            state_in = sess.run(self.state_in)

            local_prediction = tf.argmax(self.prediction, 1)

            # Freeze the graph so the loops below cannot add ops by accident.
            sess.graph.finalize()

            counter = 0
            for e in range(epochs):
                # Train network

                self.train_size = samples_loader.get_train_count()
                self.BATCH_SIZE = 2

                n_batchs_in_epoch = max(1,
                                        int(self.train_size / self.BATCH_SIZE))
                # print(n_batchs_in_epoch)

                for i in range(n_batchs_in_epoch):

                    counter += 1

                    # NOTE(review): batching uses self.batch_size here while
                    # n_batchs_in_epoch above uses self.BATCH_SIZE = 2 --
                    # confirm these are meant to agree.
                    x, y = samples_loader.next_batch_train(self.batch_size)
                    # Append a trailing feature axis of size 1.
                    x = np.reshape(x, x.shape + (1, ))
                    start = time.time()

                    feed = {
                        self.inputs: x,
                        self.targets: y,
                        self.keep_prob: 1.0,  # NOTE(review): dropout disabled during training too -- intended?
                        self.state_in: state_in
                    }

                    # One optimizer step; RNN state carries across batches.
                    summary, batch_loss, state_in, _ = sess.run([
                        self.merged, self.loss, self.state_out, self.optimizer
                    ],
                                                                feed_dict=feed)
                    end = time.time()

                    # Progress report + summary write every 100 iterations.
                    if counter % 100 == 0:
                        print('epochs: {}/{}... '.format(e + 1, epochs),
                              'iterations: {}... '.format(counter),
                              'error: {:.4f}... '.format(batch_loss),
                              '{:.4f} sec/batch'.format((end - start)))
                        writer.add_summary(summary, counter)

                    # Validation pass, also every 100 iterations.
                    if counter % 100 == 0:
                        error_count = 0
                        loss_count = 0
                        amount = 0
                        for _ in range(samples_loader.get_validation_count() //
                                       self.batch_size):
                            x, y = samples_loader.next_batch_validation(
                                self.batch_size)
                            x = np.reshape(x, x.shape + (1, ))

                            feed = {
                                self.inputs: x,
                                self.targets: y,
                                self.keep_prob: 1.0,
                                self.state_in: state_in
                            }

                            preds, loss, state_in = sess.run(
                                [local_prediction, self.loss, self.state_out],
                                feed_dict=feed)

                            # Flatten targets to match the argmax prediction shape;
                            # non-zero entries are misclassifications.
                            diff = preds - np.reshape(y, [-1])

                            error_count += np.count_nonzero(diff)
                            loss_count += loss
                            amount += diff.size

                        # NOTE(review): average divides by n_batchs_in_epoch,
                        # not by the number of validation batches -- confirm.
                        print("validation match: {} % , avg loss {}".format(
                            (100 * (amount - error_count)) / amount,
                            loss_count / n_batchs_in_epoch))

                    # Periodic checkpoint, tagged with the latest training loss.
                    if (counter % save_every_n) == 0:
                        saver.save(
                            sess,
                            os.path.join(
                                self.model_path, "{}_trainingLoss{}".format(
                                    self.prefix_name(), batch_loss)))

            # Final checkpoint after all epochs complete.
            saver.save(
                sess,
                os.path.join(
                    self.model_path,
                    "{}_trainingLoss{}".format(self.prefix_name(),
                                               batch_loss)))