示例#1
0
def model_fn(features, labels, mode, params):
    """Build the ``tf.estimator`` graph for the GRU model.

    The network output is treated as class probabilities: it is fed to
    ``tf.log`` for the loss and returned under ``'probabilities'``.

    Args:
        features: Input handed unchanged to the ``GRUModel`` builder.
        labels: Class ids; one-hot encoded with depth 2 in TRAIN mode.
        mode: A ``tf.estimator.ModeKeys`` value. Only TRAIN and PREDICT are
            supported.
        params: Object exposing ``feature_size``, ``gru_num_units``,
            ``attention_size``, ``window_size``, ``feature_extract_layers``,
            ``focal_loss``, ``learning_rate`` and ``clip_gradients``.

    Returns:
        A ``tf.estimator.EstimatorSpec`` for the requested mode.

    Raises:
        NotImplementedError: For any mode other than TRAIN or PREDICT.
    """
    network = GRUModel(params.feature_size, params.gru_num_units,
                       params.attention_size)
    # A window of exactly one step uses the attention-free graph.
    if params.window_size == 1:
        logits = network.build_full_model_without_attention(
            features, params.feature_extract_layers)
    else:
        logits = network.build_full_model_with_window(
            features, params.feature_extract_layers, params.window_size)

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(
            mode=mode, predictions={'probabilities': logits})
    if mode != tf.estimator.ModeKeys.TRAIN:
        raise NotImplementedError()

    one_hot_labels = tf.one_hot(labels, 2)
    eps = 1e-8  # keeps tf.log away from log(0)
    if params.focal_loss:
        # Down-weight confident predictions with the (1 - p)^2 factor.
        per_class = (tf.pow(1 - logits, 2) * one_hot_labels *
                     tf.log(logits + eps))
    else:
        per_class = one_hot_labels * tf.log(logits + eps)
    loss = tf.reduce_mean(-tf.reduce_sum(per_class, reduction_indices=[1]))

    def learning_rate_decay_fn(learning_rate, global_step):
        """Exponential decay: multiply by 0.9 every 2000 steps."""
        return tf.train.exponential_decay(learning_rate,
                                          global_step,
                                          decay_steps=2000,
                                          decay_rate=0.9)

    # L2 penalty over every trainable variable, scaled by 5e-4.
    l2_terms = [tf.nn.l2_loss(var) for var in tf.trainable_variables()]
    regularization_cost = 5e-4 * tf.reduce_sum(l2_terms)
    tf.summary.scalar("regularization_cost", regularization_cost)

    adam = tf.train.AdamOptimizer(params.learning_rate)
    # The optimised objective includes the L2 penalty; the loss reported in
    # the EstimatorSpec below does not.
    train_op = tf.contrib.layers.optimize_loss(
        loss=loss + regularization_cost,
        global_step=tf.train.get_global_step(),
        learning_rate=params.learning_rate,
        optimizer=adam,
        learning_rate_decay_fn=learning_rate_decay_fn,
        clip_gradients=params.clip_gradients)
    return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)
示例#2
0
                                   D_p,
                                   D_e,
                                   D_h,
                                   D_a,
                                   n_classes=n_classes,
                                   listener_state=args.active_listener,
                                   context_attention=args.attention,
                                   dropout_rec=args.rec_dropout,
                                   dropout=args.dropout)

            print('Basic Dialog RNN Model.')

        elif args.base_model == 'GRU':
            model = GRUModel(D_m,
                             D_e,
                             D_h,
                             n_classes=n_classes,
                             dropout=args.dropout)

            print('Basic GRU Model.')

        elif args.base_model == 'LSTM':
            model = LSTMModel(D_m,
                              D_e,
                              D_h,
                              n_classes=n_classes,
                              dropout=args.dropout)

            print('Basic LSTM Model.')

        else:
示例#3
0
    def __init__(self, env, obs_space, action_space, ignoreLTL, gnn_type,
                 dumb_ac, freeze_ltl):
        """Assemble the encoder, memory cell, actor and critic sub-modules.

        Args:
            env: Environment handed to ``getEnvModel``.
            obs_space: Container indexed by observation name; the presence of
                the keys ``"progress_info"`` and ``"text"`` decides which
                formula encoder is built.
            action_space: Forwarded to ``PolicyNetwork``.
            ignoreLTL: When truthy, neither the RNN nor the GNN text encoder
                is enabled.
            gnn_type: ``"GRU"`` or ``"LSTM"`` selects an RNN encoder; a value
                containing ``"GCN"`` selects ``GNNMaker``.
            dumb_ac: When truthy, use the small actor/critic heads.
            freeze_ltl: Stored as ``freeze_pretrained_params``.
        """
        super().__init__()

        def count_trainable(module):
            """Total element count of ``module``'s trainable parameters."""
            return sum(w.numel() for w in module.parameters()
                       if w.requires_grad)

        # Work out which formula encoder (if any) is active.
        has_text = "text" in obs_space
        self.use_progression_info = "progress_info" in obs_space
        self.use_text = (not ignoreLTL and gnn_type in ("GRU", "LSTM")
                         and has_text)
        self.use_ast = not ignoreLTL and ("GCN" in gnn_type) and has_text
        self.gnn_type = gnn_type
        self.device = torch.device(
            "cuda" if torch.cuda.is_available() else "cpu")
        self.action_space = action_space
        self.dumb_ac = dumb_ac

        self.freeze_pretrained_params = freeze_ltl
        if self.freeze_pretrained_params:
            print("Freezing the LTL module.")

        self.env_model = getEnvModel(env, obs_space)

        # Formula embedding: progression info takes precedence over the RNN
        # encoder, which takes precedence over the GNN encoder.
        if self.use_progression_info:
            self.text_embedding_size = 32
            encoder_layers = [
                nn.Linear(obs_space["progress_info"], 64),
                nn.Tanh(),
                nn.Linear(64, self.text_embedding_size),
                nn.Tanh(),
            ]
            self.simple_encoder = nn.Sequential(*encoder_layers).to(
                self.device)
            print("Linear encoder Number of parameters:",
                  count_trainable(self.simple_encoder))

        elif self.use_text:
            self.word_embedding_size = 32
            self.text_embedding_size = 32
            if self.gnn_type == "GRU":
                rnn_cls = GRUModel
            else:
                assert (self.gnn_type == "LSTM")
                rnn_cls = LSTMModel
            self.text_rnn = rnn_cls(obs_space["text"],
                                    self.word_embedding_size, 16,
                                    self.text_embedding_size).to(self.device)
            print("RNN Number of parameters:",
                  count_trainable(self.text_rnn))

        elif self.use_ast:
            self.text_embedding_size = 32
            self.gnn = GNNMaker(self.gnn_type, obs_space["text"],
                                self.text_embedding_size).to(self.device)
            print("GNN Number of parameters:", count_trainable(self.gnn))

        # Recurrent memory over the environment embedding.
        # ``semi_memory_size`` is not set in this method; it is expected to
        # be provided elsewhere on the class.
        self.image_embedding_size = self.env_model.size()
        self.memory_rnn = nn.LSTMCell(self.image_embedding_size,
                                      self.semi_memory_size)
        self.embedding_size = self.semi_memory_size

        # Printed before the formula embedding width is added.
        print("embedding size:", self.embedding_size)
        if self.use_text or self.use_ast or self.use_progression_info:
            self.embedding_size += self.text_embedding_size

        if self.dumb_ac:
            # Minimal heads: default policy network and a linear critic.
            self.actor = PolicyNetwork(self.embedding_size, self.action_space)
            self.critic = nn.Sequential(nn.Linear(self.embedding_size, 1))
        else:
            # Three-hidden-layer ReLU actor and a two-hidden-layer Tanh critic.
            self.actor = PolicyNetwork(self.embedding_size,
                                       self.action_space,
                                       hiddens=[64, 64, 64],
                                       activation=nn.ReLU())
            critic_layers = [
                nn.Linear(self.embedding_size, 64),
                nn.Tanh(),
                nn.Linear(64, 64),
                nn.Tanh(),
                nn.Linear(64, 1),
            ]
            self.critic = nn.Sequential(*critic_layers)

        # Apply the project's initialiser to every registered sub-module.
        self.apply(init_params)
示例#4
0
def _build_net(method, lookBack):
    """Create the network selected by ``method``.

    Args:
        method: One of ``"RNN"``, ``"LSTM"``, ``"GRU"``, ``"ResRNN"``,
            ``"attention"``, ``"ANN"`` or ``"new"``.
        lookBack: Window length; used as ``seqLen`` for the attention model
            and as ``inputDim`` for the ``"ANN"`` and ``"new"`` models.

    Returns:
        The freshly constructed model.

    Raises:
        ValueError: If ``method`` is not one of the supported names.
    """
    if method == "RNN":
        return RNNModel(inputDim=1, hiddenNum=100, outputDim=1, layerNum=1,
                        cell="RNN")
    if method == "LSTM":
        return LSTMModel(inputDim=1, hiddenNum=100, outputDim=1, layerNum=1,
                         cell="LSTM")
    if method == "GRU":
        return GRUModel(inputDim=1, hiddenNum=100, outputDim=1, layerNum=1,
                        cell="GRU")
    if method == "ResRNN":
        return ResRNNModel(inputDim=1, hiddenNum=100, outputDim=1,
                           resDepth=-1)
    if method == "attention":
        return AttentionRNNModel(inputDim=1, hiddenNum=100, outputDim=1,
                                 seqLen=lookBack)
    if method == "ANN":
        return ANNModel(inputDim=lookBack, hiddenNum=100, outputDim=1)
    if method == "new":
        return DecompositionNetModel(inputDim=lookBack,
                                     fchiddenNum=100,
                                     rnnhiddenNum=100,
                                     outputDim=1)
    # Previously an unknown name left ``net`` unbound and failed later with
    # an unrelated NameError.
    raise ValueError("unknown training method: %r" % (method,))


def _run_epochs(net, optimizer, trainX, trainY, epoch, batchSize, batchNum,
                method, logEvery=30):
    """Run the mini-batch loop and return the list of logged mean losses."""
    # "new" uses the project's MSE_Loss; every other model uses nn.MSELoss.
    criterion = MSE_Loss if method == "new" else nn.MSELoss()
    lossList = []
    for i in range(epoch):
        # NOTE(review): the seed is the total epoch count, so it is the same
        # on every pass -- confirm this is intended rather than ``i``.
        trainX, trainY = shuffle(trainX, trainY, random_state=epoch)
        lossSum = 0.0
        lossCount = 0

        for j in range(batchNum):
            batchStart = j * batchSize
            x = torch.from_numpy(trainX[batchStart:batchStart + batchSize, :, :])
            y = torch.from_numpy(trainY[batchStart:batchStart + batchSize])
            x, y = Variable(x), Variable(y)

            optimizer.zero_grad()
            pred = net.forward(x, batchSize=batchSize)
            loss = criterion(pred, y)

            # ``.item()`` works for 0-dim loss tensors, where the former
            # ``loss.data.numpy()[0]`` raises IndexError.
            lossSum += loss.item()
            lossCount += 1
            if j % logEvery == 0 and j != 0:
                # Average over the batches actually accumulated; the old
                # code divided by a fixed 10.
                meanLoss = lossSum / lossCount
                print("current loss is:", meanLoss)
                lossList.append(meanLoss)
                lossSum = 0.0
                lossCount = 0

            loss.backward()
            optimizer.step()
        print("%d epoch is finished!" % i)
    return lossList


def train(trainX, trainY, epoch, lr, batchSize, modelPath, lookBack, method,
          lossFilePath="../model/loss_ResRNN-4.pkl"):
    """Train the model named by ``method`` and save it with its loss log.

    Args:
        trainX: Training inputs, sliced as ``trainX[a:b, :, :]`` and passed
            to ``torch.from_numpy``.
        trainY: Training targets, sliced along the first axis.
        epoch: Number of passes over the data.
        lr: Learning rate for the RMSprop optimizer (momentum 0.9).
        batchSize: Number of samples per mini-batch.
        modelPath: Destination passed to ``torch.save`` for the trained net.
        lookBack: Window length forwarded to the model constructor.
        method: Model name; see ``_build_net`` for the accepted values.
        lossFilePath: Where the list of logged mean losses is dumped.
            Defaults to the path that used to be hard-coded.

    Raises:
        ValueError: If ``method`` is not a supported model name.
    """
    n = trainX.shape[0]
    print("trainx num is:", n)
    # One fewer than the number of full batches, as in the original code.
    batchNum = n // batchSize - 1
    print("batch num is:", batchNum)

    net = _build_net(method, lookBack)
    optimizer = optim.RMSprop(net.parameters(), lr=lr, momentum=0.9)

    # Open the loss file before training so a bad path fails fast, and use a
    # context manager so the handle is always closed.
    with open(lossFilePath, 'wb') as output:
        t1 = time.time()
        lossList = _run_epochs(net, optimizer, trainX, trainY, epoch,
                               batchSize, batchNum, method)
        t2 = time.time()
        print("train time:", t2 - t1)
        # ``p`` is presumably the file's pickle alias -- confirm at the imports.
        p.dump(lossList, output, -1)

    torch.save(net, modelPath)
示例#5
0
def create_batch(tensor, batch_size):
    """Return a list holding ``batch_size`` references to ``tensor``.

    The entries all alias the same object; nothing is copied.
    """
    return [tensor for _ in range(batch_size)]


# Replicate each zero_* template (defined outside this snippet) to the batch
# size.
batch_zero_parah, batch_zero_input, batch_zero_state = (
    create_batch(template, batch_size)
    for template in (zero_parah, zero_input, zero_state))

costs = []

# Open a session, build the graph and load weights from ./save.
sess = tf.Session()
model = GRUModel(input_c, input_q, input_r, input_w, state, dropout,
                 num_hidden=vec_len)
model.load(sess, save_dir='save', dataset=dataset)


# ==================================================
def encode(v, q):
    prev = batch_zero_state
    for x in q:
        batch_q = create_batch([x], batch_size)  # each word from vq
        for y in v:
            batch_w = create_batch([y], batch_size)  # each word from vc
            prev = sess.run(
                model.prediction, {
示例#6
0
                                  unk_token='[UNK]',
                                  pad_token='[PAD]')
    # Loads dataset.
    train_ds, dev_ds, test_ds = load_dataset("chnsenticorp",
                                             splits=["train", "dev", "test"])

    # Constructs the network.
    network = args.network.lower()
    vocab_size = len(vocab)
    num_classes = len(train_ds.label_list)
    pad_token_id = vocab.to_indices('[PAD]')
    if network == 'bow':
        model = BoWModel(vocab_size, num_classes, padding_idx=pad_token_id)
    elif network == 'bigru':
        model = GRUModel(vocab_size,
                         num_classes,
                         direction='bidirect',
                         padding_idx=pad_token_id)
    elif network == 'bilstm':
        model = LSTMModel(vocab_size,
                          num_classes,
                          direction='bidirect',
                          padding_idx=pad_token_id)
    elif network == 'bilstm_attn':
        lstm_hidden_size = 196
        attention = SelfInteractiveAttention(hidden_size=2 * stm_hidden_size)
        model = BiLSTMAttentionModel(attention_layer=attention,
                                     vocab_size=vocab_size,
                                     lstm_hidden_size=lstm_hidden_size,
                                     num_classes=num_classes,
                                     padding_idx=pad_token_id)
    elif network == 'birnn':
示例#7
0

# MNIST train/test splits as tensors; only the training split is downloaded
# on demand.
trainset = dsets.MNIST(root='./data', train=True, transform=transforms.ToTensor(), download=True)
testset = dsets.MNIST(root='./data', train=False, transform=transforms.ToTensor())


batch_size = 100

# Shuffle the training data only; keep the test order fixed.
trainloader = torch.utils.data.DataLoader(dataset=trainset, batch_size=batch_size, shuffle=True)
testloader = torch.utils.data.DataLoader(dataset=testset, batch_size=batch_size, shuffle=False)


# Model dimensions.
# NOTE(review): presumably each image is fed as seq_dim steps of n_in
# features -- confirm against ModelEvaluator.
n_in = 28
n_hidden = 100
n_out = 10
seq_dim = 28
# Use the GPU only when one is present; a hard-coded True made
# ``model.cuda()`` fail on CPU-only hosts.
use_gpu = torch.cuda.is_available()

#model = LSTMModel(n_in, n_hidden, n_out, batch_size, use_gpu)
model = GRUModel(n_in, n_hidden, n_out, batch_size, use_gpu)

if use_gpu:
    model.cuda()

# Training hyper-parameters handed to the evaluator.
l2 = 0.0
lr = 0.001
epochs = 10
optim = 'adam'
modeleval = ModelEvaluator(model, epochs, lr, batch_size, l2, use_gpu, optim)
acc_ = modeleval.evaluator(trainloader, testloader, seq_dim, n_in)
示例#8
0
文件: main.py 项目: mlsamsom/mlutils
from utils import graph_spectrogram
from model import GRUModel

# Module-level constants; none of them is read in the visible code.
# NOTE(review): presumably input steps, output steps and frequency bins of
# the spectrogram -- confirm against the model/config.
Tx, Ty, n_freq = 5511, 1375, 101

# Build the model from the YAML config only when run as a script.
if __name__ == "__main__":
    m = GRUModel("./config.yaml")
示例#9
0
print("Vocabulary Size: {:d}".format(len(vocab.vocabulary_)))
print("Train Question Size: {:d}".format(len(train_data)))
# Build Model
# ==================================================
# Every input placeholder is (batch, rows, row_size), with a single row whose
# width is the length of the first entry of ``iq``.
rows = 1
row_size = len(iq[0])
input_c, input_q, input_r, input_w = (
    tf.placeholder(tf.float32, [None, rows, row_size], name=tensor_name)
    for tensor_name in ("ic", "iq", "ir", "iw"))
state = tf.placeholder(tf.float32, [None, row_size], name="state")
dropout = tf.placeholder(tf.float32, name="dropout")
print("Building Model...")
model = GRUModel(input_c, input_q, input_r, input_w, state, dropout,
                 num_hidden=max_len)


# Train Model
# ==================================================
def encode(c_batch, q_batch):
    def merge(article, question):
        prev = zero_state
        for sent in article:
            prev = sess.run(
                model.prediction,
                {
                    input_c: [sent],  # 1 x [rows x row_size]
示例#10
0
# Load the train/test splits and wrap each one in a DataLoader; only the
# training loader shuffles.
train_set, test_set = NameDataset(train=True), NameDataset(train=False)
train_loader, test_loader = (
    DataLoader(dataset=split,
               batch_size=BATCH_SIZE,
               shuffle=reshuffle,
               num_workers=NUM_WORKERS)
    for split, reshuffle in ((train_set, True), (test_set, False)))

# Final output dimension (number of classes).
N_COUNTRY = train_set.getCountriesNum()

# Build the model. Arguments, in order: dictionary length (embedding-layer
# dimension), hidden size, number of countries (output dimension), number of
# GRU layers.
model = GRUModel(N_CHARS, HIDDEN_SIZE, N_COUNTRY, N_LAYER)
model.to(device)

# Loss and optimizer: mean-reduced cross-entropy with SGD + momentum.
criterion = nn.CrossEntropyLoss(reduction='mean')
# optimizer = optim.Adam(model.parameters(),
#                        lr=LEARNING_RATE)
optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE, momentum=0.5)


# define train and test model
def train():
    for (names, countries) in train_loader:
        inputs, seq_lengths, target = make_tensors(names, countries)
        y_pred = model(inputs, seq_lengths)  # forward
        loss = criterion(y_pred, target)
示例#11
0
def main():
    """Export a trained sentiment model as a static graph.

    Reads ``args.vocab_path``, ``args.network``, ``args.params_path`` and
    ``args.output_path``: builds the network named by ``args.network``
    (matched case-insensitively), loads its parameters, converts it with
    ``paddle.jit.to_static`` and saves it to ``args.output_path``.

    Raises:
        ValueError: If ``args.network`` names no supported architecture.
    """
    # Load vocab.
    vocab = Vocab.from_json(args.vocab_path)
    label_map = {0: 'negative', 1: 'positive'}

    # Constructs the network.
    network = args.network.lower()
    vocab_size = len(vocab)
    num_classes = len(label_map)
    pad_token_id = vocab.to_indices('[PAD]')
    if network == 'bow':
        model = BoWModel(vocab_size, num_classes, padding_idx=pad_token_id)
    elif network == 'bigru':
        model = GRUModel(vocab_size,
                         num_classes,
                         direction='bidirect',
                         padding_idx=pad_token_id)
    elif network == 'bilstm':
        model = LSTMModel(vocab_size,
                          num_classes,
                          direction='bidirect',
                          padding_idx=pad_token_id)
    elif network == 'bilstm_attn':
        lstm_hidden_size = 196
        # The attention layer sees both directions, hence twice the size.
        attention = SelfInteractiveAttention(hidden_size=2 * lstm_hidden_size)
        model = BiLSTMAttentionModel(attention_layer=attention,
                                     vocab_size=vocab_size,
                                     lstm_hidden_size=lstm_hidden_size,
                                     num_classes=num_classes,
                                     padding_idx=pad_token_id)
    elif network == 'birnn':
        model = RNNModel(vocab_size,
                         num_classes,
                         direction='bidirect',
                         padding_idx=pad_token_id)
    elif network == 'cnn':
        model = CNNModel(vocab_size, num_classes, padding_idx=pad_token_id)
    elif network == 'gru':
        model = GRUModel(vocab_size,
                         num_classes,
                         direction='forward',
                         padding_idx=pad_token_id,
                         pooling_type='max')
    elif network == 'lstm':
        model = LSTMModel(vocab_size,
                          num_classes,
                          direction='forward',
                          padding_idx=pad_token_id,
                          pooling_type='max')
    elif network == 'rnn':
        model = RNNModel(vocab_size,
                         num_classes,
                         direction='forward',
                         padding_idx=pad_token_id,
                         pooling_type='max')
    else:
        raise ValueError(
            "Unknown network: %s, it must be one of bow, lstm, bilstm, cnn, gru, bigru, rnn, birnn and bilstm_attn."
            % network)

    # Load model parameters.
    state_dict = paddle.load(args.params_path)
    model.set_dict(state_dict)
    model.eval()

    inputs = [paddle.static.InputSpec(shape=[None, None], dtype="int64")]
    # Convert to static graph with specific input description.
    # Test the normalised ``network`` name, not the raw ``args.network``:
    # otherwise "--network LSTM" builds an LSTM but omits its seq_len input.
    if network in ("lstm", "bilstm", "gru", "bigru", "rnn", "birnn",
                   "bilstm_attn"):
        inputs.append(paddle.static.InputSpec(shape=[None],
                                              dtype="int64"))  # seq_len

    model = paddle.jit.to_static(model, input_spec=inputs)
    # Save in static graph model.
    paddle.jit.save(model, args.output_path)