Example No. 1
    def load_word_embeddings(emb_path,
                             emb_dim,
                             data_file,
                             min_freq=1,
                             verbose=True):
        ## pre-load emb words
        from deepats import ets_reader
        from deepats.w2vEmbReader import W2VEmbReader as EmbReader

        emb_reader = EmbReader(emb_path, emb_dim)
        emb_words = emb_reader.load_words()

        text = U.read_col(data_file, col=-1, type='string')
        vocab = ets_reader.create_vocab(text,
                                        tokenize_text=True,
                                        to_lower=True,
                                        min_word_freq=min_freq,
                                        emb_words=emb_words)
        #  vocab = {'<pad>':0, '<unk>':1, '<num>':2, .....}

        #######################################################
        pad = '<pad>'
        unk = '<unk>'
        num = '<num>'
        words = set(vocab)
        words.discard(pad)
        words.discard(unk)
        words.discard(num)

        emb_file = emb_path.format(emb_dim)
        word2emb = load_embeddings(emb_file,
                                   filter_words=words,
                                   verbose=verbose)

        n = len(word2emb) + 3
        d = word2emb[next(iter(word2emb))].size
        E = np.zeros(
            [n, d],
            dtype=np.float32)  # rows 0-2 (<pad>, <unk>, <num>) keep all-zero embeddings

        word_vocab = Vocab(unk_index=1)
        word_vocab.feed(pad)  # <pad> is at index 0 in word vocab
        word_vocab.feed(
            unk
        )  # <unk> is at index 1 in word vocab --> so idx=1 returned for unknown toks
        word_vocab.feed(num)  # <num> is at index 2 in word vocab
        for word in list(word2emb):
            idx = word_vocab.feed(word)
            E[idx, :] = word2emb[word]
            #print(word)

        return E, word_vocab
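
The (E, word_vocab) pair returned above is normally handed to an embedding layer. A minimal sketch, assuming Keras 2; the file names, dimension, and layer settings below are placeholders rather than values taken from deepats:

    # Sketch (assumption): wire the pre-trained matrix E into a Keras Embedding layer.
    # Row order follows word_vocab.feed(): 0='<pad>', 1='<unk>', 2='<num>', then corpus words.
    from keras.layers import Embedding

    emb_path, emb_dim, data_file = 'vectors.{}d.txt', 50, 'train.tsv'  # placeholders
    E, word_vocab = load_word_embeddings(emb_path, emb_dim, data_file)

    emb_layer = Embedding(input_dim=E.shape[0],
                          output_dim=E.shape[1],
                          weights=[E],        # initialize with the pre-trained vectors
                          mask_zero=True,     # index 0 is '<pad>'
                          trainable=False)    # keep the vectors fixed during training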
Example No. 2
    def load_word_embeddings_NEW(emb_path,
                                 emb_dim,
                                 data_file,
                                 min_freq=1,
                                 unk='<unk>',
                                 eos='+',
                                 verbose=True):
        ## pre-load emb words
        from deepats import ets_reader
        from deepats.w2vEmbReader import W2VEmbReader as EmbReader

        emb_reader = EmbReader(emb_path, emb_dim)
        emb_words = emb_reader.load_words()

        text = U.read_col(data_file, col=-1, type='string')
        vocab = ets_reader.create_vocab(text,
                                        tokenize_text=True,
                                        to_lower=True,
                                        min_word_freq=min_freq,
                                        emb_words=emb_words)

        #######################################################
        words = set(vocab)
        words.discard(unk)

        emb_file = emb_path.format(emb_dim)
        word2emb = load_embeddings(emb_file,
                                   filter_words=words,
                                   verbose=verbose)

        n = len(word2emb) + 3  # only <unk> (and optionally eos) are fed below, so at least one of these extra rows stays unused
        d = word2emb[next(iter(word2emb))].size
        E = np.zeros(
            [n, d],
            dtype=np.float32)  # <unk> is given all-zero embedding... at E[0,:]

        word_vocab = Vocab()
        word_vocab.feed(unk)
        if eos: word_vocab.feed(eos)

        for word in list(word2emb):
            idx = word_vocab.feed(word)
            E[idx, :] = word2emb[word]
            #print(word)

        return E, word_vocab
Example No. 3
def data():
    from keras.utils import np_utils
    from keras.preprocessing import sequence
    import keras.backend as K
    import numpy as np
    import pickle as pk
    import os

    from deepats.w2vEmbReader import W2VEmbReader as EmbReader
    import deepats.ets_reader as dataset
    from deepats.ets_config import get_args

    args = get_args()

    emb_reader = EmbReader(args.emb_path, args.emb_dim)
    emb_words = emb_reader.load_words()

    train_df, dev_df, test_df, vocab, overal_maxlen, qwks = dataset.get_data(
        args.data_path, emb_words=emb_words, seed=args.seed)

    train_x = train_df['text'].values
    train_y = train_df['y'].values
    dev_x = dev_df['text'].values
    dev_y = dev_df['y'].values
    test_x = test_df['text'].values
    test_y = test_df['y'].values

    abs_vocab_file = os.path.join(args.abs_out, 'vocab.pkl')
    with open(abs_vocab_file, 'wb') as vocab_file:
        pk.dump(vocab, vocab_file)

    train_x = sequence.pad_sequences(train_x, maxlen=overal_maxlen)
    dev_x = sequence.pad_sequences(dev_x, maxlen=overal_maxlen)
    test_x = sequence.pad_sequences(test_x, maxlen=overal_maxlen)

    return train_x, train_y, dev_x, dev_y, test_x, test_y, overal_maxlen, qwks
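
A minimal sketch of how the tuple returned by data() could drive training. The Sequential stand-in below is an assumption for illustration only; in this codebase the model actually comes from deepats.models.create_model (see Example No. 4):

    # Sketch (assumption): consume the output of data() with a small stand-in Keras model.
    from keras.models import Sequential
    from keras.layers import Embedding, LSTM, Dense

    train_x, train_y, dev_x, dev_y, test_x, test_y, overal_maxlen, qwks = data()

    vocab_size = int(max(train_x.max(), dev_x.max(), test_x.max())) + 1
    model = Sequential([
        Embedding(vocab_size, 50, input_length=overal_maxlen, mask_zero=True),
        LSTM(300),
        Dense(1, activation='sigmoid'),   # assumes scores from data() are scaled to [0, 1]
    ])
    model.compile(loss='mean_squared_error', optimizer='rmsprop')
    model.fit(train_x, train_y,
              validation_data=(dev_x, dev_y),
              batch_size=32, epochs=10, verbose=1)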
Example No. 4
def run(argv=None):

    parser = argparse.ArgumentParser()
    parser.add_argument("-o",
                        "--out-dir",
                        dest="out_dir_path",
                        type=str,
                        metavar='<str>',
                        required=True,
                        help="The path to the output directory")
    parser.add_argument(
        "-p",
        "--prompt",
        dest="prompt_id",
        type=int,
        metavar='<int>',
        required=False,
        help="Promp ID for ASAP dataset. '0' means all prompts.")
    parser.add_argument("-t",
                        "--type",
                        dest="model_type",
                        type=str,
                        metavar='<str>',
                        default='regp',
                        help="Model type (reg|regp|breg|bregp) (default=regp)")
    parser.add_argument(
        "-u",
        "--rec-unit",
        dest="recurrent_unit",
        type=str,
        metavar='<str>',
        default='lstm',
        help="Recurrent unit type (lstm|gru|simple) (default=lstm)")
    parser.add_argument(
        "-a",
        "--algorithm",
        dest="algorithm",
        type=str,
        metavar='<str>',
        default='rmsprop',
        help=
        "Optimization algorithm (rmsprop|sgd|adagrad|adadelta|adam|adamax) (default=rmsprop)"
    )
    parser.add_argument("-l",
                        "--loss",
                        dest="loss",
                        type=str,
                        metavar='<str>',
                        default='mse',
                        help="Loss function (mse|mae) (default=mse)")
    parser.add_argument("-e",
                        "--embdim",
                        dest="emb_dim",
                        type=int,
                        metavar='<int>',
                        default=50,
                        help="Embeddings dimension (default=50)")
    parser.add_argument(
        "-c",
        "--cnndim",
        dest="cnn_dim",
        type=int,
        metavar='<int>',
        default=0,
        help="CNN output dimension. '0' means no CNN layer (default=0)")
    parser.add_argument("-w",
                        "--cnnwin",
                        dest="cnn_window_size",
                        type=int,
                        metavar='<int>',
                        default=3,
                        help="CNN window size. (default=3)")
    parser.add_argument(
        "-r",
        "--rnndim",
        dest="rnn_dim",
        type=int,
        metavar='<int>',
        default=300,
        help="RNN dimension. '0' means no RNN layer (default=300)")
    parser.add_argument("-b",
                        "--batch-size",
                        dest="batch_size",
                        type=int,
                        metavar='<int>',
                        default=32,
                        help="Batch size (default=32)")
    parser.add_argument("-v",
                        "--vocab-size",
                        dest="vocab_size",
                        type=int,
                        metavar='<int>',
                        default=4000,
                        help="Vocab size (default=4000)")
    parser.add_argument(
        "--aggregation",
        dest="aggregation",
        type=str,
        metavar='<str>',
        default='mot',
        help=
        "The aggregation method for regp and bregp types (mot|attsum|attmean) (default=mot)"
    )
    parser.add_argument(
        "--dropout",
        dest="dropout_prob",
        type=float,
        metavar='<float>',
        default=0.5,
        help=
        "The dropout probability. To disable, give a negative number (default=0.5)"
    )
    parser.add_argument(
        "--vocab-path",
        dest="vocab_path",
        type=str,
        metavar='<str>',
        help="(Optional) The path to the existing vocab file (*.pkl)")
    parser.add_argument("--skip-init-bias",
                        dest="skip_init_bias",
                        action='store_true',
                        help="Skip initialization of the last layer bias")
    parser.add_argument(
        "--emb",
        dest="emb_path",
        type=str,
        metavar='<str>',
        help="The path to the word embeddings file (Word2Vec format)")
    parser.add_argument("--epochs",
                        dest="epochs",
                        type=int,
                        metavar='<int>',
                        default=100,
                        help="Number of epochs (default=50)")
    parser.add_argument(
        "--maxlen",
        dest="maxlen",
        type=int,
        metavar='<int>',
        default=0,
        help=
        "Maximum allowed number of words during training. '0' means no limit (default=0)"
    )
    parser.add_argument("--seed",
                        dest="seed",
                        type=int,
                        metavar='<int>',
                        default=1234,
                        help="Random seed (default=1234)")
    ## dsv
    parser.add_argument("--min-word-freq",
                        dest="min_word_freq",
                        type=int,
                        metavar='<int>',
                        default=2,
                        help="Min word frequency")
    parser.add_argument("--stack",
                        dest="stack",
                        type=int,
                        metavar='<int>',
                        default=1,
                        help="how deep to stack core RNN")
    parser.add_argument("--skip-emb-preload",
                        dest="skip_emb_preload",
                        action='store_true',
                        help="Skip preloading embeddings")
    parser.add_argument("--tokenize-old",
                        dest="tokenize_old",
                        action='store_true',
                        help="use old tokenizer")

    parser.add_argument("-ar",
                        "--abs-root",
                        dest="abs_root",
                        type=str,
                        metavar='<str>',
                        required=False,
                        help="Abs path to root directory")
    parser.add_argument("-ad",
                        "--abs-data",
                        dest="abs_data",
                        type=str,
                        metavar='<str>',
                        required=False,
                        help="Abs path to data directory")
    parser.add_argument("-ao",
                        "--abs-out",
                        dest="abs_out",
                        type=str,
                        metavar='<str>',
                        required=False,
                        help="Abs path to output directory")
    parser.add_argument("-dp",
                        "--data-path",
                        dest="data_path",
                        type=str,
                        metavar='<str>',
                        required=False,
                        help="Abs path to output directory")
    ##

    if argv is None:
        args = parser.parse_args()
    else:
        args = parser.parse_args(argv)

    out_dir = args.abs_out
    U.mkdir_p(os.path.join(out_dir, 'preds'))
    U.set_logger(out_dir)
    U.print_args(args)

    assert args.model_type in {'reg', 'regp', 'breg', 'bregp', 'rwa'}
    assert args.algorithm in {
        'rmsprop', 'sgd', 'adagrad', 'adadelta', 'adam', 'adamax'
    }
    assert args.loss in {'mse', 'mae', 'kappa', 'soft_kappa'}
    assert args.recurrent_unit in {'lstm', 'gru', 'simple', 'rwa'}
    assert args.aggregation in {'mot', 'attsum', 'attmean'}

    if args.seed > 0:
        RANDSEED = args.seed
    else:
        RANDSEED = np.random.randint(10000)
    np.random.seed(RANDSEED)

    #######################

    #from deepats.util import GPUtils as GPU
    import GPUtil as GPU
    mem = GPU.avail_mem()
    logger.info('AVAIL GPU MEM == %.4f' % mem)
    # 	if mem < 0.05:
    # 		return None
    ###############################################################################################################################
    ## Prepare data
    #

    emb_words = None
    if not args.skip_emb_preload:  #if args.emb_path:
        from deepats.w2vEmbReader import W2VEmbReader as EmbReader
        logger.info('Loading embedding vocabulary...')
        emb_reader = EmbReader(args.emb_path, emb_dim=args.emb_dim)
        emb_words = emb_reader.load_words()

    train_df, dev_df, test_df, vocab, overal_maxlen, qwks = dataset.get_data(
        args.data_path, emb_words=emb_words, seed=RANDSEED)
    vocab_size = len(vocab)

    train_x = train_df['text'].values
    train_y = train_df['y'].values
    dev_x = dev_df['text'].values
    dev_y = dev_df['y'].values
    test_x = test_df['text'].values
    test_y = test_df['y'].values

    # Dump vocab

    abs_vocab_file = os.path.join(out_dir, 'vocab.pkl')
    with open(os.path.join(out_dir, 'vocab.pkl'), 'wb') as vocab_file:
        pk.dump(vocab, vocab_file)

    if args.recurrent_unit == 'rwa':
        setattr(args, 'model_type', 'rwa')

    # Pad sequences for mini-batch processing
    from keras.preprocessing import sequence

    if args.model_type in {'breg', 'bregp', 'rwa'}:
        assert args.rnn_dim > 0
        train_x = sequence.pad_sequences(train_x, maxlen=overal_maxlen)
        dev_x = sequence.pad_sequences(dev_x, maxlen=overal_maxlen)
        test_x = sequence.pad_sequences(test_x, maxlen=overal_maxlen)
    else:
        train_x = sequence.pad_sequences(train_x)
        dev_x = sequence.pad_sequences(dev_x)
        test_x = sequence.pad_sequences(test_x)

    ###############################################################################################################################
    ## Some statistics


    # train_y = np.array(train_y, dtype=K.floatx())
    # dev_y = np.array(dev_y, dtype=K.floatx())
    # test_y = np.array(test_y, dtype=K.floatx())

    bincounts, mfs_list = U.bincounts(train_y)
    with open(os.path.join(out_dir, 'bincounts.txt'), 'w') as output_file:
        for bincount in bincounts:
            output_file.write(str(bincount) + '\n')

    train_mean = train_y.mean(axis=0)
    train_std = train_y.std(axis=0)
    dev_mean = dev_y.mean(axis=0)
    dev_std = dev_y.std(axis=0)
    test_mean = test_y.mean(axis=0)
    test_std = test_y.std(axis=0)

    logger.info('Statistics:')
    logger.info('  TEST KAPPAS (float, int)= \033[92m%.4f (%.4f)\033[0m ' %
                (qwks[1], qwks[0]))
    logger.info('  RANDSEED =   ' + str(RANDSEED))
    logger.info('  train_x shape: ' + str(np.array(train_x).shape))
    logger.info('  dev_x shape:   ' + str(np.array(dev_x).shape))
    logger.info('  test_x shape:  ' + str(np.array(test_x).shape))
    logger.info('  train_y shape: ' + str(train_y.shape))
    logger.info('  dev_y shape:   ' + str(dev_y.shape))
    logger.info('  test_y shape:  ' + str(test_y.shape))
    logger.info('  train_y mean: %s, stdev: %s, MFC: %s' %
                (str(train_mean), str(train_std), str(mfs_list)))
    logger.info('  overal_maxlen:  ' + str(overal_maxlen))

    ###############################################################################################################################
    ## Optimizer algorithm
    #

    from deepats.optimizers import get_optimizer
    #optimizer = get_optimizer(args)

    from keras import optimizers

    ## RMS-PROP

    #optimizer = optimizers.RMSprop(lr=0.00075, rho=0.9, clipnorm=1)
    #optimizer = optimizers.RMSprop(lr=0.001, rho=0.9, clipnorm=1)
    optimizer = optimizers.RMSprop(lr=0.001,
                                   rho=0.9,
                                   epsilon=1e-6,
                                   clipnorm=10)
    #optimizer = optimizers.RMSprop(lr=0.0018, rho=0.88, epsilon=1e-6, clipnorm=10)

    #optimizer = optimizers.RMSprop(lr=0.001, rho=0.9, epsilon=1e-8, clipnorm=10)
    #optimizer = optimizers.RMSprop(lr=0.004, rho=0.85, epsilon=1e-6, clipnorm=10)# best 2.1 (RWA)
    #optimizer = optimizers.RMSprop(lr=0.0025, rho=0.8, epsilon=1e-8, clipnorm=10) # best 2.1 (RWA)
    #optimizer = optimizers.RMSprop(lr=0.001, rho=0.9, epsilon=1e-8, clipnorm=10) # best 2.3 (RWA)
    #optimizer = optimizers.RMSprop(lr=0.0025, rho=0.88, epsilon=1e-8, clipnorm=10) # best 2.3 (RWA)
    #optimizer = optimizers.RMSprop(lr=0.004, rho=0.85, epsilon=1e-8, clipnorm=10) # best 2.10 (RWA)

    ## OTHER METHODS
    #optimizer = optimizers.Adam(lr=0.0018, clipnorm=5)
    #optimizer = optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, clipnorm=1)
    #optimizer = optimizers.Adam(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=1e-06, clipnorm=10)

    #optimizer = optimizers.Adamax(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=1e-08, clipnorm=10)
    #optimizer = optimizers.SGD(lr=0.05, momentum=0, decay=0.0, nesterov=False, clipnorm=10)
    #optimizer = optimizers.Adagrad(lr=0.03, epsilon=1e-08, clipnorm=10)
    #optimizer = optimizers.Adadelta(lr=1.0, rho=0.95, epsilon=1e-06, clipnorm=10)

    ###############################################################################################################################
    ## Building model
    #
    if args.loss == 'mse':
        loss = 'mean_squared_error'
        metric = kappa_metric
        metric_name = 'kappa_metric'
    elif args.loss == 'mae':
        loss = 'mean_absolute_error'
        metric = kappa_metric
        metric_name = 'kappa_metric'
    elif args.loss == 'kappa':
        loss = kappa_loss
        metric = kappa_metric
        metric_name = 'kappa_metric'

    ########################################################

    from deepats.models import create_model
    model = create_model(args, train_y.mean(axis=0), overal_maxlen, vocab)

    ############################################
    '''
	# test yaml serialization/de-serialization
	yaml = model.to_yaml()
	print yaml
	from deepats.my_layers import MeanOverTime
	from deepats.rwa import RWA
	model = model_from_yaml(yaml, custom_objects={'MeanOverTime': MeanOverTime, 'RWA':RWA})
	'''
    ############################################

    model.compile(loss=loss, optimizer=optimizer, metrics=[metric])

    print(model.summary())

    ###############################################################################################################################
    ## Plotting model
    #
    # 	from keras.utils.visualize_util import plot
    # 	plot(model, to_file = os.path.join(out_dir,'model.png'))

    ###############################################################################################################################
    ## Save model architecture
    #

    logger.info('Saving model architecture')
    with open(os.path.join(out_dir, 'model_arch.json'), 'w') as arch:
        arch.write(model.to_json(indent=2))
    logger.info('  Done')

    ###############################################################################################################################
    ## Evaluator
    #
    evl = Evaluator(dataset, args.prompt_id, out_dir, dev_x, test_x, dev_df,
                    test_df)

    ###############################################################################################################################
    ## Training
    #

    logger.info(
        '----------------------------------------------------------------')
    logger.info('Initial Evaluation:')
    evl.evaluate(model, -1, print_info=True)

    total_train_time = 0
    total_eval_time = 0

    for ii in range(args.epochs):
        # Training
        t0 = time()
        train_history = model.fit(train_x,
                                  train_y,
                                  batch_size=args.batch_size,
                                  epochs=1,
                                  verbose=0)
        tr_time = time() - t0
        total_train_time += tr_time

        # Evaluate
        t0 = time()
        evl.evaluate(model, ii)
        evl_time = time() - t0
        total_eval_time += evl_time

        # Print information
        train_loss = train_history.history['loss'][0]
        train_metric = train_history.history[metric_name][0]
        logger.info('Epoch %d, train: %is, evaluation: %is' %
                    (ii, tr_time, evl_time))
        logger.info('[Train] loss: %.4f, metric: %.4f' %
                    (train_loss, train_metric))
        evl.print_info()

    ###############################################################################################################################
    ## Summary of the results
    #

    logger.info('Training:   %i seconds in total' % total_train_time)
    logger.info('Evaluation: %i seconds in total' % total_eval_time)

    evl.print_final_info()
Example No. 5
def data():
    from keras.utils import np_utils
    from keras.preprocessing import sequence
    import keras.backend as K
    import numpy as np

    import pickle as pk
    import deepats.asap_reader as dataset
    from deepats.w2vEmbReader import W2VEmbReader as EmbReader
    from deepats.config import get_args

    import logging
    logger = logging.getLogger(__name__)

    args = get_args()

    if args.seed > 0:
        np.random.seed(args.seed)

    emb_reader = EmbReader(args.emb_path, emb_dim=args.emb_dim)
    emb_words = emb_reader.load_words()

    dataset.set_score_range(args.data_set)
    (train_x, train_y, train_pmt), (dev_x, dev_y, dev_pmt), (
        test_x, test_y, test_pmt
    ), vocab, vocab_size, overal_maxlen, num_outputs = dataset.get_data(
        (args.train_path, args.dev_path, args.test_path),
        args.prompt_id,
        args.vocab_size,
        args.maxlen,
        tokenize_text=True,
        to_lower=True,
        sort_by_len=False,
        vocab_path=args.vocab_path,
        min_word_freq=args.min_word_freq,
        emb_words=emb_words)

    abs_vocab_file = args.abs_out_path + '/vocab.pkl'
    with open(abs_vocab_file, 'wb') as vocab_file:
        pk.dump(vocab, vocab_file)

    train_x = sequence.pad_sequences(train_x, maxlen=overal_maxlen)
    dev_x = sequence.pad_sequences(dev_x, maxlen=overal_maxlen)
    test_x = sequence.pad_sequences(test_x, maxlen=overal_maxlen)

    train_y = np.array(train_y, dtype=K.floatx())
    dev_y = np.array(dev_y, dtype=K.floatx())
    test_y = np.array(test_y, dtype=K.floatx())

    if args.prompt_id:
        train_pmt = np.array(train_pmt, dtype='int32')
        dev_pmt = np.array(dev_pmt, dtype='int32')
        test_pmt = np.array(test_pmt, dtype='int32')

    dev_y_org = dev_y.astype(dataset.get_ref_dtype())
    test_y_org = test_y.astype(dataset.get_ref_dtype())

    train_y = dataset.get_model_friendly_scores(train_y, train_pmt)
    dev_y = dataset.get_model_friendly_scores(dev_y, dev_pmt)
    test_y = dataset.get_model_friendly_scores(test_y, test_pmt)

    return train_x, train_y, dev_x, dev_y, test_x, test_y, dev_y_org, test_y_org, overal_maxlen
Example No. 6
def run(argv=None):

    parser = argparse.ArgumentParser()
    parser.add_argument("-o",
                        "--out-dir",
                        dest="out_dir_path",
                        type=str,
                        metavar='<str>',
                        required=True,
                        help="The path to the output directory")
    parser.add_argument(
        "-p",
        "--prompt",
        dest="prompt_id",
        type=int,
        metavar='<int>',
        required=False,
        help="Promp ID for ASAP dataset. '0' means all prompts.")
    parser.add_argument("-t",
                        "--type",
                        dest="model_type",
                        type=str,
                        metavar='<str>',
                        default='regp',
                        help="Model type (reg|regp|breg|bregp) (default=regp)")
    parser.add_argument(
        "-u",
        "--rec-unit",
        dest="recurrent_unit",
        type=str,
        metavar='<str>',
        default='lstm',
        help="Recurrent unit type (lstm|gru|simple) (default=lstm)")
    parser.add_argument(
        "-a",
        "--algorithm",
        dest="algorithm",
        type=str,
        metavar='<str>',
        default='rmsprop',
        help=
        "Optimization algorithm (rmsprop|sgd|adagrad|adadelta|adam|adamax) (default=rmsprop)"
    )
    parser.add_argument("-l",
                        "--loss",
                        dest="loss",
                        type=str,
                        metavar='<str>',
                        default='mse',
                        help="Loss function (mse|mae) (default=mse)")
    parser.add_argument("-e",
                        "--embdim",
                        dest="emb_dim",
                        type=int,
                        metavar='<int>',
                        default=50,
                        help="Embeddings dimension (default=50)")
    parser.add_argument(
        "-c",
        "--cnndim",
        dest="cnn_dim",
        type=int,
        metavar='<int>',
        default=0,
        help="CNN output dimension. '0' means no CNN layer (default=0)")
    parser.add_argument("-w",
                        "--cnnwin",
                        dest="cnn_window_size",
                        type=int,
                        metavar='<int>',
                        default=3,
                        help="CNN window size. (default=3)")
    parser.add_argument(
        "-r",
        "--rnndim",
        dest="rnn_dim",
        type=int,
        metavar='<int>',
        default=300,
        help="RNN dimension. '0' means no RNN layer (default=300)")
    parser.add_argument("-b",
                        "--batch-size",
                        dest="batch_size",
                        type=int,
                        metavar='<int>',
                        default=32,
                        help="Batch size (default=32)")
    parser.add_argument("-v",
                        "--vocab-size",
                        dest="vocab_size",
                        type=int,
                        metavar='<int>',
                        default=4000,
                        help="Vocab size (default=4000)")
    parser.add_argument(
        "--aggregation",
        dest="aggregation",
        type=str,
        metavar='<str>',
        default='mot',
        help=
        "The aggregation method for regp and bregp types (mot|attsum|attmean) (default=mot)"
    )
    parser.add_argument(
        "--dropout",
        dest="dropout_prob",
        type=float,
        metavar='<float>',
        default=0.5,
        help=
        "The dropout probability. To disable, give a negative number (default=0.5)"
    )
    parser.add_argument(
        "--vocab-path",
        dest="vocab_path",
        type=str,
        metavar='<str>',
        help="(Optional) The path to the existing vocab file (*.pkl)")
    parser.add_argument("--skip-init-bias",
                        dest="skip_init_bias",
                        action='store_true',
                        help="Skip initialization of the last layer bias")
    parser.add_argument(
        "--emb",
        dest="emb_path",
        type=str,
        metavar='<str>',
        help="The path to the word embeddings file (Word2Vec format)")
    parser.add_argument("--epochs",
                        dest="epochs",
                        type=int,
                        metavar='<int>',
                        default=100,
                        help="Number of epochs (default=50)")
    parser.add_argument(
        "--maxlen",
        dest="maxlen",
        type=int,
        metavar='<int>',
        default=0,
        help=
        "Maximum allowed number of words during training. '0' means no limit (default=0)"
    )
    parser.add_argument("--seed",
                        dest="seed",
                        type=int,
                        metavar='<int>',
                        default=1234,
                        help="Random seed (default=1234)")
    ## dsv
    parser.add_argument("--min-word-freq",
                        dest="min_word_freq",
                        type=int,
                        metavar='<int>',
                        default=2,
                        help="Min word frequency")
    parser.add_argument("--stack",
                        dest="stack",
                        type=int,
                        metavar='<int>',
                        default=1,
                        help="how deep to stack core RNN")
    parser.add_argument("--skip-emb-preload",
                        dest="skip_emb_preload",
                        action='store_true',
                        help="Skip preloading embeddings")
    parser.add_argument("--tokenize-old",
                        dest="tokenize_old",
                        action='store_true',
                        help="use old tokenizer")

    parser.add_argument("-ar",
                        "--abs-root",
                        dest="abs_root",
                        type=str,
                        metavar='<str>',
                        required=False,
                        help="Abs path to root directory")
    parser.add_argument("-ad",
                        "--abs-data",
                        dest="abs_data",
                        type=str,
                        metavar='<str>',
                        required=False,
                        help="Abs path to data directory")
    parser.add_argument("-ao",
                        "--abs-out",
                        dest="abs_out",
                        type=str,
                        metavar='<str>',
                        required=False,
                        help="Abs path to output directory")
    parser.add_argument("-dp",
                        "--data-path",
                        dest="data_path",
                        type=str,
                        metavar='<str>',
                        required=False,
                        help="Abs path to output directory")
    ##

    if argv is None:
        args = parser.parse_args()
    else:
        args = parser.parse_args(argv)

    out_dir = args.abs_out
    U.mkdir_p(os.path.join(out_dir, 'preds'))
    U.set_logger(out_dir)
    U.print_args(args)

    assert args.model_type in {'reg', 'regp', 'breg', 'bregp', 'rwa'}
    assert args.algorithm in {
        'rmsprop', 'sgd', 'adagrad', 'adadelta', 'adam', 'adamax'
    }
    assert args.loss in {'mse', 'mae', 'kappa', 'soft_kappa'}
    assert args.recurrent_unit in {'lstm', 'gru', 'simple', 'rwa'}
    assert args.aggregation in {'mot', 'attsum', 'attmean'}

    if args.seed > 0:
        RANDSEED = args.seed
    else:
        RANDSEED = np.random.randint(10000)
    np.random.seed(RANDSEED)

    #######################

    #from deepats.util import GPUtils as GPU
    import GPUtil as GPU
    mem = GPU.avail_mem()
    logger.info('AVAIL GPU MEM == %.4f' % mem)
    # 	if mem < 0.05:
    # 		return None
    ###############################################################################################################################
    ## Prepare data
    #

    emb_words = None
    if not args.skip_emb_preload:  #if args.emb_path:
        from deepats.w2vEmbReader import W2VEmbReader as EmbReader
        logger.info('Loading embedding vocabulary...')
        emb_reader = EmbReader(args.emb_path, emb_dim=args.emb_dim)
        emb_words = emb_reader.load_words()

    train_df, dev_df, test_df, vocab, overal_maxlen, qwks = dataset.get_data(
        args.data_path, emb_words=emb_words, seed=RANDSEED)
    vocab_size = len(vocab)

    train_x = train_df['text'].values
    train_y = train_df['y'].values
    dev_x = dev_df['text'].values
    dev_y = dev_df['y'].values
    test_x = test_df['text'].values
    test_y = test_df['y'].values

    # Dump vocab

    abs_vocab_file = os.path.join(out_dir, 'vocab.pkl')
    with open(os.path.join(out_dir, 'vocab.pkl'), 'wb') as vocab_file:
        pk.dump(vocab, vocab_file)

    if args.recurrent_unit == 'rwa':
        setattr(args, 'model_type', 'rwa')

    # Pad sequences for mini-batch processing
    from keras.preprocessing import sequence

    train_x = sequence.pad_sequences(train_x, maxlen=overal_maxlen)
    dev_x = sequence.pad_sequences(dev_x, maxlen=overal_maxlen)
    test_x = sequence.pad_sequences(test_x, maxlen=overal_maxlen)

    ###############################################################################################################################
    ## Some statistics

    bincounts, mfs_list = U.bincounts(train_y)
    with open(os.path.join(out_dir, 'bincounts.txt'), 'w') as output_file:
        for bincount in bincounts:
            output_file.write(str(bincount) + '\n')

    train_mean = train_y.mean(axis=0)
    train_std = train_y.std(axis=0)
    dev_mean = dev_y.mean(axis=0)
    dev_std = dev_y.std(axis=0)
    test_mean = test_y.mean(axis=0)
    test_std = test_y.std(axis=0)

    logger.info('Statistics:')
    logger.info('  PROMPT_ID\t= ' + U.b_green(args.prompt_id))
    logger.info(
        '  TEST KAPPAS\t= {} (float, int)'.format(U.b_green('%.4f (%.4f)')) %
        (qwks[1], qwks[0]))
    logger.info('  RANDSEED\t= ' + U.b_green(str(RANDSEED)))
    logger.info('  train_x shape: ' + str(np.array(train_x).shape))
    logger.info('  dev_x shape:   ' + str(np.array(dev_x).shape))
    logger.info('  test_x shape:  ' + str(np.array(test_x).shape))
    logger.info('  train_y shape: ' + str(train_y.shape))
    logger.info('  dev_y shape:   ' + str(dev_y.shape))
    logger.info('  test_y shape:  ' + str(test_y.shape))
    logger.info('  train_y mean: %s, stdev: %s, MFC: %s' %
                (str(train_mean), str(train_std), str(mfs_list)))
    logger.info('  overal_maxlen:  ' + str(overal_maxlen))

    ###############################################################################################################################
    ## Optimizer algorithm

    from keras import optimizers
    from deepats.optimizers import get_optimizer
    #optimizer = get_optimizer(args)

    ## RMS-PROP

    #optimizer = optimizers.RMSprop(lr=0.00075, rho=0.9, clipnorm=1)
    #optimizer = optimizers.RMSprop(lr=0.001, rho=0.9, clipnorm=1)
    optimizer = optimizers.RMSprop(lr=0.003,
                                   rho=0.88,
                                   epsilon=1e-6,
                                   clipnorm=10)
    #optimizer = optimizers.RMSprop(lr=0.0018, rho=0.88, epsilon=1e-6, clipnorm=10)

    #optimizer = optimizers.RMSprop(lr=0.001, rho=0.9, epsilon=1e-8, clipnorm=10)
    #optimizer = optimizers.RMSprop(lr=0.004, rho=0.85, epsilon=1e-6, clipnorm=10)# best 2.1 (RWA)
    #optimizer = optimizers.RMSprop(lr=0.0025, rho=0.8, epsilon=1e-8, clipnorm=10) # best 2.1 (RWA)
    #optimizer = optimizers.RMSprop(lr=0.001, rho=0.9, epsilon=1e-8, clipnorm=10) # best 2.3 (RWA)
    #optimizer = optimizers.RMSprop(lr=0.0025, rho=0.88, epsilon=1e-8, clipnorm=10) # best 2.3 (RWA)
    #optimizer = optimizers.RMSprop(lr=0.004, rho=0.85, epsilon=1e-8, clipnorm=10) # best 2.10 (RWA)

    ## OTHER METHODS
    #optimizer = optimizers.Adam(lr=0.0018, clipnorm=5)
    #optimizer = optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, clipnorm=1)
    #optimizer = optimizers.Adam(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=1e-06, clipnorm=10)

    #optimizer = optimizers.Nadam(lr=0.002)

    #optimizer = optimizers.Adamax(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=1e-08, clipnorm=10)
    #optimizer = optimizers.SGD(lr=0.05, momentum=0, decay=0.0, nesterov=False, clipnorm=10)
    #optimizer = optimizers.Adagrad(lr=0.03, epsilon=1e-08, clipnorm=10)
    #optimizer = optimizers.Adadelta(lr=1.0, rho=0.95, epsilon=1e-06, clipnorm=10)

    ###############################################################################################################################
    ## Building model
    #
    loss = kappa_loss
    metric_name = 'kappa'

    ########################################################

    from deepats.models import create_model
    model = create_model(args, train_y.mean(axis=0), overal_maxlen, vocab)

    ############################################
    '''
	# test yaml serialization/de-serialization
	yaml = model.to_yaml()
	print yaml
	from deepats.my_layers import MeanOverTime
	from deepats.rwa import RWA
	model = model_from_yaml(yaml, custom_objects={'MeanOverTime': MeanOverTime, 'RWA':RWA})
	'''
    ############################################

    model.compile(loss=loss, optimizer=optimizer, metrics=[kappa])
    print(model.summary())

    ###############################################################################################################################
    ## Callbacks
    callbacks = []

    ##############################
    ''' Evaluate test_kappa '''
    class Eval(Callback):
        def __init__(self, x, y, funcs, prefix='test', batch_size=128):
            super(Eval, self).__init__()
            self.x = x
            self.y = y
            self.funcs = funcs
            self.prefix = prefix
            self.batch_size = batch_size
            self.epoch = 0

        def on_epoch_end(self, epoch, logs={}):
            self.epoch += 1
            p = np.asarray(
                self.model.predict(self.x,
                                   batch_size=self.batch_size).squeeze())
            for func in self.funcs:
                f = func(self.y, p)
                name = '{}_{}'.format(self.prefix, func.__name__)
                logs[name] = f
                print(' - {0}: {1:0.4f}'.format(name, f))

    eval = Eval(test_x, test_df['y'].values, [nkappa], 'test')
    callbacks.append(eval)

    ##############################
    ''' ModelCheckpoint '''

    wt_path = os.path.join(out_dir, 'weights.{}.hdf5'.format('rwa'))
    checkpt = ModelCheckpoint(wt_path,
                              monitor='val_kappa',
                              verbose=1,
                              save_best_only=True,
                              mode='max')
    callbacks.append(checkpt)

    ##############################
    ''' LRplateau '''

    class LRplateau(ReduceLROnPlateau):
        def __init__(self,
                     monitor='val_loss',
                     factor=0.1,
                     patience=10,
                     verbose=0,
                     mode='auto',
                     epsilon=1e-4,
                     cooldown=0,
                     min_lr=0,
                     checkpoint=None):
            super(LRplateau, self).__init__(monitor, factor, patience, verbose,
                                            mode, epsilon, cooldown, min_lr)
            self.checkpoint = checkpoint

        def on_lr_reduce(self, epoch):
            if self.checkpoint:
                if self.verbose > 0:
                    print('Epoch {}: loading wts from {}.\n'.format(
                        epoch, self.checkpoint.filepath))
                self.model.load_weights(self.checkpoint.filepath)

        def on_epoch_end(self, epoch, logs=None):
            logs = logs or {}
            logs['lr'] = K.get_value(self.model.optimizer.lr)
            current = logs.get(self.monitor)
            if current is None:
                warnings.warn(
                    'Learning Rate Plateau Reducing requires %s available!' %
                    self.monitor, RuntimeWarning)
            else:
                if self.in_cooldown():
                    self.cooldown_counter -= 1
                    self.wait = 0

                if self.monitor_op(current, self.best):
                    self.best = current
                    self.wait = 0
                elif not self.in_cooldown():
                    if self.wait >= self.patience:
                        old_lr = float(K.get_value(self.model.optimizer.lr))
                        if old_lr > self.min_lr + self.lr_epsilon:
                            new_lr = old_lr * self.factor
                            new_lr = max(new_lr, self.min_lr)
                            K.set_value(self.model.optimizer.lr, new_lr)
                            if self.verbose > 0:
                                print(
                                    '\nEpoch {0}: reducing learning rate to {1:0.4g}.'
                                    .format(epoch, new_lr))
                            self.cooldown_counter = self.cooldown
                            self.wait = 0
                            self.on_lr_reduce(epoch)
                    self.wait += 1

    reduce_lr = LRplateau(monitor='val_kappa',
                          mode='max',
                          patience=3,
                          factor=0.33,
                          min_lr=0.00001,
                          verbose=1,
                          checkpoint=checkpt)

    callbacks.append(reduce_lr)

    ###############################################################################################################################
    ## Training

    model.fit(train_x,
              train_y,
              validation_data=(dev_x, dev_df['y'].values),
              batch_size=args.batch_size,
              epochs=args.epochs,
              callbacks=callbacks,
              verbose=1)
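
After training, the best weights written by the ModelCheckpoint above can be restored for a final test-set score. A minimal sketch meant to sit directly after model.fit in the same scope; nkappa is the same scoring function already passed to Eval:

    # Sketch (assumption): reload the best 'val_kappa' checkpoint and rescore the test split.
    model.load_weights(wt_path)   # wt_path is the ModelCheckpoint path defined above
    test_pred = model.predict(test_x, batch_size=args.batch_size).squeeze()
    logger.info('Best-checkpoint test kappa: %.4f' % nkappa(test_df['y'].values, test_pred))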
Example No. 7
def run(argv=None):

    parser = argparse.ArgumentParser()
    parser.add_argument("-o",
                        "--out-dir",
                        dest="out_dir_path",
                        type=str,
                        metavar='<str>',
                        required=True,
                        help="The path to the output directory")
    parser.add_argument(
        "-p",
        "--prompt",
        dest="prompt_id",
        type=str,
        metavar='<str>',
        required=False,
        help="Promp ID for ASAP dataset. '0' means all prompts.")
    parser.add_argument("-t",
                        "--type",
                        dest="model_type",
                        type=str,
                        metavar='<str>',
                        default='regp',
                        help="Model type (reg|regp|breg|bregp) (default=regp)")
    parser.add_argument(
        "-u",
        "--rec-unit",
        dest="recurrent_unit",
        type=str,
        metavar='<str>',
        default='lstm',
        help="Recurrent unit type (lstm|gru|simple) (default=lstm)")
    parser.add_argument(
        "-a",
        "--algorithm",
        dest="algorithm",
        type=str,
        metavar='<str>',
        default='rmsprop',
        help=
        "Optimization algorithm (rmsprop|sgd|adagrad|adadelta|adam|adamax) (default=rmsprop)"
    )
    parser.add_argument("-l",
                        "--loss",
                        dest="loss",
                        type=str,
                        metavar='<str>',
                        default='mse',
                        help="Loss function (mse|mae) (default=mse)")
    parser.add_argument("-e",
                        "--embdim",
                        dest="emb_dim",
                        type=int,
                        metavar='<int>',
                        default=50,
                        help="Embeddings dimension (default=50)")
    parser.add_argument(
        "-c",
        "--cnndim",
        dest="cnn_dim",
        type=int,
        metavar='<int>',
        default=0,
        help="CNN output dimension. '0' means no CNN layer (default=0)")
    parser.add_argument("-w",
                        "--cnnwin",
                        dest="cnn_window_size",
                        type=int,
                        metavar='<int>',
                        default=3,
                        help="CNN window size. (default=3)")
    parser.add_argument(
        "-r",
        "--rnndim",
        dest="rnn_dim",
        type=int,
        metavar='<int>',
        default=300,
        help="RNN dimension. '0' means no RNN layer (default=300)")
    parser.add_argument("-b",
                        "--batch-size",
                        dest="batch_size",
                        type=int,
                        metavar='<int>',
                        default=32,
                        help="Batch size (default=32)")
    parser.add_argument("-v",
                        "--vocab-size",
                        dest="vocab_size",
                        type=int,
                        metavar='<int>',
                        default=4000,
                        help="Vocab size (default=4000)")
    parser.add_argument(
        "--aggregation",
        dest="aggregation",
        type=str,
        metavar='<str>',
        default='mot',
        help=
        "The aggregation method for regp and bregp types (mot|attsum|attmean) (default=mot)"
    )
    parser.add_argument(
        "--dropout",
        dest="dropout_prob",
        type=float,
        metavar='<float>',
        default=0.5,
        help=
        "The dropout probability. To disable, give a negative number (default=0.5)"
    )
    parser.add_argument(
        "--vocab-path",
        dest="vocab_path",
        type=str,
        metavar='<str>',
        help="(Optional) The path to the existing vocab file (*.pkl)")
    parser.add_argument("--skip-init-bias",
                        dest="skip_init_bias",
                        action='store_true',
                        help="Skip initialization of the last layer bias")
    parser.add_argument(
        "--emb",
        dest="emb_path",
        type=str,
        metavar='<str>',
        help="The path to the word embeddings file (Word2Vec format)")
    parser.add_argument("--epochs",
                        dest="epochs",
                        type=int,
                        metavar='<int>',
                        default=100,
                        help="Number of epochs (default=50)")
    parser.add_argument(
        "--maxlen",
        dest="maxlen",
        type=int,
        metavar='<int>',
        default=0,
        help=
        "Maximum allowed number of words during training. '0' means no limit (default=0)"
    )
    parser.add_argument("--seed",
                        dest="seed",
                        type=int,
                        metavar='<int>',
                        default=0,
                        help="Random seed (default=1234)")
    parser.add_argument("--mode",
                        dest="run_mode",
                        type=str,
                        metavar='<str>',
                        default='train',
                        help="run mode")

    ## dsv
    parser.add_argument("--min-word-freq",
                        dest="min_word_freq",
                        type=int,
                        metavar='<int>',
                        default=2,
                        help="Min word frequency")
    parser.add_argument("--stack",
                        dest="stack",
                        type=int,
                        metavar='<int>',
                        default=1,
                        help="how deep to stack core RNN")
    parser.add_argument("--skip-emb-preload",
                        dest="skip_emb_preload",
                        action='store_true',
                        help="Skip preloading embeddings")
    parser.add_argument("--tokenize-old",
                        dest="tokenize_old",
                        action='store_true',
                        help="use old tokenizer")

    parser.add_argument("-ar",
                        "--abs-root",
                        dest="abs_root",
                        type=str,
                        metavar='<str>',
                        required=False,
                        help="Abs path to root directory")
    parser.add_argument("-ad",
                        "--abs-data",
                        dest="abs_data",
                        type=str,
                        metavar='<str>',
                        required=False,
                        help="Abs path to data directory")
    parser.add_argument("-ao",
                        "--abs-out",
                        dest="abs_out",
                        type=str,
                        metavar='<str>',
                        required=False,
                        help="Abs path to output directory")
    parser.add_argument("-dp",
                        "--data-path",
                        dest="data_path",
                        type=str,
                        metavar='<str>',
                        required=False,
                        help="Abs path to output directory")
    ##

    if argv is None:
        args = parser.parse_args()
    else:
        args = parser.parse_args(argv)

    out_dir = args.abs_out
    U.mkdir_p(os.path.join(out_dir, 'preds'))
    U.set_logger(out_dir)
    U.print_args(args)

    assert args.model_type in {'reg', 'regp', 'breg', 'bregp', 'rwa'}
    assert args.algorithm in {
        'rmsprop', 'sgd', 'adagrad', 'adadelta', 'adam', 'adamax'
    }
    assert args.loss in {'mse', 'mae', 'kappa', 'soft_kappa'}
    assert args.recurrent_unit in {'lstm', 'gru', 'simple', 'rwa'}
    assert args.aggregation in {'mot', 'attsum', 'attmean'}

    if args.seed > 0:
        RANDSEED = args.seed
    else:
        RANDSEED = np.random.randint(10000)
    np.random.seed(RANDSEED)

    pid = args.prompt_id
    mode = args.run_mode

    #######################

    #from deepats.util import GPUtils as GPU
    # 	import GPUtil as GPU
    # 	mem = GPU.avail_mem()
    # 	logger.info('AVAIL GPU MEM == %.4f' % mem)
    # 	if mem < 0.05:
    # 		return None
    ###############################################################################################################################
    ## Prepare data
    #

    emb_words = None
    if not args.skip_emb_preload:  #if args.emb_path:
        from deepats.w2vEmbReader import W2VEmbReader as EmbReader
        logger.info('Loading embedding vocabulary...')
        emb_reader = EmbReader(args.emb_path, emb_dim=args.emb_dim)
        emb_words = emb_reader.load_words()

    vocab_path = None
    abs_vocab_file = os.path.join(out_dir, 'vocab.pkl')
    if mode == 'test':
        vocab_path = abs_vocab_file

    train_df, dev_df, test_df, vocab, overal_maxlen = ets_reader.get_mode_data(
        args.data_path,
        dev_split=0.1,
        emb_words=emb_words,
        vocab_path=vocab_path,
        seed=RANDSEED)

    train_x = train_df['text'].values
    train_y = train_df['yint'].values.astype('float32')
    dev_x = dev_df['text'].values
    dev_y = dev_df['yint'].values.astype('float32')
    test_x = test_df['text'].values
    test_y = test_df['yint'].values.astype('float32')

    # Dump vocab
    if mode == 'train':
        with open(os.path.join(out_dir, 'vocab.pkl'), 'wb') as vocab_file:
            pk.dump(vocab, vocab_file)

    if args.recurrent_unit == 'rwa':
        setattr(args, 'model_type', 'rwa')
    if args.recurrent_unit == 'lstm':
        setattr(args, 'model_type', 'regp')

    # Pad sequences for mini-batch processing
    from keras.preprocessing import sequence

    train_x = sequence.pad_sequences(train_x, maxlen=overal_maxlen)
    dev_x = sequence.pad_sequences(dev_x, maxlen=overal_maxlen)
    test_x = sequence.pad_sequences(test_x, maxlen=overal_maxlen)

    ###############################################################################################################################
    ## Some statistics

    bincounts, mfs_list = U.bincounts(train_y)
    with open(os.path.join(out_dir, 'bincounts.txt'), 'w') as output_file:
        for bincount in bincounts:
            output_file.write(str(bincount) + '\n')

    train_mean = train_y.mean(axis=0)
    train_std = train_y.std(axis=0)
    dev_mean = dev_y.mean(axis=0)
    dev_std = dev_y.std(axis=0)
    test_mean = test_y.mean(axis=0)
    test_std = test_y.std(axis=0)

    logger.info('Statistics:')
    logger.info('  PROMPT_ID\t= ' + U.b_green(args.prompt_id))
    logger.info('  RANDSEED\t= ' + U.b_green(str(RANDSEED)))
    logger.info('  train_x shape: ' + str(np.array(train_x).shape))
    logger.info('  dev_x shape:   ' + str(np.array(dev_x).shape))
    logger.info('  test_x shape:  ' + str(np.array(test_x).shape))
    logger.info('  train_y shape: ' + str(train_y.shape))
    logger.info('  dev_y shape:   ' + str(dev_y.shape))
    logger.info('  test_y shape:  ' + str(test_y.shape))
    logger.info('  train_y mean: %s, stdev: %s, MFC: %s' %
                (str(train_mean), str(train_std), str(mfs_list)))
    logger.info('  overal_maxlen:  ' + str(overal_maxlen))

    ###############################################################################################################################
    ## Optimizer algorithm

    from keras import optimizers
    from deepats.optimizers import get_optimizer
    #optimizer = get_optimizer(args)

    # 	optimizer = optimizers.Adam(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=1e-08, clipnorm=10)#***RWA***

    optimizer = optimizers.Adam(lr=0.001,
                                beta_1=0.9,
                                beta_2=0.999,
                                epsilon=1e-08,
                                clipnorm=1)

    # 	optimizer = optimizers.Nadam(lr=0.001, clipnorm=10)
    # 	optimizer = optimizers.Nadam(lr=0.002, clipnorm=1)

    # 	optimizer = optimizers.RMSprop(lr=0.0015, rho=0.9, epsilon=1e-8, clipnorm=10)
    # 	optimizer = optimizers.RMSprop(lr=0.003, rho=0.88, epsilon=1e-6, clipnorm=10)
    # 	optimizer = optimizers.RMSprop(lr=0.0025, rho=0.8, epsilon=1e-8, clipnorm=10) # best 2.1 (RWA)

    ## OTHER METHODS
    #optimizer = optimizers.Adam(lr=0.0018, clipnorm=5)
    #optimizer = optimizers.Nadam(lr=0.002)
    #optimizer = optimizers.Adamax(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=1e-08, clipnorm=10)
    #optimizer = optimizers.SGD(lr=0.05, momentum=0, decay=0.0, nesterov=False, clipnorm=10)
    #optimizer = optimizers.Adagrad(lr=0.03, epsilon=1e-08, clipnorm=10)

    ###############################################################################################################################
    ## Building model
    from deepats.models import create_model

    #loss = kappa_loss
    #metrics = [kappa,'mean_squared_error']

    if args.loss == 'mse':
        loss = 'mean_squared_error'
        metrics = ['acc']
        # 		metrics = [kappa]
        monitor = 'val_kappa'
    elif args.loss == 'kappa':
        loss = kappa_loss
        metrics = [kappa]
        monitor = 'val_kappa'

    model = create_model(args, train_y.mean(axis=0), overal_maxlen, vocab)
    model.compile(loss=loss, optimizer=optimizer, metrics=metrics)
    print(model.summary())

    ###############################################################################################################################
    ## Callbacks
    callbacks = []

    ##############################
    ''' Evaluate test_kappa '''

    from sklearn.metrics import roc_auc_score as auc, average_precision_score

    def map(y_true, y_prob):
        return average_precision_score(y_true, y_prob)

    class Eval(Callback):
        def __init__(self, x, y, funcs, prefix='test', batch_size=128):
            super(Eval, self).__init__()
            self.x = x
            self.y = y
            self.funcs = funcs
            self.prefix = prefix
            self.batch_size = batch_size
            self.epoch = 0

        def on_epoch_end(self, epoch, logs={}):
            self.epoch += 1
            p = np.asarray(
                self.model.predict(self.x,
                                   batch_size=self.batch_size).squeeze())
            for func in self.funcs:
                f = func(self.y, p)
                name = '{}_{}'.format(self.prefix, func.__name__)
                logs[name] = f
                print(' - {0}: {1:0.4f}'.format(name, f))
                #sys.stdout.write(' - {0}: {1:0.4f} '.format(name,f))

    eval = Eval(dev_x, dev_df['yint'].values, [map], 'val')
    callbacks.append(eval)
    monitor = 'val_map'

    # 	eval = Eval(test_x, test_df['yint'].values, [qwk,auc], 'test'); callbacks.append(eval)
    eval = Eval(test_x, test_df['yint'].values, [map, qwk], 'test')
    callbacks.append(eval)
    # 	monitor = 'test_map'

    ##############################
    ''' ModelCheckpoint '''

    wt_path = os.path.join(out_dir, 'weights.{}.hdf5'.format(pid))
    checkpt = ModelCheckpoint(wt_path,
                              monitor=monitor,
                              verbose=1,
                              save_best_only=True,
                              mode='max')
    callbacks.append(checkpt)

    ##############################
    ''' PR Curve '''
    from sklearn.metrics import precision_recall_curve
    import matplotlib.pyplot as plt

    class PR(object):
        def __init__(self,
                     model,
                     checkpoint,
                     x,
                     y,
                     prefix='test',
                     batch_size=128):
            self.model = model
            self.checkpoint = checkpoint
            self.x = x
            self.y = y
            self.prefix = prefix
            self.batch_size = batch_size

        def predict(self):
            self.model.load_weights(self.checkpoint.filepath)
            self.p = np.asarray(
                self.model.predict(self.x,
                                   batch_size=self.batch_size).squeeze())

        def pr_curve(self, y, p, s=''):
            aps = average_precision_score(y, p)
            precision, recall, _ = precision_recall_curve(y, p)
            name = '{}_{}'.format(self.prefix, 'pr_curve')
            plt.figure()
            plt.step(recall, precision, color='b', alpha=0.2, where='post')
            plt.fill_between(recall,
                             precision,
                             step='post',
                             alpha=0.2,
                             color='b')
            plt.xlabel('Recall')
            plt.ylabel('Precision')
            plt.ylim([0.0, 1.05])
            plt.xlim([0.0, 1.0])
            plt.title('PR curve (mode={1}): {2}, AUC={0:0.4f}'.format(
                aps, pid, s))

        def run_sample(self, q, n=1000):
            (y, p) = down_sample_bootstrap(self.y, self.p, q, n)
            ## draw curve
            self.pr_curve(y, p, s='{0}% off-mode'.format(int(q * 100)))
            ## make table
            print('\nMode={2}, {0}% off-mode (#samples={1}):'.format(
                int(q * 100), n, pid))
            print(tabulate(stats((y, p), n),
                           headers="firstrow",
                           floatfmt='.3f'))

        def run(self, Q=[0.1, 0.01]):
            self.predict()
            for q in Q:
                self.run_sample(q)
            return self.y, self.p

    pr = PR(model, checkpt, test_x, test_df['yint'].values, 'test')

    ##############################
    ''' LRplateau '''

    class LRplateau(ReduceLROnPlateau):
        def __init__(self,
                     monitor='val_loss',
                     factor=0.1,
                     patience=10,
                     verbose=0,
                     mode='auto',
                     epsilon=1e-4,
                     cooldown=0,
                     min_lr=0,
                     checkpoint=None):
            super(LRplateau, self).__init__(monitor, factor, patience, verbose,
                                            mode, epsilon, cooldown, min_lr)
            self.checkpoint = checkpoint

        def on_lr_reduce(self, epoch):
            if self.checkpoint:
                if self.verbose > 0:
                    print('Epoch {}: loading wts from {}.\n'.format(
                        epoch, self.checkpoint.filepath))
                self.model.load_weights(self.checkpoint.filepath)

        def on_epoch_end(self, epoch, logs=None):
            logs = logs or {}
            logs['lr'] = K.get_value(self.model.optimizer.lr)
            current = logs.get(self.monitor)
            if current is None:
                warnings.warn(
                    'Learning Rate Plateau Reducing requires %s available!' %
                    self.monitor, RuntimeWarning)
            else:
                if self.in_cooldown():
                    self.cooldown_counter -= 1
                    self.wait = 0

                if self.monitor_op(current, self.best):
                    self.best = current
                    self.wait = 0
                elif not self.in_cooldown():
                    if self.wait >= self.patience:
                        old_lr = float(K.get_value(self.model.optimizer.lr))
                        if old_lr > self.min_lr + self.lr_epsilon:
                            new_lr = old_lr * self.factor
                            new_lr = max(new_lr, self.min_lr)
                            K.set_value(self.model.optimizer.lr, new_lr)
                            if self.verbose > 0:
                                print(
                                    '\nEpoch {0}: reducing learning rate to {1:0.4g}.'
                                    .format(epoch, new_lr))
                            self.cooldown_counter = self.cooldown
                            self.wait = 0
                            self.on_lr_reduce(epoch)
                    self.wait += 1

    reduce_lr = LRplateau(monitor=monitor,
                          mode='max',
                          patience=3,
                          factor=0.33,
                          min_lr=0.00001,
                          verbose=1,
                          checkpoint=checkpt)

    callbacks.append(reduce_lr)

    ###############################################################################################################################
    ## Training
    if mode == 'train':
        model.fit(train_x,
                  train_y,
                  validation_data=(dev_x, dev_df['yint'].values),
                  batch_size=args.batch_size,
                  epochs=args.epochs,
                  callbacks=callbacks,
                  verbose=1)

    ## Evaluate ###############################################
    y, p = pr.run(Q=[0.2, 0.1, 0.05])
    return y, p
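
A minimal sketch of calling run() programmatically, training first and then rerunning in 'test' mode so the vocab.pkl saved during training is reused; every path and the prompt id below are placeholders, and only flags defined by the parser above appear:

    # Sketch (assumption): same output directory for both calls so weights and vocab are shared.
    common = ['-o', 'out', '-p', 'P1',
              '--emb', 'vectors.w2v.txt', '-e', '50',
              '-ao', '/abs/out', '-dp', '/abs/data.tsv']
    run(common + ['--mode', 'train', '--epochs', '20'])
    y, p = run(common + ['--mode', 'test'])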