Example #1
    parser.add_argument('-dataset_faces_folder',
                        default=DEFAULT_DATA_FACES_PATH,
                        help='Path to the images file')
    parser.add_argument('-dataset_audios_folder',
                        default=DEFAULT_DATA_AUDIOS_PATH,
                        help='Path to the audios file')
    parser.add_argument('-checkpoint_dir',
                        default=DEFAULT_CHECKPOINT_DIR,
                        help='Model checkpoint to use')
    parser.add_argument('-log_dir',
                        default=DEFAULT_LOG_DIR,
                        help='Directory to write training logs to')
    parser.add_argument('-resume',
                        default="True",
                        help='Resume training ("True" or "False")')

    args = parser.parse_args()

    if args.resume == "False":
        if tf.gfile.Exists(args.log_dir):
            tf.gfile.DeleteRecursively(args.log_dir)
        tf.gfile.MakeDirs(args.log_dir)

    if not os.path.isdir(os.path.dirname(args.checkpoint_dir)):
        os.mkdir(os.path.dirname(args.checkpoint_dir))

    train(batch_size=16,
          epochs=10,
          dataset=DataInput(args.dataset_faces_folder,
                            args.dataset_audios_folder, "train"),
          log_dir=args.log_dir)
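
The `-resume` option is read as a free-form string and later compared against the literal `"False"`, so any other spelling (for example `false` or an empty value) silently keeps the resume behaviour. A minimal sketch of a stricter variant, shown as an assumption rather than the original script, restricts the accepted values and converts the flag to a real boolean once:

import argparse

parser = argparse.ArgumentParser(description='Train script')
# Only accept the two spellings the rest of the script checks for.
parser.add_argument('-resume',
                    default="True",
                    choices=["True", "False"],
                    help='Resume training ("True" or "False")')
args = parser.parse_args()
resume = args.resume == "True"  # use a real bool from here on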
Example #2
flags.DEFINE_float('decay_rate', 0.75, 'decay rate, default: 0.75')
flags.DEFINE_float('keep_prob', 0.5, 'keep_prob for training, default: 0.5')
flags.DEFINE_integer('batch_size', 50, 'batch_size')

flags.DEFINE_integer('decay_step', 1000, 'decay_step, default: 1000')
flags.DEFINE_integer('valid_step', 500, 'valid_step, default: 500')
flags.DEFINE_float('last_f1', 0.10, 'if valid_f1 > last_f1, save new model. default: 0.10')
FLAGS = flags.FLAGS

lr = FLAGS.lr
last_f1 = FLAGS.last_f1
epoch = FLAGS.max_max_epoch
train_batch_size = FLAGS.batch_size
checkpoint_dir = '/Users/slade/Documents/YMM/Code/tf/model/ckpt'
gpu_config = tf.ConfigProto()
gpu_config.gpu_options.allow_growth = True
with tf.Session(config=gpu_config) as sess:
    model = Model(args)

    # init variables
    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())

    time0 = time.time()
    for batch in tqdm(range(epoch)):
        global_step = sess.run(model.global_step)
        for _, uij in DataInput(train_set, train_batch_size):
            # training
            feed_dict = {model.inputs: uij, model.keep_prob: FLAGS.keep_prob, model.lr: lr}
            summary, _cost, _, _ = sess.run(train_fetches, feed_dict)  # the cost is the mean cost of one batch
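
The `DataInput` class iterated here is not shown; the pattern `for _, uij in DataInput(train_set, train_batch_size)` implies an iterable that yields `(batch_index, batch)` pairs. A hypothetical minimal sketch of such a batcher (names and behaviour are assumptions, not the original class):

class DataInput:
    """Hypothetical sketch: iterate a dataset in fixed-size batches."""

    def __init__(self, data, batch_size):
        self.data = data
        self.batch_size = batch_size

    def __iter__(self):
        # Yield (batch_index, batch) pairs, matching `for _, uij in DataInput(...)`.
        for i in range(0, len(self.data), self.batch_size):
            yield i // self.batch_size, self.data[i:i + self.batch_size]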
Example #3
from data_input import DataInput
from PIL import Image
import numpy as np
from scipy.misc import imsave
from skimage.exposure import histogram

data_path_faces = "/storage/dataset"

if __name__ == '__main__':
    threshold = 700
    dataset = DataInput("/storage/dataset",
                        "/storage/dataset_videos/cropped_videos/outputb",
                        "train")
    items_faces, items_audio = dataset.get_items()
    input_images = np.empty([len(items_faces), 64, 64, 3])
    count = 0
    index = [0, 3, 6, 8, 9, 17, 21, 29]
    references = np.empty(shape=[len(index), 64, 10, 3])
    hist_references = np.empty(shape=[len(index), 256])
    bins_references = np.empty(shape=[len(index), 257])
    ind_count = 0
    for ind in index:
        reference = Image.open(items_faces[ind])
        reference = np.asarray(reference, dtype=float)
        reference = reference[:, 0:10, :]
        references[ind_count] = reference
        hist_reference, bins_reference = np.histogram(reference,
                                                      bins=256,
                                                      range=(0, 255))
        hist_references[ind_count] = hist_reference
        bins_references[ind_count] = bins_reference
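
`threshold = 700` is defined at the top of this excerpt but never used in the lines shown. One plausible continuation, stated purely as an assumption, is to histogram each new 10-pixel strip in the same way and accept it when the total absolute difference to at least one reference histogram stays below the threshold:

def matches_reference(strip, hist_references, threshold=700):
    # Hypothetical helper: compare a strip's histogram against the stored references.
    hist, _ = np.histogram(strip, bins=256, range=(0, 255))
    diffs = np.abs(hist_references - hist).sum(axis=1)
    return bool((diffs < threshold).any())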
Example #4
def main():

    config = json.load(open("config.json", "r"))

    DATA_PATH = config["DATA_PATH"]
    INITIAL_LR = float(config["INITIAL_LR"])
    DECAY_STEPS_LR = int(config["DECAY_STEPS_LR"])
    DECAY_FACTOR_LR = float(config["DECAY_FACTOR_LR"])
    BATCH_SIZE = int(config["BATCH_SIZE"])
    NUM_STEPS = int(config["NUM_STEPS"])
    OUTPUT_DIR = config["OUTPUT_DIR"]
    VAL_SET_SIZE = int(config["VAL_SET_SIZE"])
    KEEP_DROPOUT_PROB = float(config["KEEP_DROPOUT_PROB"])
    WEIGHT_DECAY = float(config["WEIGHT_DECAY"])
    MODEL = config["MODEL"]
    AUGMENT_PROB = float(config["AUGMENT_PROB"])
    LOSS = config["LOSS"]

    # create model output folder and copy corresponding config file to it
    now = datetime.now()
    current_time = now.strftime("%D_%H%M%S")
    current_time = current_time.replace("/", "")
    model_path = os.path.join(OUTPUT_DIR, "_".join([MODEL, current_time]))
    if not os.path.exists(model_path):
        os.mkdir(model_path)
    shutil.copy("config.json", os.path.join(model_path, "config.json"))
    shutil.copy("CNN_models/%s.py" % MODEL, os.path.join(model_path, "model.py"))

    # placeholders
    learning_rate_ph = tf.placeholder(tf.float32, shape=[], name="learning_rate_ph")
    images_ph = tf.placeholder(tf.float32, shape=[None, 28, 28, 1], name="input_images_ph")
    labels_ph = tf.placeholder(tf.int32, shape=[None], name="labels_ph")
    accuracy_ph = tf.placeholder(tf.float32, shape=[], name="accuracy_ph")
    accuracy_per_class_phs = [tf.placeholder(tf.float32, shape=[], name="accuracy_per_class/class_%s_ph" % str(i)) for i in range(10)]
    training_ph = tf.placeholder(tf.bool, shape=[], name="training_ph")

    # choose model
    if MODEL == "simple_model_1":
        logits = simple_model_1(images_ph, dropout_prob=KEEP_DROPOUT_PROB, weight_decay=WEIGHT_DECAY, is_training=training_ph)
    elif MODEL == "simple_model_2":
        logits = simple_model_2(images_ph, dropout_prob=KEEP_DROPOUT_PROB, weight_decay=WEIGHT_DECAY, is_training=training_ph)
    elif MODEL == "inception":
        logits = inception(images_ph, dropout_prob=KEEP_DROPOUT_PROB, weight_decay=WEIGHT_DECAY, is_training=training_ph)
    elif MODEL == "resnet":
        logits = resnet(images_ph, dropout_prob=KEEP_DROPOUT_PROB, weight_decay=WEIGHT_DECAY, is_training=training_ph)
    elif MODEL == "inception_resnet":
        logits = inception_resnet(images_ph, dropout_prob=KEEP_DROPOUT_PROB, weight_decay=WEIGHT_DECAY, is_training=training_ph)

    # create loss
    if LOSS == "CROSS_ENTROPY":
        loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels_ph, name="c_entropy"))
    elif LOSS == "CROSS_ENTROPY_WEIGHTED":
        class_weights = tf.constant([2, 1, 2, 1, 2, 1, 2, 1, 1, 1])
        weights = tf.gather(class_weights, labels_ph)
        loss = tf.losses.sparse_softmax_cross_entropy(labels=labels_ph, logits=logits, weights=weights)
    elif LOSS == "FOCAL":
        gamma = 2
        preds = tf.nn.softmax(logits, dim=-1)
        labels_one_hot = tf.one_hot(labels_ph, depth=preds.shape[1])
        loss = -labels_one_hot * ((1 - preds) ** gamma) * tf.log(preds)
        loss = tf.reduce_mean(tf.reduce_sum(loss, axis=1))

    optimizer = tf.train.AdamOptimizer(learning_rate_ph)
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        train_op = optimizer.minimize(loss)

    # create input reader object
    data_input = DataInput(DATA_PATH, BATCH_SIZE, VAL_SET_SIZE, AUGMENT_PROB)
    val_images, val_labels = data_input.get_val_set()

    # saver
    saver = tf.train.Saver()

    # summaries
    tf.summary.scalar("loss", loss)
    tf.summary.scalar("learning_rate", learning_rate_ph)
    tf.summary.scalar("accuracy", accuracy_ph)
    for i in range(10):
        tf.summary.scalar("accuracy_per_class/class_%s" % str(i), accuracy_per_class_phs[i])

    summary_op = tf.summary.merge_all()

    with tf.Session() as sess:

        sess.run(tf.global_variables_initializer())

        train_writer = tf.summary.FileWriter(os.path.join(model_path, "train"), sess.graph)
        val_writer = tf.summary.FileWriter(os.path.join(model_path, "validation"), sess.graph)

        # number of parameters
        total_parameters = 0
        for variable in tf.trainable_variables():
            shape = variable.get_shape()
            variable_parameters = 1
            for dim in shape:
                variable_parameters *= dim.value
            total_parameters += variable_parameters
        print("total_params: " + str(total_parameters))

        train_summary_pred = []
        train_summary_labels = []

        for i in range(1, NUM_STEPS+1):

            print("Step: " + str(i))

            train_images, train_labels = data_input.get_batch()

            power = i // DECAY_STEPS_LR
            learning_rate = INITIAL_LR * DECAY_FACTOR_LR ** power

            train_feed_dict = {images_ph: train_images, labels_ph: train_labels, learning_rate_ph: learning_rate, training_ph: True}
            sess.run(train_op, feed_dict=train_feed_dict)

            if i % 5000 == 0:
                saver.save(sess, os.path.join(model_path, "model.ckpt"), i)

            if i % 4 == 0:
                train_logits = sess.run(logits, feed_dict={images_ph: train_images, training_ph: False})
                pred = np.argmax(train_logits, axis=1)
                train_summary_pred += [p for p in pred]
                train_summary_labels += [l for l in train_labels]

            if i % 500 == 0:
                train_summary_pred = np.array(train_summary_pred)
                train_summary_labels = np.array(train_summary_labels)
                hits = train_summary_pred == train_summary_labels
                accuracy = np.round(np.sum(hits) / len(hits) * 100, decimals=2)
                accuracy_per_class = []
                for cl in range(10):
                    accuracy_cl = np.round(np.sum(hits[train_summary_labels == cl]) / np.sum(train_summary_labels == cl) * 100, decimals=2)
                    accuracy_per_class.append(accuracy_cl)

                train_feed_dict[accuracy_ph] = accuracy
                for cl in range(10):
                    train_feed_dict[accuracy_per_class_phs[cl]] = accuracy_per_class[cl]
                summary_train = sess.run(summary_op, feed_dict=train_feed_dict)
                train_writer.add_summary(summary_train, i)
                train_summary_labels = []
                train_summary_pred = []

                # validation summary
                val_logits = sess.run(logits, feed_dict={images_ph: val_images, training_ph: False})
                pred = np.argmax(val_logits, axis=1)
                hits = pred == val_labels
                accuracy = np.round(np.sum(hits) / len(pred) * 100, decimals=2)
                accuracy_per_class = []
                for cl in range(10):
                    accuracy_cl = np.round(np.sum(hits[val_labels == cl]) / np.sum(val_labels == cl) * 100, decimals=2)
                    accuracy_per_class.append(accuracy_cl)

                # evaluate the validation summary in inference mode (training_ph: False)
                val_feed_dict = {images_ph: val_images, labels_ph: val_labels, accuracy_ph: accuracy, learning_rate_ph: learning_rate, training_ph: False}
                for cl in range(10):
                    val_feed_dict[accuracy_per_class_phs[cl]] = accuracy_per_class[cl]
                summary_val = sess.run(summary_op, feed_dict=val_feed_dict)
                val_writer.add_summary(summary_val, i)
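
The focal-loss branch above applies `tf.log(preds)` directly, which yields `-inf` (and NaN gradients) whenever the softmax output reaches exactly zero. A common guard, sketched here in the same TF1 style under the assumption that clipping is acceptable, is to bound the probabilities before taking the log:

def focal_loss(logits, labels, gamma=2.0, eps=1e-8):
    # Sketch: numerically safer focal loss; clip softmax outputs to avoid log(0).
    preds = tf.nn.softmax(logits, dim=-1)
    preds = tf.clip_by_value(preds, eps, 1.0)
    labels_one_hot = tf.one_hot(labels, depth=preds.shape[1])
    per_example = -labels_one_hot * ((1 - preds) ** gamma) * tf.log(preds)
    return tf.reduce_mean(tf.reduce_sum(per_example, axis=1))

With this helper, the FOCAL branch would reduce to `loss = focal_loss(logits, labels_ph)`.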
Example #5
                # ##========================= train LSGAN =========================###
                summary_str, gLoss, dLoss, _, _ = sess.run([summary, g_loss, d_loss, g_optim, d_optim],
                                              feed_dict={images: input_images, z: input_z,
                                                         y_gan_real: labels_real, y_gan_fake: labels_fake,
                                                         y_generator: labels_generator})
                print("Epoch: %2d Iteration: %2d gLoss: %.8f dLoss: %.8f." % (j, iteration, gLoss, dLoss))
                summary_writer.add_summary(summary_str, iteration)



if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Predict script')
    parser.add_argument('-dataset_faces_folder', default=DEFAULT_DATA_FACES_PATH, help='Path to the images file')
    parser.add_argument('-dataset_audios_folder', default=DEFAULT_DATA_AUDIOS_PATH, help='Path to the audios file')
    parser.add_argument('-checkpoint_dir', default=DEFAULT_CHECKPOINT_DIR, help='Model checkpoint to use')
    parser.add_argument('-log_dir', default=DEFAULT_LOG_DIR, help='Directory to write training logs to')
    parser.add_argument('-resume', default="True", help='Resume training ("True" or "False")')

    args = parser.parse_args()

    if args.resume == "False":
        if tf.gfile.Exists(args.log_dir):
            tf.gfile.DeleteRecursively(args.log_dir)
        tf.gfile.MakeDirs(args.log_dir)

    if not os.path.isdir(os.path.dirname(args.checkpoint_dir)):
        os.mkdir(os.path.dirname(args.checkpoint_dir))

    train(batch_size=16, epochs=10, dataset=DataInput(args.dataset_faces_folder, args.dataset_audios_folder,
                                                     "train"), log_dir=args.log_dir)
Example #6
logging = tf.logging
flags = tf.flags
flags.DEFINE_bool("verbose", False, "To talk or not to talk")
flags.DEFINE_string("save_path", None, "Model output directory")
flags.DEFINE_string("config_file", None, "Model config file")
FLAGS = flags.FLAGS

if __name__ == "__main__":
    if not tf.gfile.Exists('./save'):
        tf.gfile.MkDir('./save')

    # Config stuff
    config = get_config(FLAGS)

    data_input = DataInput(config)
    train_batches = data_input.train_epoch_size
    val_batches = data_input.val_epoch_size

    # Model building
    if config.model_type == 'dmnn':
        if config.model_version == 'v1':
            model_wrap = DMNNv1(config)

    if FLAGS.verbose:
        print('DMNN model:')
        print(model_wrap.model.summary())

    if config.epoch > 0:
        model_wrap.model = restore_keras_model(
            model_wrap.model, config.save_path + '_weights.hdf5')
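
`restore_keras_model` is defined outside this excerpt. A hypothetical minimal version, assuming it only reloads previously saved Keras weights from the HDF5 file, could look like this:

def restore_keras_model(model, weights_path):
    # Hypothetical sketch: reload saved weights into the already-built Keras model.
    model.load_weights(weights_path)
    return model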
Example #7
logging = tf.logging
flags = tf.flags
flags.DEFINE_bool("verbose", False, "To talk or not to talk")
flags.DEFINE_string("save_path", None, "Model output directory")
flags.DEFINE_string("config_file", None, "Model config file")
FLAGS = flags.FLAGS

if __name__ == "__main__":
    _reset_rand_seed()
    if not tf.gfile.Exists('./save'):
        tf.gfile.MkDir('./save')

    # Config stuff
    config = get_config(FLAGS)

    data_input = DataInput(config)
    _reset_rand_seed()
    train_batches = data_input.train_epoch_size
    train_generator = data_input.batch_generator(True)
    val_batches = data_input.val_epoch_size
    val_generator = data_input.batch_generator(False)

    # Model building
    if config.model_type == 'motiongan':
        model_wrap = get_model(config)

    if FLAGS.verbose:
        print('Discriminator model:')
        print(model_wrap.disc_model.summary())
        print('Generator model:')
        print(model_wrap.gen_model.summary())
Example #8
flags.DEFINE_string("config_file", "motiongan_v1_fae_h36", "Model config file")
FLAGS = flags.FLAGS


def _reset_rand_seed():
    seed = 42
    np.random.seed(seed)


if __name__ == "__main__":
    # Config stuff
    config = get_config(FLAGS)
    # config.only_val = True
    config.normalize_data = False
    # config.pick_num = 0
    data_input = DataInput(config)
    _reset_rand_seed()

    n_batches = 4
    n_splits = 32
    print('Plotting %d batches in %d splits for the %s dataset' %
          (n_batches, n_splits, config.data_set))
    for b in range(n_batches):

        labs_batch, poses_batch = data_input.batch_generator(False).next()

        n_seqs = (config.batch_size // n_splits)
        for i in trange(n_splits):
            plot_seq_gif(
                poses_batch[i * n_seqs:(i + 1) * n_seqs, :, :, :3],
                labs_batch[i * n_seqs:(i + 1) * n_seqs, ...],
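
`data_input.batch_generator(False).next()` uses the Python 2 generator method; under Python 3 the equivalent call on the same generator object would be:

labs_batch, poses_batch = next(data_input.batch_generator(False))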
Example #9
 def __init__(self):
     self.data = DataInput().inputFunction()
     print("\n\n" +
           "WELCOME TO THE MACHINE LEARNING PREPROCESSOR CLI!!!\n" + "\n\n")
Example #10
        try:
            iteration = 0
            while not coord.should_stop():
                iteration += 1
                # ##========================= train SRGAN =========================###
                kt, mGlobal, _, _ = sess.run(
                    [k_update, m_global, g_optim, d_optim])
                print("kt: %.8f Mglobal: %.8f" % (kt, mGlobal))
                summary_str = sess.run(summary)
                summary_writer.add_summary(summary_str, iteration)

                summary_writer.flush()

                # ##========================= evaluate data =========================###

        except tf.errors.OutOfRangeError:
            print('Done -- epoch limit reached')
        finally:
            coord.request_stop()
            coord.join(threads)


if __name__ == '__main__':
    data_path = "/storage/dataset_videos/audio2faces_dataset/"
    log_dir = "/storage/irina/logs"
    train(batch_size=16,
          epochs=1000,
          dataset=DataInput(data_path, "train"),
          log_dir=log_dir)
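
The `coord` and `threads` objects used in the try/finally block are created outside this excerpt. In the usual TF1 queue-runner pattern, sketched below as an assumption about the omitted setup, they come from a `Coordinator` and `start_queue_runners`:

# Sketch of the standard TF1 setup implied by the excerpt's coord/threads.
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)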
Example #11
                            nrows=3629,
                            encoding='utf-8',
                            dtype={'TICKER_SYMBOL': 'str'},
                            usecols=[1, 6, 8],
                            parse_dates=[2])
market_info = market_info[market_info.TICKER_SYMBOL.isin(pre_list.TICKER_SYMBOL)] \
    .assign(MARKET_VALUE=market_info.MARKET_VALUE / 10e7).reset_index(drop=True)

### Macroeconomic data
macro_info = pd.read_excel(path % 'fddc1_data/Macro&Industry.xlsx', header=0,
                           sheet_name='INDIC_DATA', encoding='utf8',
                           dtype={'indic_id': 'str'}, parse_dates=[1],
                           usecols=[0, 4, 5], index_col='PERIOD_DATE')['20101231':]
macro_info.index = macro_info.index + datetime.timedelta(85)
### Financial statement data
df_lst = DataInput(path='../../fddc1_data/financial_data/%s',
                   comlist=pre_list.TICKER_SYMBOL,
                   last=False)
#df_lst.to_csv(path%'/47_152/data/df_fst.csv',index=False)
#df_lst = pd.read_csv(path%'/47_152/data/df_lst.csv',dtype = {'TICKER_SYMBOL':'str'})
### Convert to quarterly data
df_normal = GroupSeries(df_lst, dropnan=True)
### Add macro data
df_model_macro = AddMacroData(df_normal, macro_info)
### Model training
df_xgb_ind = RevenuePre(df_model_macro)

df_predict_ind = RevenueTran(df_model_macro, df_xgb_ind, market_info,
                             [1.24, 1.07, 1.00, 1.15], 0.5, 0.75, 0.24)
df_submit_ind = RevenueCom(df_predict_ind, pre_list)

### Industry revenue model
Example #12
 def __init__(self):
     self.data = DataInput().inputFunction()
     print("\n\n" + self.bold_text_start +
           "MACHINE LEARNING PREPROCESSOR CLI" + self.bold_text_end +
           "\n\n")