Example #1
            i+=1
            #print("Index {} overeenkomstig label {}".format(index,targets[index]));sys.stdout.flush()
        else:
            indices.append(index)
    if shuffle:
        np.random.shuffle(indices)
    for start_idx in range(0, len(indices) - batch_size + 1, batch_size):
        excerpt = indices[start_idx:start_idx + batch_size]
        yield inputs[excerpt], targets[excerpt]
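
# Usage sketch (not part of the original example): a self-contained version of a
# minibatch generator like the one above and how it is consumed in a training
# loop. The name `minibatch_iter`, its signature, and the dummy arrays are
# hypothetical; the original fragment is truncated and does not show them.
import numpy as np

def minibatch_iter(inputs, targets, batch_size, shuffle=False):
    indices = np.arange(len(inputs))
    if shuffle:
        np.random.shuffle(indices)
    for start_idx in range(0, len(indices) - batch_size + 1, batch_size):
        excerpt = indices[start_idx:start_idx + batch_size]
        yield inputs[excerpt], targets[excerpt]

dummy_x = np.random.rand(10, 3)
dummy_y = np.arange(10)
for batch_x, batch_y in minibatch_iter(dummy_x, dummy_y, batch_size=4, shuffle=True):
    pass  # e.g. train_fn(batch_x, batch_y)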


import load_class
load = load_class.load(data_ratio)

import convnet
convnet = convnet.convnet(20)

try:
    i = 0
    path = "/home/jasper/oneshot-gestures/"
    while os.path.exists("{}output/acc-cost_{}.csv".format(path,i)):
        i += 1
    fo1 = open("{}output/acc-cost_{}.csv".format(path,i), "w")
    fo1.write("training_loss;validation_loss;validation_accuracy;epoch_time\n")
except IOError as e:
    print("I/O error({0}): {1}".format(e.errno, e.strerror))
    raise
except:
    print("unexpected error")
    raise
save_param_path = "{}model_parameters/param_model_19_2".format(path)
def main(unused_argv):
    if len(unused_argv) != 1:  # prints a message if you've entered flags incorrectly
        raise Exception("Problem with flags: %s" % unused_argv)

    # Loading the external information first
    extra_info = {}
    if os.path.exists(FLAGS.external_config):
        external_params = xml_parser.parse(FLAGS.external_config, flat=False)

        if 'sent2vec_params' in external_params:
            sent2vec_params = external_params['sent2vec_params']
            convnet_params = sent2vec_params['convnet_params']
            convnet_model2load = sent2vec_params['model2load']

            gamma = sent2vec_params['gamma'] if 'gamma' in sent2vec_params else 0.2

            my_convnet = convnet.convnet(convnet_params)
            my_convnet.train_validate_test_init()
            my_convnet.load_params(file2load=convnet_model2load)

            fixed_vars = tf.global_variables()
            fixed_vars.remove(my_convnet.embedding_matrix)

            extra_info['sent2vec'] = {'gamma': gamma, 'network': my_convnet}
            extra_info['fixed_vars'] = fixed_vars

        if 'key_phrases' in external_params:
            # TODO: pass some parameters to import the results of key-phrase extraction,
            # or parameters for online key-phrase extraction
            extra_info['key_phrases'] = {}
            raise NotImplementedError(
                'Key phrases part has not been implemented yet')

    tf.logging.set_verbosity(
        tf.logging.INFO)  # choose what level of logging you want
    tf.logging.info('Starting seq2seq_attention in %s mode...', (FLAGS.mode))

    # Change log_root to FLAGS.log_root/FLAGS.exp_name and create the dir if necessary
    FLAGS.log_root = os.path.join(FLAGS.log_root, FLAGS.exp_name)
    if not os.path.exists(FLAGS.log_root):
        if FLAGS.mode == "train":
            os.makedirs(FLAGS.log_root)
        else:
            raise Exception(
                "Logdir %s doesn't exist. Run in train mode to create it." %
                (FLAGS.log_root))

    vocab = Vocab(FLAGS.vocab_path, FLAGS.vocab_size)  # create a vocabulary

    # If in decode mode, set batch_size = beam_size
    # Reason: in decode mode, we decode one example at a time.
    # On each step, we have beam_size-many hypotheses in the beam, so we need to make a batch of these hypotheses.
    if FLAGS.mode == 'decode':
        FLAGS.batch_size = FLAGS.beam_size

    # Make a namedtuple hps, containing the values of the hyperparameters that the model needs
    hparam_list = [
        'mode', 'lr', 'adagrad_init_acc', 'rand_unif_init_mag',
        'trunc_norm_init_std', 'max_grad_norm', 'hidden_dim', 'emb_dim',
        'batch_size', 'max_dec_steps', 'max_enc_steps', 'coverage',
        'cov_loss_wt', 'pointer_gen'
    ]
    hps_dict = {}
    for key, val in FLAGS.__flags.iteritems():  # for each flag
        if key in hparam_list:  # if it's in the list
            hps_dict[key] = val  # add it to the dict
    hps = namedtuple("HParams", hps_dict.keys())(**hps_dict)

    # Create a batcher object that will create minibatches of data
    batcher = Batcher(FLAGS.data_path,
                      vocab,
                      hps,
                      single_pass=FLAGS.single_pass)

    tf.set_random_seed(111)  # a seed value for randomness

    if hps.mode == 'train':
        print "creating model..."
        model = SummarizationModel(hps, vocab, extra_info)
        setup_training(model, batcher)
    elif hps.mode == 'eval':
        model = SummarizationModel(hps, vocab, extra_info)
        run_eval(model, batcher, vocab)
    elif hps.mode == 'decode':
        decode_model_hps = hps  # This will be the hyperparameters for the decoder model
        decode_model_hps = hps._replace(
            max_dec_steps=1
        )  # The model is configured with max_dec_steps=1 because we only ever run one step of the decoder at a time (to do beam search). Note that the batcher is initialized with max_dec_steps equal to e.g. 100 because the batches need to contain the full summaries
        model = SummarizationModel(decode_model_hps, vocab, extra_info)
        decoder = BeamSearchDecoder(model, batcher, vocab)
        decoder.decode()  # decode indefinitely (unless single_pass=True, in which case decode the dataset exactly once)
    else:
        raise ValueError("The 'mode' flag must be one of train/eval/decode")
    embedding_loader_params = embedding_params['embedding_loader_params']
    source = embedding_params['source']
    format = embedding_params['format']
    my_embedding_loader = embedding_loader.embedding_loader(
        embedding_loader_params)

    assert convnet_model_params['embedding_dim']==my_embedding_loader.embedding_dim, \
        'dimensions of word embeddings do not match, convnet=%d, embedding_loader=%d'%(
        convnet_model_params['embedding_dim'], my_embedding_loader.embedding_dim)

    my_embedding_loader.load_embedding(source=source, format=format)
    embedding_matrix = my_embedding_loader.gen_embedding_matrix(
        generator=my_data_generator)
    convnet_model_params['embedding_matrix'] = embedding_matrix

my_network = convnet.convnet(convnet_model_params)
if not os.path.exists(model_saved_folder):
    os.makedirs(model_saved_folder)

my_network.train_validate_test_init()
if 'model2load' in network_params:
    try:
        my_network.load_params(network_params['model2load'])
    except:
        print('ERROR: Failed to load checkpoint: %s' %
              network_params['model2load'])

train_loss_dict = {}
validate_loss_dict = {}
train_loss = []
best_validation_loss = 1e8  # best validation loss



base_dir_path = "/home/jasper/oneshot-gestures/"
test_accuracies = []

load = load_class.load(size_ratio=1.0)
# Load data
x_validate, labels_validate, indices_validate = load.load_validation_set()
x_train, labels_train, indices_train = load.load_training_set()
x_test, labels_test, indices_test = load.load_testing_set()

convnet = convnet.convnet(num_output_units=20)
convnet.save_param_values("{}/default_param".format(base_dir_path))

for oneshot_class in xrange(20):
    print("Learning gestures excluding class {}".format(oneshot_class))

    save_param_path = "{}convnet_params/param-excl-class-{}".format(base_dir_path,oneshot_class)

    convnet.load_param_values(save_param_path)
    try:
        fo1 = open("{}output_19cl/excl-class-{}.csv".format(base_dir_path,oneshot_class),"w")
        fo1.write("training_loss;validation_loss;validation_accuracy;epoch_time\n")
    except IOError as e:
        print("I/O error({0}): {1}".format(e.errno, e.strerror))
    except:
        print("unexpected error")
        raise
Example #5
from keras.datasets import mnist
from keras import backend as K
from keras.layers import Input, Lambda
from keras.models import Model
from encode_data import encode_data
from show_neighbors import show_neighbors
from sklearn.neighbors import NearestNeighbors
from sklearn.manifold import TSNE
# convnet and euclidean_distance are assumed to come from local helper modules
# that the truncated example does not show.

(x_train, y_train), (x_test, y_test) = mnist.load_data()

# add the channel dimension expected by in_dim=(28, 28, 1) and scale to [0, 1]
x_train = x_train.reshape(-1, 28, 28, 1).astype('float32') / 255
x_test = x_test.reshape(-1, 28, 28, 1).astype('float32') / 255

in_dim = (28, 28, 1)
out_dim = 128

encoder = convnet(in_dim, out_dim)

in_a = Input(shape=in_dim)
in_p = Input(shape=in_dim)
in_n = Input(shape=in_dim)

emb_a = encoder(in_a)
emb_p = encoder(in_p)
emb_n = encoder(in_n)

positive_dist = Lambda(euclidean_distance, name='pos_dist')([emb_a, emb_p])
negative_dist = Lambda(euclidean_distance, name='neg_dist')([emb_a, emb_n])
tertiary_dist = Lambda(euclidean_distance, name='ter_dist')([emb_p, emb_n])

stacked_dists = Lambda(lambda vects: K.stack(vects, axis=1),
                       name='stacked_dists')([positive_dist, negative_dist, tertiary_dist])
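
# The example is truncated at this point. Below is a minimal sketch of how the
# stacked distances are commonly turned into a triplet-margin loss and a trainable
# model; the loss definition, the margin value of 0.2, the optimizer choice, and
# the assumption that euclidean_distance returns shape (batch, 1) are illustrative
# only, not part of the original example.
def triplet_loss(_, y_pred, margin=0.2):
    # assuming y_pred has shape (batch, 3, 1): [positive_dist, negative_dist, tertiary_dist]
    pos_dist = y_pred[:, 0]
    neg_dist = y_pred[:, 1]
    return K.mean(K.maximum(pos_dist - neg_dist + margin, 0.0))

triplet_model = Model(inputs=[in_a, in_p, in_n], outputs=stacked_dists)
triplet_model.compile(optimizer='adam', loss=triplet_loss)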
Example #6
    if output_file.split('.')[-1] in allowed_format:
        pure_name='.'.join(output_file.split('.')[:-1])
        mark=pure_name.split('_')[0]
        suffix=pure_name.split('_')[-1]
        if suffix==output_suffix:
            output_name2file[mark]=output_folder+os.sep+output_file
print('In the decode folder, there are %d detected files'%len(output_name2file.keys()))

name2files={}
for key in refer_name2file:
    if key in output_name2file:
        name2files[key]=(refer_name2file[key],output_name2file[key])
print('There are %d reference-decode pairs detected'%len(name2files.keys()))

# Load model
my_convnet=convnet.convnet(convnet_params)
my_convnet.train_validate_test_init()
my_convnet.load_params(file2load=model2load)

word2idx=cPickle.load(open(word2idx_file, 'r'))

tosave={'summary':{'cosine_average':0.0, 'dist_average':0.0, 'valid_documents':0},'details':[]}
r=rouge.Rouge()
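# Illustration (not part of the original example): the rouge package scores one
# hypothesis string against one reference string like this; each entry maps
# 'rouge-1'/'rouge-2'/'rouge-l' to recall ('r'), precision ('p') and F1 ('f').
sample_scores = r.get_scores('the cat sat on the mat', 'a cat sat on the mat')
print('sample ROUGE-L F1: %.3f' % sample_scores[0]['rouge-l']['f'])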

for idx,key in enumerate(name2files):
    sys.stdout.write('loading documents %d/%d=%.1f%%\r'%((idx+1),len(name2files.keys()),
        float(idx+1)/float(len(name2files.keys()))*100))
    sys.stdout.flush()
    refer_file,output_file=name2files[key]
    try:
        refer_sequence=open(refer_file, 'r').readlines()
def main(unused_argv):
    if len(unused_argv) != 1:
        raise Exception('Problem with flags: %s' % str(unused_argv))

    try:
        assert (FLAGS.mode == 'decode')
    except:
        raise ValueError('mode must be "decode" but it is %s' %
                         str(FLAGS.mode))
    FLAGS.log_root = os.path.join(FLAGS.log_root, FLAGS.exp_name)
    try:
        assert (os.path.exists(FLAGS.log_root))
    except:
        raise ValueError('Invalid log_root: %s' % str(FLAGS.log_root))
    FLAGS.batch_size = FLAGS.beam_size

    data_manager = BinaryDataManager(binary_file=FLAGS.data_path,
                                     single_pass=True)
    data_manager.load_data()

    # Loading the external information first
    extra_info = {}
    if os.path.exists(FLAGS.external_config):
        external_params = xml_parser.parse(FLAGS.external_config, flat=False)

        if 'sent2vec_params' in external_params:
            sent2vec_params = external_params['sent2vec_params']
            convnet_params = sent2vec_params['convnet_params']
            convnet_model2load = sent2vec_params['model2load']

            gamma = sent2vec_params['gamma'] if 'gamma' in sent2vec_params else 0.2

            my_convnet = convnet.convnet(convnet_params)
            my_convnet.train_validate_test_init()
            my_convnet.load_params(file2load=convnet_model2load)

            fixed_vars = tf.global_variables()
            fixed_vars.remove(my_convnet.embedding_matrix)

            extra_info['sent2vec'] = {'gamma': gamma, 'network': my_convnet}
            extra_info['fixed_vars'] = fixed_vars

        if 'key_phrases' in external_params:
            # TODO: pass some parameters to import the results of key-phrase extraction,
            # or parameters for online key-phrase extraction
            extra_info['key_phrases'] = {}
            raise NotImplementedError(
                'Key phrases part has not been implemented yet')

    model_hp_list = [
        'mode', 'lr', 'adagrad_init_acc', 'rand_unif_init_mag',
        'trunc_norm_init_std', 'max_grad_norm', 'hidden_dim', 'emb_dim',
        'batch_size', 'max_dec_steps', 'max_enc_steps', 'coverage',
        'cov_loss_wt', 'pointer_gen'
    ]
    model_hp_dict = {}
    for key, value in FLAGS.__flags.iteritems():
        if key in model_hp_list:
            model_hp_dict[key] = value
    model_settings = namedtuple('HParams',
                                model_hp_dict.keys())(**model_hp_dict)
    model_settings = model_settings._replace(max_dec_steps=1)

    for folder in [
            FLAGS.article_folder, FLAGS.refer_folder, FLAGS.output_folder
    ]:
        if not os.path.exists(folder):
            os.makedirs(folder)

    solver = RunTimeWrapper(hp=FLAGS,
                            model_settings=model_settings,
                            extra_info=extra_info)
    solver.start()
    result2write = ''
    for idx, (article, abstract) in enumerate(data_manager.text_abstract_pair):
        sys.stdout.write(
            'Analyzing documents %d/%d = %.1f%% \r' %
            (idx + 1, len(data_manager.text_abstract_pair), float(idx + 1) /
             float(len(data_manager.text_abstract_pair)) * 100))
        sys.stdout.flush()
        _, summary = solver.run(query=article)
        abstract = '\n'.join(abstract)
        # Reference and compare
        with open(FLAGS.article_folder + os.sep + '%04d_article.txt' % idx,
                  'w') as fopen:
            fopen.write(article)
        with open(FLAGS.refer_folder + os.sep + '%04d_reference.txt' % idx,
                  'w') as fopen:
            fopen.write(abstract)
        with open(FLAGS.output_folder + os.sep + '%04d_decode.txt' % idx,
                  'w') as fopen:
            fopen.write(summary)
        result2write += '\n\n===\n%s\n\n>>>refer:\n%s\n\n>>>output:\n%s\n' % (
            article, abstract, summary)
        if (idx + 1) % 100 == 0:
            with open('results.txt', 'w') as fopen:
                fopen.write(result2write)
    solver.end()
Example #8
data_dir = arguments['data_dir']
output_dir = arguments['output_dir']


with tf.Graph().as_default():
    tf.set_random_seed(1234)

    image_data, label_data = read.preprocess(file=os.path.join(data_dir, 'train.json'))
    batch = read.batch(images=image_data, labels=label_data)

    # training
    X = tf.placeholder(dtype=tf.float32, shape=(100, 75, 75, 2))
    Y = tf.placeholder(dtype=tf.int32, shape=(100, 1))

    output = convnet.convnet(X)

    loss = convnet.loss(output, Y)
    loss_summary = tf.summary.scalar(name='loss_summary', tensor=loss)

    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)

    grads = optimizer.compute_gradients(loss)
    train_op = optimizer.apply_gradients(grads)

    saver = tf.train.Saver(save_relative_paths=True)

    # eval ops
    eval_images = tf.placeholder(dtype=tf.float32, name='eval_images')
    eval_op = convnet.convnet(eval_images)
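
    # The example is truncated here. A minimal sketch of the session loop that
    # typically follows this graph setup; the step count, the checkpoint path, and
    # the assumption that `batch` is an (images, labels) pair of tensors are
    # illustrative only, since the read module's API is not shown.
    init_op = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init_op)
        for step in range(1000):
            batch_images, batch_labels = sess.run(batch)
            _, loss_value = sess.run([train_op, loss],
                                     feed_dict={X: batch_images, Y: batch_labels})
            if step % 100 == 0:
                print('step %d, loss %.4f' % (step, loss_value))
                saver.save(sess, os.path.join(output_dir, 'model.ckpt'), global_step=step)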