def read_overmap_terrain_data() -> None:
    """
    Fill the OVERMAP_TERRAIN_COLORS
    """
    overmap_terrain_data, errors = import_data(
        json_dir=Path('../../data/json/overmap/overmap_terrain/'),
        json_fmatch='*.json',
    )
    if errors:
        print(errors)

    for entry in overmap_terrain_data:
        if entry.get('type') != 'overmap_terrain':
            continue
        entry_ids = entry.get('id')
        if not entry_ids:
            continue

        color_name = entry.get('color')
        COLOR_NAMES.add(color_name)

        color = SCHEME['colors'].get(color_name)
        if not color:
            continue

        if isinstance(entry_ids, str):
            entry_ids = (entry_ids, )

        for terrain_id in entry_ids:
            OVERMAP_TERRAIN_DATA[terrain_id] = color


def read_mapgen_palettes() -> None:
    """
    Fill the PALETTES global
    """
    palette_entries, errors = import_data(
        json_dir=Path('../../data/json/mapgen_palettes/'),
        json_fmatch='*.json',
    )
    if errors:
        print(errors)

    for entry in palette_entries:
        add_palette(entry)


def get_mapgen_data(
    mapgen_dir: Path,
    pattern: str,
) -> list:
    """
    Get all mapgen entries
    """
    mapgen_data, errors = import_data(
        json_dir=mapgen_dir,
        json_fmatch=pattern,
    )
    if errors:
        print(errors)

    return mapgen_data


def read_terrain_color_names() -> None:
    """
    Fill the TERRAIN_COLOR_NAMES global
    """
    terrain_data, errors = import_data(
        json_dir=Path('../../data/json/furniture_and_terrain/'),
        json_fmatch='terrain*.json',
    )
    if errors:
        print(errors)

    for terrain in terrain_data:
        terrain_type = terrain.get('type')
        terrain_id = terrain.get('id')
        terrain_color = terrain.get('color')
        if isinstance(terrain_color, list):
            terrain_color = terrain_color[0]
        if terrain_type == 'terrain' and terrain_id and terrain_color:
            TERRAIN_COLOR_NAMES[terrain_id] = terrain_color
            COLOR_NAMES.add(terrain_color)
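Note: every example on this page leans on a shared import_data helper from the project's util module, called either with no arguments or with json_dir/json_fmatch keywords and unpacked into a (data, errors) pair. Its real implementation is not shown here; a minimal sketch of the assumed behaviour, inferred only from these call sites, could be:

import json
from pathlib import Path


def import_data(json_dir=Path('../../data/json/'), json_fmatch='*.json'):
    """Load every JSON file under json_dir matching json_fmatch (sketch only).

    Returns (data, errors): data is a flat list of the parsed JSON objects,
    errors is a list of messages for files that failed to parse.
    """
    data, errors = [], []
    for json_file in sorted(json_dir.rglob(json_fmatch)):
        try:
            with open(json_file, encoding='utf-8') as fh:
                loaded = json.load(fh)
        except json.JSONDecodeError as err:
            errors.append('%s: %s' % (json_file, err))
            continue
        # A file may hold a single object or a list of objects.
        data.extend(loaded if isinstance(loaded, list) else [loaded])
    return data, errors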
Example #5
parser.add_argument("--fnmatch",
        default="*.json",
        help="override with glob expression to select a smaller fileset.")
parser.add_argument("--all",
        action="store_true",
        help="if set, includes all matches. if not set, includes first match in the stream.")
parser.add_argument("where",
        action=WhereAction, nargs='+', type=str,
        help="where exclusions of the form 'where_key=where_val', no quotes.")



if __name__ == "__main__":
    args = parser.parse_args()

    json_data, load_errors = import_data(json_fmatch=args.fnmatch)
    if load_errors:
        # If we start getting unexpected JSON or other things, might need to
        # revisit quitting on load_errors
        print("Error loading JSON data.")
        for e in load_errors:
            print(e)
        sys.exit(1)
    elif not json_data:
        print("No data loaded.")
        sys.exit(1)

    # Wasteful iteration, but less code to maintain on a tool that will likely
    # change again.
    plucked = [item for item in json_data if matches_all_wheres(item, args.where)]
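WhereAction and matches_all_wheres are further helpers assumed to come from the same util module. Judging from the 'where_key=where_val' syntax described in the --help strings, a rough, hypothetical sketch would be:

import argparse


class WhereAction(argparse.Action):
    """Parse positional 'key=value' tokens into (key, value) tuples."""

    def __call__(self, parser, namespace, values, option_string=None):
        pairs = []
        for token in values:
            key, sep, val = token.partition('=')
            if not sep:
                parser.error("where clauses must look like 'key=value'")
            pairs.append((key, val))
        setattr(namespace, self.dest, pairs)


def matches_where(item, where_key, where_value):
    """True if the JSON object has where_key and its value stringifies to where_value."""
    return where_key in item and str(item[where_key]) == str(where_value)


def matches_all_wheres(item, wheres):
    """True only if every (key, value) clause matches the item."""
    return all(matches_where(item, k, v) for k, v in wheres)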
Example #6
        if not stats:
            print("Sorry, didn't find any stats for '%s' in the JSON." % search_key)
            sys.exit(1)

        title = "Count of values from field '%s'" % search_key
        print("\n\n%s" % title)
        print("(Data from %s out of %s blobs)" % (num_matches, len(json_data)))
        print("-" * len(title))
        ui_counts_to_columns(stats)
    elif len(sys.argv) == 3 and sys.argv[2] == "--json":
        # Count values associated with key, machine output.
        search_key = sys.argv[1]
        where_key = None
        where_value = None

        json_data = import_data()[0]
        stats, num_matches = value_counter(json_data, search_key, where_key, where_value)
        if not stats:
            # Still JSON parser friendly, indicator of fail with emptiness.
            print(json.dumps([]))
            sys.exit(1)
        else:
            print(json.dumps(stats))
    elif len(sys.argv) == 4:
        # Count values associated with key, filter, human friendly output.
        search_key = sys.argv[1]
        where_key = sys.argv[2]
        where_value = sys.argv[3]

        json_data = import_data()[0]
        stats, num_matches = value_counter(json_data, search_key, where_key, where_value)
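value_counter is likewise imported from util and not shown. From its call sites it returns a value-to-count mapping plus the number of blobs that carried the field; an illustrative sketch under that assumption:

from collections import Counter


def value_counter(json_data, search_key, where_key=None, where_value=None):
    """Count the values stored under search_key across the JSON blobs (sketch).

    Returns (stats, num_matches): stats maps each observed value to how often
    it appeared, num_matches is how many objects contained search_key at all.
    """
    stats = Counter()
    num_matches = 0
    for item in json_data:
        if where_key is not None and str(item.get(where_key)) != str(where_value):
            continue
        if search_key not in item:
            continue
        num_matches += 1
        value = item[search_key]
        # A field may hold a scalar or a list of scalars.
        for v in value if isinstance(value, list) else [value]:
            stats[str(v)] += 1
    return stats, num_matches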
Example #7
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np
import pylab as plt
from scipy.io import loadmat

import model as m
import params
import util

#Import data
raw_data = util.import_data()
#Remove nans
util.remove_nans(raw_data)
#Normalize
raw_data = util.normalize(raw_data)

#Load in nan_map
nan_map = loadmat(params.NAN_MAP_PATH)['nan_map'].astype(bool)

#Convert nans back to 0
raw_data = util.reset_nan_values(raw_data, nan_map)

#For each window, run the algorithm
#for i in range(int(params.TV_SPLIT * len(raw_data)) - 1 - params.PREDICT_WINDOW_SIZE):
for i in range(1):
    print("SIMULATION WINDOW %d OUT OF %d:" %
          ((i + 1), int(params.TV_SPLIT * len(raw_data)) - 1 -
           params.PREDICT_WINDOW_SIZE))
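util.remove_nans, util.normalize and util.reset_nan_values are project-specific and not shown; going only by the comments at their call sites, and assuming raw_data is a 2-D NumPy array, they might look roughly like this sketch:

import numpy as np


def remove_nans(data):
    """Replace NaNs in place with the per-column mean so later maths stays finite."""
    col_means = np.nanmean(data, axis=0)
    nan_rows, nan_cols = np.where(np.isnan(data))
    data[nan_rows, nan_cols] = col_means[nan_cols]


def normalize(data):
    """Z-score each column."""
    return (data - data.mean(axis=0)) / data.std(axis=0)


def reset_nan_values(data, nan_map):
    """Zero out the positions that were originally NaN (per the comment above)."""
    data = data.copy()
    data[nan_map] = 0
    return data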
Example #8
    def run(self):

        # Load corpus
        corpus = import_data(self.corpus)
        self.dictionary, self.reverse_dictionary, sent_lengths, self.max_sent_len, enc_data, dec_data, dec_lab = build_dictionary(
            corpus)

        # Save metadata for visualisation of embedding matrix
        meta_data = sorted(self.dictionary, key=self.dictionary.get)
        print(len(meta_data))
        with open('meta_data.tsv', 'w') as f:
            tsv_writer = csv.writer(f, dialect='excel')
            # Write one vocabulary entry per row so TensorBoard can use the
            # file as embedding labels.
            for word in meta_data:
                tsv_writer.writerow([word])

        # np.savetxt("meta_data.tsv", meta_data, fmt="%s")

        self.dictionary = sorted(self.dictionary.items(),
                                 key=operator.itemgetter(1))
        self.vocabulary_size = len(self.dictionary)
        self.max_sent_len += 1

        # Create datasets for encoder and decoders
        enc_data = enc_data[1:-1]
        enc_lengths = sent_lengths[1:-1]
        post_lengths = sent_lengths[2:] + 1
        post_data = dec_data[2:]
        post_lab = dec_lab[2:]
        pre_lengths = sent_lengths[:-2] + 1
        pre_data = dec_data[:-2]
        pre_lab = dec_lab[:-2]

        # Print summary statistics
        self.corpus_length = len(enc_data)
        self.corpus_stats()

        self.graph = tf.Graph()

        with self.graph.as_default():

            print('\r~~~~~~~ Building model ~~~~~~~\r')
            self.initializer = tf.random_normal_initializer()

            # Variables
            self.word_embeddings = tf.get_variable(
                'embeddings', [self.vocabulary_size, self.embedding_size],
                tf.float32,
                initializer=self.initializer)
            self.W_pre = tf.get_variable(
                'precoder/weight', [self.embedding_size, self.vocabulary_size],
                tf.float32,
                initializer=self.initializer)
            self.b_pre = tf.get_variable('precoder/bias',
                                         [self.vocabulary_size],
                                         tf.float32,
                                         initializer=self.initializer)
            self.W_post = tf.get_variable(
                'postcoder/weight',
                [self.embedding_size, self.vocabulary_size],
                tf.float32,
                initializer=self.initializer)
            self.b_post = tf.get_variable('postcoder/bias',
                                          [self.vocabulary_size],
                                          tf.float32,
                                          initializer=self.initializer)

            global_step = tf.Variable(0, name='global_step', trainable=False)

            # Encoder placeholders
            sentences = tf.placeholder(tf.int32, [None, None], "sentences")
            sentences_lengths = tf.placeholder(tf.int32, [None],
                                               "sentences_lengths")

            # Postcoder placeholders
            post_inputs = tf.placeholder(tf.int32, [None, None], "post_inputs")
            post_labels = tf.placeholder(tf.int32, [None, None], "post_labels")
            post_sentences_lengths = tf.placeholder(tf.int32, [None],
                                                    "post_sentences_lengths")

            # Precoder placeholders
            pre_inputs = tf.placeholder(tf.int32, [None, None], "pre_inputs")
            pre_labels = tf.placeholder(tf.int32, [None, None], "pre_labels")
            pre_sentences_lengths = tf.placeholder(tf.int32, [None],
                                                   "pre_sentences_lengths")

            # Embed sentences
            sentences_embedded = self.embed_data(sentences)
            post_inputs_embedded = self.embed_data(post_inputs)
            pre_inputs_embedded = self.embed_data(pre_inputs)

            # Encoder
            encoded_sentences = self.encoder(sentences_embedded,
                                             sentences_lengths,
                                             self.bidirectional)

            # Decoder for following sentence
            post_logits_projected, post_logits = self.decoder(
                decoder_inputs=post_inputs_embedded,
                encoder_state=encoded_sentences,
                name='postcoder',
                lengths=post_sentences_lengths,
                train=True)

            # Decoder for previous sentence
            pre_logits_projected, pre_logits = self.decoder(
                decoder_inputs=pre_inputs_embedded,
                encoder_state=encoded_sentences,
                name='precoder',
                lengths=pre_sentences_lengths,
                train=True)

            # Compute loss
            if self.loss_function == 'softmax':
                post_loss = self.get_softmax_loss(post_labels,
                                                  post_logits_projected)
                pre_loss = self.get_softmax_loss(pre_labels,
                                                 pre_logits_projected)
            else:
                post_loss = self.get_sampled_softmax_loss(post_labels,
                                                          post_logits,
                                                          name='postcoder')
                pre_loss = self.get_sampled_softmax_loss(pre_labels,
                                                         pre_logits,
                                                         name='precoder')

            loss = pre_loss + post_loss
            opt_op = tf.contrib.layers.optimize_loss(
                loss=loss,
                global_step=global_step,
                learning_rate=self.learning_rate,
                optimizer='Adam',
                clip_gradients=2.0,
                learning_rate_decay_fn=None,
                summaries=['loss'])

            # Decode sentences at prediction time
            pre_predict = self.decoder(decoder_inputs=pre_inputs_embedded,
                                       encoder_state=encoded_sentences,
                                       name='precoder',
                                       lengths=pre_sentences_lengths,
                                       train=False)
            post_predict = self.decoder(decoder_inputs=post_inputs_embedded,
                                        encoder_state=encoded_sentences,
                                        name='postcoder',
                                        lengths=post_sentences_lengths,
                                        train=False)
            predict = [pre_predict, post_predict]

        with tf.Session(graph=self.graph) as session:

            self.a = tf.contrib.graph_editor.get_tensors(self.graph)
            train_loss_writer = tf.summary.FileWriter(
                './tensorboard/train_loss', session.graph)

            # Use the same LOG_DIR where you stored your checkpoint.
            embedding_writer = tf.summary.FileWriter('./tensorboard/',
                                                     session.graph)

            config = projector.ProjectorConfig()
            embedding = config.embeddings.add()
            embedding.tensor_name = self.word_embeddings.name
            # Link this tensor to its metadata file (e.g. labels).
            embedding.metadata_path = os.path.join('./meta_data.tsv')

            # Saves a configuration file that TensorBoard will read during startup.
            projector.visualize_embeddings(embedding_writer, config)

            merged = tf.summary.merge_all()

            print('\r~~~~~~~ Initializing variables ~~~~~~~\r')
            tf.global_variables_initializer().run()

            print('\r~~~~~~~ Starting training ~~~~~~~\r')
            start_time = time.time()

            try:
                train_summaryIndex = -1

                for epoch in range(self.num_epochs):
                    self.is_train = True
                    epoch_time = time.time()
                    print('----- Epoch', epoch, '-----')
                    print('Shuffling dataset')

                    perm = np.random.permutation(self.corpus_length)

                    enc_lengths_perm = enc_lengths[perm]
                    enc_data_perm = enc_data[perm]
                    post_lengths_perm = post_lengths[perm]
                    post_inputs_perm = np.array(post_data)[perm]
                    post_labels_perm = np.array(post_lab)[perm]
                    pre_lengths_perm = pre_lengths[perm]
                    pre_inputs_perm = np.array(pre_data)[perm]
                    pre_labels_perm = np.array(pre_lab)[perm]

                    total_loss = 0
                    predict_step = 50

                    for step in range(self.corpus_length // self.batch_size):
                        begin = step * self.batch_size
                        end = (step + 1) * self.batch_size

                        batch_enc_lengths = enc_lengths_perm[begin:end]
                        batch_enc_inputs = enc_data_perm[begin:end]
                        batch_post_lengths = post_lengths_perm[begin:end]
                        batch_post_inputs = post_inputs_perm[
                            begin:end, :np.max(batch_post_lengths)]
                        batch_post_labels = post_labels_perm[
                            begin:end, :np.max(batch_post_lengths)]
                        batch_pre_lengths = pre_lengths_perm[begin:end]
                        batch_pre_inputs = pre_inputs_perm[
                            begin:end, :np.max(batch_pre_lengths)]
                        batch_pre_labels = pre_labels_perm[
                            begin:end, :np.max(batch_pre_lengths)]

                        train_dict = {
                            sentences: batch_enc_inputs,
                            sentences_lengths: batch_enc_lengths,
                            post_inputs: batch_post_inputs,
                            post_labels: batch_post_labels,
                            post_sentences_lengths: batch_post_lengths,
                            pre_inputs: batch_pre_inputs,
                            pre_labels: batch_pre_labels,
                            pre_sentences_lengths: batch_pre_lengths
                        }

                        _, loss_val, batch_summary, glob_step = session.run(
                            [opt_op, loss, merged, global_step],
                            feed_dict=train_dict)
                        train_loss_writer.add_summary(
                            batch_summary, step +
                            (self.corpus_length // self.batch_size) * epoch)

                        total_loss += loss_val

                        if glob_step % predict_step == 0:
                            # if step > 0:
                            print("Average loss at step ", glob_step, ": ",
                                  total_loss / predict_step)
                            total_loss = 0

                            print('\nOriginal sequence:\n')
                            print(
                                self.print_sentence(batch_pre_inputs[0, 1:],
                                                    batch_pre_lengths[0] - 1))
                            print(
                                self.print_sentence(batch_enc_inputs[0],
                                                    batch_enc_lengths[0]))
                            print(
                                self.print_sentence(batch_post_inputs[0, 1:],
                                                    batch_post_lengths[0] - 1))

                            test_enc_lengths = np.expand_dims(
                                batch_enc_lengths[0], 0)
                            test_enc_inputs = np.expand_dims(
                                batch_enc_inputs[0], 0)
                            test_post_lengths = np.expand_dims(
                                batch_post_lengths[0], 0)
                            test_post_inputs = np.expand_dims(
                                batch_post_inputs[0], 0)
                            test_post_labels = np.expand_dims(
                                batch_post_labels[0], 0)
                            test_pre_lengths = np.expand_dims(
                                batch_pre_lengths[0], 0)
                            test_pre_inputs = np.expand_dims(
                                batch_pre_inputs[0], 0)
                            test_pre_labels = np.expand_dims(
                                batch_pre_labels[0], 0)

                            test_dict = {
                                sentences_lengths: test_enc_lengths,
                                sentences: test_enc_inputs,
                                post_sentences_lengths: test_post_lengths,
                                post_inputs: test_post_inputs,
                                post_labels: test_post_labels,
                                pre_sentences_lengths: test_pre_lengths,
                                pre_inputs: test_pre_inputs,
                                pre_labels: test_pre_labels
                            }

                            pre_prediction, post_prediction = session.run(
                                [predict], feed_dict=test_dict)[0]

                            print(
                                '\nPredicted previous and following sequence around original sentence:\n'
                            )
                            print(
                                self.print_sentence(pre_prediction[0],
                                                    len(pre_prediction[0])))
                            print(
                                self.print_sentence(batch_enc_inputs[0],
                                                    batch_enc_lengths[0]))
                            print(
                                self.print_sentence(post_prediction[0],
                                                    len(post_prediction[0])))

                            end_time = time.time()
                            print('\nTime for %d steps: %0.2f seconds' %
                                  (predict_step, end_time - start_time))
                            start_time = time.time()
                            print(
                                '\n\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~'
                            )

                    saver = tf.train.Saver()
                    saver.save(session,
                               os.path.join('./tensorboard/', 'model.ckpt'))

            except KeyboardInterrupt:
                save = input('save?')
                if 'y' in save:
                    self.save_model(session, 0)
Example #9
def test_auto_encoder_with_data(control_seq, estimator):
    """
    test data with control seq missed
    """
    X_train_raw, y_train_raw, X_test_raw, y_test_raw = import_data()

    X_train_raw_0 = mask_source_channel(control_seq, X_train_raw, 1)
    X_test_raw_0 = mask_source_channel(control_seq, X_test_raw, 1)

    # split train data into 5 folds
    kf = KFold(n_splits=5, shuffle=True)

    for train_index, validation_index in kf.split(X_train_raw):
        pass
    # raw data
    X_train_raw_train = X_train_raw.iloc[train_index]
    X_train_raw_validation = X_train_raw.iloc[validation_index]
    # missed features data
    X_train_raw_0_train = X_train_raw_0.iloc[train_index]
    X_train_raw_0_validation = X_train_raw_0.iloc[validation_index]

    # drop na
    X_train_raw_0_train_dropna = X_train_raw_0_train.dropna(axis=1)
    X_test_raw_0_dropna = X_test_raw_0.dropna(axis=1)

    # normalize data
    X_train_all_dropna_normalize = normalize_data(X_train_raw_0_train_dropna,
                                                  X_train_raw_train)
    X_train_raw_train_normalize = normalize_data(X_train_raw_train,
                                                 X_train_raw_train)

    X_test_raw_0_dropna_normalize = normalize_data(X_test_raw_0_dropna,
                                                   X_test_raw)

    # size of the encoded representation: the 561 raw input features are
    # compressed down to a 4-dimensional bottleneck here
    encoding_dim = 4

    # this is our input placeholder
    input_seq = Input(shape=(X_train_raw_0_train_dropna.shape[1], ))

    # encode layer
    encoded = Dense(128, activation='relu')(input_seq)
    #encoded = Dense(64, activation='relu')(encoded)
    encoded = Dense(16, activation='relu')(encoded)
    encoded = Dense(8, activation='relu')(encoded)
    encoder_output = Dense(encoding_dim)(encoded)

    # decode layer
    decoded = Dense(16, activation='relu')(encoder_output)
    #decoded = Dense(64, activation='relu')(decoded)
    decoded = Dense(128, activation='relu')(decoded)
    decoded = Dense(561, activation='sigmoid')(decoded)

    # construct autoencoder
    autoencoder = Model(inputs=input_seq, outputs=decoded)

    # compile autoencoder
    autoencoder.compile(optimizer=Adam(lr=LEARNING_RATE),
                        loss='mean_squared_error')

    autoencoder.summary()

    # training
    from keras.callbacks import TensorBoard

    autoencoder.fit(
        X_train_all_dropna_normalize,
        X_train_raw_train_normalize,
        epochs=300,
        batch_size=100,
        #shuffle=True,
        #validation_data=(X_train_raw_0_validation, X_train_raw_validation),
        callbacks=[TensorBoard(log_dir='./tmp/autoencoder')])
    # tensorboard --logdir=E:\har\tmp\autoencoder

    # predict and denormalize data
    X_test_raw_refilled = autoencoder.predict(X_test_raw_0_dropna_normalize)
    X_test_raw_refilled_denor = denormalize_data(X_test_raw_refilled,
                                                 X_test_raw)

    ynew = estimator.predict(X_test_raw_refilled_denor)
    ynew = ynew + 1

    acc_tests = np.trace(confusion_matrix(y_test_raw, ynew)) / len(ynew)
    return acc_tests
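normalize_data and denormalize_data are not defined in this example. From their call signatures they appear to scale a frame with statistics taken from a reference DataFrame and to undo that scaling; one plausible, purely illustrative sketch (assuming pandas inputs for the frames and a NumPy array for the predictions):

def normalize_data(df, reference_df):
    """Min-max scale df column-wise using ranges taken from reference_df."""
    ref = reference_df[df.columns]
    return (df - ref.min()) / (ref.max() - ref.min())


def denormalize_data(values, reference_df):
    """Map values in [0, 1] back onto the value ranges of reference_df."""
    ref_min = reference_df.min().values
    ref_max = reference_df.max().values
    return values * (ref_max - ref_min) + ref_min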
Example #10
#
# # without PCA
# X1 = phishing_X
#
# X_train, X_test, y_train, y_test = train_test_split(X1, Y1, test_size=0.3)
# num_features = X_train.shape[1]
# num_classes = 2
# nodes = (num_classes + num_features) / 2
# momentum1, learning_rate1 = 0.9, 0.25
#
#
# end = getEpochCurves(momentum1, learning_rate1, X_train, X_test, y_train, y_test, str(X1.shape[1]))
# print "Time taken with " + str(X1.shape[1]) + " components " + str(end)

# with 25 components
phishing_X, Y1, optdigits_X, Y2 = import_data()

pca = FastICA(n_components=26, random_state=5)
X1 = pca.fit_transform(phishing_X)
X1 /= X1.std(axis=0)
print("original shape:   ", phishing_X.shape)
print("transformed shape:", X1.shape)
projected_phishing = np.hstack((X1, Y1[..., None]))
np.savetxt('phishing_ica.csv', projected_phishing, delimiter=',')

# X_train, X_test, y_train, y_test = train_test_split(X1, Y1, test_size=0.3)
# num_features = X_train.shape[1]
# num_classes = 2
# nodes = (num_classes + num_features) / 2
# momentum1, learning_rate1 = 0.9, 0.25
#
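As a small usage note (not part of the original script): the saved phishing_ica.csv stacks the 26 ICA components with the labels as the last column, so it can be reloaded for the commented-out train/test experiments like this:

import numpy as np

projected = np.loadtxt('phishing_ica.csv', delimiter=',')
X_ica, y = projected[:, :-1], projected[:, -1]  # 26 components + label column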
Example #11
    formatter_class=argparse.RawDescriptionHelpFormatter)
parser.add_argument(
    "--fnmatch",
    default="*.json",
    help="override with glob expression to select a smaller fileset.")
parser.add_argument(
    "where",
    action=WhereAction,
    nargs='+',
    type=str,
    help="where exclusions of the form 'where_key=where_val', no quotes.")

if __name__ == "__main__":
    args = parser.parse_args()

    json_data, load_errors = import_data(json_fmatch=args.fnmatch)
    if load_errors:
        # If we start getting unexpected JSON or other things, might need to
        # revisit quitting on load_errors
        print("Error loading JSON data.")
        for e in load_errors:
            print(e)
        sys.exit(1)
    elif not json_data:
        print("No data loaded.")
        sys.exit(1)

    matched = []
    not_matched = []
    for item in json_data:
        if matches_all_wheres(item, args.where):
Example #12
from __future__ import print_function

import sys
import os
import json
from util import import_data, matches_where

if __name__ == "__main__":
    if len(sys.argv) == 3:
        # pluck one
        where_key = sys.argv[1]
        where_value = sys.argv[2]

        # TODO: Put the errors back in, someday, maybe.
        json_data, _ = import_data()

        plucked = None
        for item in json_data:
            is_match = matches_where(item, where_key, where_value)
            if is_match:
                plucked = item
                break

        if not plucked:
            sys.exit(1)
        else:
            print(json.dumps(plucked, indent=4))
    elif len(sys.argv) == 4 and sys.argv[3] == "--all":
        # pluck all
        where_key = sys.argv[1]
Example #13
from __future__ import print_function

import sys
import os
import json
from util import import_data, matches_where, CDDAJSONWriter

if __name__ == "__main__":
    if len(sys.argv) == 3:
        # pluck one
        where_key = sys.argv[1]
        where_value = sys.argv[2]

        # TODO: Put the errors back in, someday, maybe.
        json_data, _ = import_data()

        plucked = None
        for item in json_data:
            is_match = matches_where(item, where_key, where_value)
            if is_match:
                plucked = item
                break

        if not plucked:
            sys.exit(1)
        else:
            print(CDDAJSONWriter(plucked).dumps())
    elif len(sys.argv) == 4 and sys.argv[3] == "--all":
        # pluck all
        where_key = sys.argv[1]
Example #14
"""

# using autoencoder to refill missing data
import numpy as np
import os
os.chdir('E:\\har')

from util import *

import pandas as pd
from util import plot_confusion_matrix, import_data, mask_source_channel
import matplotlib.pyplot as plt

LEARNING_RATE = 7e-4

X_train_raw, y_train_raw, X_test_raw, y_test_raw = import_data()

X_train_raw_0 = mask_source_channel(['Acc'], X_train_raw, 1)
X_test_raw_0 = mask_source_channel(['Acc'], X_test_raw, 1)

# split train data into 5 folds
from sklearn.model_selection import KFold
kf = KFold(n_splits=5, shuffle=True)

for train_index, validation_index in kf.split(X_train_raw):
    pass
# raw data
X_train_raw_train = X_train_raw.iloc[train_index]
X_train_raw_validation = X_train_raw.iloc[validation_index]
# missed features data
X_train_raw_0_train = X_train_raw_0.iloc[train_index]
Example #15
    def __init__(self, corpus, parameters):
        self.corpus = corpus
        self.para = parameters
        self.dictionary, self.reverse_dictionary, sent_lengths, self.max_sent_len, enc_data, dec_data, dec_lab = build_dictionary(
            import_data(self.corpus))
        self.dictionary_sorted = sorted(self.dictionary.items(),
                                        key=operator.itemgetter(1))
        self.vocabulary_size = len(self.dictionary_sorted)
        self.max_sent_len += 1
        self.data = autoencoder_data(enc_data=enc_data,
                                     dec_data=dec_data,
                                     dec_lab=dec_lab,
                                     sent_lengths=sent_lengths)

        print('\r~~~~~~~ Building graph ~~~~~~~\r')
        self.graph = tf.get_default_graph()
        self.initializer = tf.random_normal_initializer()

        # Variables
        self.word_embeddings = tf.get_variable(
            'embeddings', [self.vocabulary_size, self.para.embedding_size],
            tf.float32,
            initializer=self.initializer)
        self.W = tf.get_variable(
            'decoder/weight', [self.para.embedding_size, self.vocabulary_size],
            tf.float32,
            initializer=self.initializer)
        self.b = tf.get_variable('decoder/bias', [self.vocabulary_size],
                                 tf.float32,
                                 initializer=self.initializer)
        self.global_step = tf.Variable(0, name='global_step', trainable=False)

        # Encoder placeholders
        self.enc_inputs = tf.placeholder(tf.int32, [None, None], "enc_inputs")
        self.enc_input_lengths = tf.placeholder(tf.int32, [None],
                                                "enc_input_lengths")

        # Decoder placeholders
        self.dec_inputs = tf.placeholder(tf.int32, [None, None], "dec_inputs")
        self.dec_labels = tf.placeholder(tf.int32, [None, None], "dec_labels")
        self.dec_input_lengths = tf.placeholder(tf.int32, [None],
                                                "dec_input_lengths")

        # Embed sentences
        enc_inputs_embedded = self.embed_data(self.enc_inputs)
        dec_inputs_embedded = self.embed_data(self.dec_inputs)

        # Encoder
        self.encoded_sentences = self.encoder(enc_inputs_embedded,
                                              self.enc_input_lengths,
                                              self.para.bidirectional)

        # Decoder for following sentence
        dec_logits_projected, dec_logits = self.decoder(
            decoder_inputs=dec_inputs_embedded,
            encoder_state=self.encoded_sentences,
            name='decoder',
            lengths=self.dec_input_lengths,
            train=True)

        # Compute loss
        if self.para.loss_function == 'softmax':
            self.loss = self.get_softmax_loss(self.dec_labels,
                                              dec_logits_projected)
        else:
            self.loss = self.get_sampled_softmax_loss(self.dec_labels,
                                                      dec_logits,
                                                      name='decoder')

        self.opt_op = tf.contrib.layers.optimize_loss(
            loss=self.loss,
            global_step=self.global_step,
            learning_rate=self.para.learning_rate,
            optimizer='Adam',
            clip_gradients=2.0,
            learning_rate_decay_fn=None,
            summaries=['loss'])

        # Decode sentences at prediction time
        self.predict = self.decoder(decoder_inputs=dec_inputs_embedded,
                                    encoder_state=self.encoded_sentences,
                                    name='decoder',
                                    lengths=self.dec_input_lengths,
                                    train=False)
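autoencoder_data is only used here as a container for the arrays handed to the training loop; judging from the keyword arguments it could be as simple as a namedtuple (an assumption, not the author's definition):

from collections import namedtuple

# Hypothetical definition matching the keyword arguments used above.
autoencoder_data = namedtuple(
    'autoencoder_data', ['enc_data', 'dec_data', 'dec_lab', 'sent_lengths'])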
Example #16
class CDDAValues:
    """Worker class that prints table from provided data"""
    output = None

    def __init__(self, format_string):
        format_class = get_format_class_by_extension(format_string)
        self.output = format_class()

    def print_table(self, data, columns, types_filter, none_string,
                    with_header):
        if with_header:
            self.output.header(columns)
        for item in data:
            if types_filter and item.get('type') not in types_filter:
                continue

            self.output.row(item_values(item, columns, none_string))


if __name__ == "__main__":
    args = parser.parse_args()
    if args.tileset_types_only:
        args.type = TILESET_TYPES

    # Get data (don't care about load errors)
    json_data, _ = util.import_data(json_fmatch=args.fnmatch)

    worker = CDDAValues(args.format)
    worker.print_table(json_data, args.columns, args.type, args.nonestring,
                       args.with_header)
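item_values, used by CDDAValues.print_table, is defined elsewhere in the tool; from its use it just pulls the requested columns out of each JSON object, substituting a placeholder for missing fields. A hedged sketch:

def item_values(item, columns, none_string):
    """Return the item's value for each requested column, as display strings.

    Missing fields are rendered as none_string; non-string values are passed
    through repr so the table stays one cell per column. Sketch only.
    """
    row = []
    for column in columns:
        value = item.get(column, none_string)
        row.append(value if isinstance(value, str) else repr(value))
    return row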
Example #17
            print("Sorry, didn't find any stats for '%s' in the JSON." %
                  search_key)
            sys.exit(1)

        title = "List of values from field '%s'" % search_key
        print("\n\n%s" % title)
        print("(Data from %s out of %s blobs)" % (num_matches, len(json_data)))
        print("-" * len(title))
        ui_values_to_columns(sorted(stats.keys()))
    elif len(sys.argv) == 3 and sys.argv[2] == "--json":
        # Count values associated with key, machine output.
        search_key = sys.argv[1]
        where_key = None
        where_value = None

        json_data = import_data()[0]
        stats, num_matches = value_counter(json_data, search_key, where_key,
                                           where_value)
        if not stats:
            # Still JSON parser friendly, indicator of fail with emptiness.
            print(json.dumps([]))
            sys.exit(1)
        else:
            print(json.dumps(sorted(stats.keys())))
    elif len(sys.argv) == 4:
        # Count values associated with key, filter, human friendly output.
        search_key = sys.argv[1]
        where_key = sys.argv[2]
        where_value = sys.argv[3]

        json_data = import_data()[0]
Example #18
        return KNeighborsClassifier(n_neighbors=n_neighbors)

    def validation_curve(self, X1, Y1, dataset_name):

        myList = list(range(1, 50))
        neighbors = list(filter(lambda x: x % 2 != 0, myList))
        param_grid = neighbors
        title = "Validation Curve for {} Dataset (KNN)".format(dataset_name)
        cv = StratifiedKFold(n_splits=10, random_state=42)
        X_train, X_test, y_train, y_test = train_test_split(X1,
                                                            Y1,
                                                            test_size=0.3)
        estimator = KNeighborsClassifier()
        plot_validation_curve(estimator,
                              title,
                              X1,
                              Y1,
                              "n_neighbors",
                              param_grid,
                              ylim=None,
                              xlim=(1, 50),
                              cv=cv)

        plt.show()


if __name__ == '__main__':

    X1, Y1, X2, Y2 = import_data()
    kNN().main(X1, Y1, "Letter Recognition")
    kNN().main(X2, Y2, "Madelon")
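plot_validation_curve is a local plotting helper whose body is not shown. It presumably wraps sklearn's validation_curve; a sketch with a matching signature (the project's actual helper may differ):

import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import validation_curve


def plot_validation_curve(estimator, title, X, y, param_name, param_range,
                          ylim=None, xlim=None, cv=None):
    """Plot mean train/CV accuracy against a single hyper-parameter."""
    train_scores, test_scores = validation_curve(
        estimator, X, y, param_name=param_name, param_range=param_range,
        cv=cv, scoring='accuracy')
    plt.figure()
    plt.title(title)
    plt.xlabel(param_name)
    plt.ylabel('Accuracy')
    if ylim:
        plt.ylim(*ylim)
    if xlim:
        plt.xlim(*xlim)
    plt.plot(param_range, np.mean(train_scores, axis=1), label='Training score')
    plt.plot(param_range, np.mean(test_scores, axis=1), label='Cross-validation score')
    plt.legend(loc='best')
    return plt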
Example #19
#!/usr/bin/env python3
"""Lists duplicates in JSON by `type` and `id` fields
"""

from collections import defaultdict

from util import import_data

data = import_data()[0]
all_ids = defaultdict(set)
for obj in data:
    obj_id = obj.get('id')
    obj_type = obj.get('type')
    if obj_id and not isinstance(obj_id, list):
        if obj_id not in all_ids[obj_type]:
            all_ids[obj_type].add(obj_id)
        else:
            print(obj_type, obj_id)
Example #20
def main():
    core_data, core_errors = util.import_data()
    print('Importing Generic Guns data from %r' % GG_DIR)
    gg_data, gg_errors = util.import_data(GG_DIR)

    if core_errors or gg_errors:
        print('Errors reading json:\n%s' % '\n'.join(core_errors + gg_errors))
        sys.exit(1)

    gg_migrations = get_ids(items_of_type(gg_data, 'MIGRATION'))

    core_guns = items_of_type(core_data, 'GUN')

    def is_not_fake_item(i):
        return i.get('copy-from', '') != 'fake_item'

    def is_not_whitelisted_skill(i):
        return 'skill' in i and i['skill'] not in SKILL_WHITELIST

    def has_pockets(i):
        return 'pocket_data' in i

    def lacks_whitelisted_pocket(i):
        return not any(
            pocket.get('ammo_restriction', {}).keys() & AMMO_TYPE_WHITELIST
            for pocket in i.get('pocket_data', []))

    def can_be_unwielded(i):
        return 'NO_UNWIELD' not in i.get('flags', [])

    core_guns = items_for_which_all_ancestors(core_guns, is_not_fake_item)
    core_guns = items_for_which_any_ancestor(core_guns,
                                             is_not_whitelisted_skill)
    core_guns = items_for_which_any_ancestor(core_guns, has_pockets)
    core_guns = items_for_which_all_ancestors(core_guns,
                                              lacks_whitelisted_pocket)
    core_guns = items_for_which_all_ancestors(core_guns, can_be_unwielded)

    core_magazines = items_of_type(core_data, 'MAGAZINE')
    core_magazines = items_for_which_all_ancestors(core_magazines,
                                                   lacks_whitelisted_pocket)

    core_ammo = items_of_type(core_data, 'AMMO')

    def is_not_whitelisted_ammo_type(i):
        return 'ammo_type' in i and i['ammo_type'] not in AMMO_TYPE_WHITELIST

    def is_bullet(i):
        return i.get('damage', {}).get('damage_type', '') == 'bullet'

    core_bullets = items_for_which_any_ancestor(core_ammo, is_bullet)
    core_bullets = items_for_which_any_ancestor(core_bullets,
                                                is_not_whitelisted_ammo_type)

    if (not gg_migrations or not core_guns or not core_magazines
            or not core_ammo):
        print('One of the collections is empty; something has gone wrong with '
              'data collection')
        return 1

    returncode = 0

    def check_missing(items, name):
        ids = get_ids(items) - ID_WHITELIST

        missing_migrations = ids - gg_migrations
        if missing_migrations:
            print('Missing Generic Guns migrations for these types of %s:' %
                  name)
            print('\n'.join(sorted(missing_migrations)))
            print()
            nonlocal returncode
            returncode = 1

    check_missing(core_bullets, 'ammo')
    check_missing(core_magazines, 'magazine')
    check_missing(core_guns, 'guns')

    if returncode:
        print('The above errors can be resolved by either adding suitable '
              'migrations to Generic Guns or adding to the whitelists of '
              'things not requiring migration in '
              'tools/json_tools/generic_guns_validator.py')

    return returncode
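items_of_type, get_ids and the items_for_which_* filters come from the validator's own helper module. The ancestor-walking filters need copy-from resolution and are omitted here, but the first two can be approximated from the set arithmetic performed on their results (a sketch, not the actual tool code):

def items_of_type(data, type_name):
    """All JSON objects whose 'type' field equals type_name."""
    return [obj for obj in data if obj.get('type') == type_name]


def get_ids(items):
    """The set of 'id' values (falling back to 'abstract') across the items.

    List-valued ids are not handled in this sketch.
    """
    return {obj.get('id', obj.get('abstract')) for obj in items} - {None}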
Example #21
def load_submit(submit_name: str) -> pd.DataFrame:
    try:
        msg = f"failed to decode {submit_name}."
        with open(submit_name, "r") as fin:
            upload = json.load(fin)
        msg = f"{submit_name} find no solution element."
        upload = upload.get("solution")
        msg = f"{submit_name} can not convert to dataframe."
        return pd.DataFrame.from_dict(upload)
    except Exception as e:
        print(msg)
        print(str(e))


if __name__ == '__main__':
    ok, data_total = validator.import_data('jobs.json')
    if not ok:
        print("load environment setting failed.")
        sys.exit(-1)
    js = validator.JobShop(data_total)

    submit_name = sys.argv[1] if len(sys.argv) > 1 else "submit.json"
    df_up = load_submit(submit_name)

    ok, msg = validator.prepare(js, df_up)
    if not ok:
        print(msg)
        sys.exit(-1)
    ok, msg = validator.check(js)
    if not ok:
        for l in msg: