Example #1
def process(df_sk_id_curr, item):
    name, df = item
    print(f'--- {name} ---')
    cat = {}
    cont = {}
    for sk_id_curr in tqdm(df_sk_id_curr['SK_ID_CURR']):
        data = df[df['SK_ID_CURR'] == sk_id_curr].sort_values(
            SORT_KEYS[name]).tail(MAX_LEN)
        cat[sk_id_curr] = expand(
            data.select_dtypes('category').astype('int').values + 1, MAX_LEN)
        cont[sk_id_curr] = expand(
            data.select_dtypes('float32').values, MAX_LEN)
    dump(cat, f'../data/04_sequence/{name}_cat.joblib')
    dump(cont, f'../data/04_sequence/{name}_cont.joblib')
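The `expand` helper called above is not defined on this page. A minimal sketch of what the call sites imply, assuming it zero-pads (at the front) or truncates a `(seq_len, n_features)` array to exactly `MAX_LEN` rows; the `+ 1` applied to the categorical codes in the caller suggests 0 is reserved as the padding value:

import numpy as np

def expand(values, max_len):
    # Hypothetical reconstruction: keep at most the max_len most recent rows
    # and zero-pad at the front so every SK_ID_CURR yields a fixed-size array.
    values = np.asarray(values)[-max_len:]
    pad = max_len - len(values)
    return np.pad(values, ((pad, 0), (0, 0)))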
Example #2
def subst_command(options, arglist):
    """
    Run the command for each filename in arglist.
    """
    for filename in arglist:
        cmd = util.expand(re.sub('%', filename, options.cmd))
        psys(cmd, options)
Example #3
def predict(model, x_global, x_local, x_ctx, box, **params):
    max_words = params['max_words']
    # An entire batch must be run at once, but we only use the first slot in that batch
    indices = util.left_pad([words.START_TOKEN_IDX], **params)
    x_global = util.expand(x_global, 1)
    x_local = util.expand(x_local, 1)
    indices = util.expand(indices, 1)
    x_ctx = util.expand(x_ctx, 1)

    # Input is empty padding followed by start token
    output_words = []
    for i in range(1, max_words):
        preds = model.predict([x_global, x_local, indices, x_ctx])
        indices = np.roll(indices, -1, axis=1)
        indices[:, -1] = np.argmax(preds[:], axis=1)

    return words.words(indices[0])
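In this example `util.expand(x, 1)` appears to wrap a single sample into a batch (here of size 1) so `model.predict` can be called on it. A plausible NumPy sketch; the name and signature are inferred from the call sites, not taken from the project's `util` module:

import numpy as np

def expand(x, n):
    # Hypothetical: add a leading batch axis and repeat the sample n times.
    x = np.asarray(x)
    return np.repeat(x[np.newaxis, ...], n, axis=0)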
Example #4
    def zero_state(self, batch_size, dtype):
        with tf.variable_scope('init', reuse=self.reuse):
            read_vector_list = [
                expand(tf.tanh(learned_init(self.memory_vector_dim)),
                       dim=0,
                       N=batch_size) for i in range(self.read_head_num)
            ]

            w_list = [
                expand(tf.nn.softmax(learned_init(self.memory_size)),
                       dim=0,
                       N=batch_size)
                for i in range(self.read_head_num + self.write_head_num)
            ]

            controller_init_state = self.controller.zero_state(
                batch_size, dtype)

            if self.init_mode == 'learned':
                M = expand(tf.tanh(
                    tf.reshape(
                        learned_init(self.memory_size *
                                     self.memory_vector_dim),
                        [self.memory_size, self.memory_vector_dim])),
                           dim=0,
                           N=batch_size)
            elif self.init_mode == 'random':
                M = expand(tf.tanh(
                    tf.get_variable(
                        'init_M', [self.memory_size, self.memory_vector_dim],
                        initializer=tf.random_normal_initializer(mean=0.0,
                                                                 stddev=0.5))),
                           dim=0,
                           N=batch_size)
            elif self.init_mode == 'constant':
                M = expand(tf.get_variable(
                    'init_M', [self.memory_size, self.memory_vector_dim],
                    initializer=tf.constant_initializer(1e-6)),
                           dim=0,
                           N=batch_size)
            else:
                raise ValueError('Unknown init_mode: %s' % self.init_mode)

            return NTMControllerState(controller_state=controller_init_state,
                                      read_vector_list=read_vector_list,
                                      w_list=w_list,
                                      M=M)
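Here `expand(x, dim=0, N=batch_size)` tiles each learned initial tensor across the batch dimension. A minimal TensorFlow 1.x sketch consistent with these call sites (the project's actual helper may differ; it assumes `batch_size` is a plain Python integer):

import tensorflow as tf

def expand(x, dim, N):
    # Hypothetical: insert a new axis at `dim` and repeat the tensor N times along it,
    # e.g. turning (memory_size, memory_vector_dim) into (batch_size, memory_size, memory_vector_dim).
    return tf.concat([tf.expand_dims(x, dim) for _ in range(N)], axis=dim)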
Example #5
def iterate_command(options, arglist):
    """
    Run a command once for each of a sequence of numbers.

    Possible enhancements would be to handle low/high/step tuples, and
    to handle an arbitrary comma delimited list of values.
    """
    (low, high) = options.irange.split(':')
    for idx in range(int(low), int(high)):
        cmd = util.expand(re.sub('%', str(idx), options.cmd))
        psys(cmd, options)
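Examples #2 and #5 come from the same command-runner utility; in them `util.expand` appears to expand shell-style references in the command string before `psys` executes it. A minimal sketch under that assumption:

import os

def expand(cmd):
    # Hypothetical: expand ~ and $ENV_VAR references in the command string.
    return os.path.expandvars(os.path.expanduser(cmd))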
Example #6
    def global_histogram(self, input):
        out = self.quantization.encode_nn(input)  # batch x 313 x imsize x imsize
        out = out.type(torch.FloatTensor)  # cast to a float tensor
        X_onehotsum = torch.sum(torch.sum(out, dim=3), dim=2)  # sum down to batch x 313
        X_hist = torch.div(X_onehotsum,
                           util.expand(torch.sum(X_onehotsum, dim=1).unsqueeze(1),
                                       X_onehotsum))  # normalise into a probability over the 313 bins
        return X_hist
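In this snippet `util.expand(a, b)` seems to broadcast the per-image sum (shape batch x 1) to the shape of `X_onehotsum` (batch x 313) so the division yields a per-image probability distribution. A one-line PyTorch sketch under that assumption:

def expand(src, target):
    # Hypothetical: broadcast src (batch x 1) to the shape of target (batch x 313).
    return src.expand_as(target)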
Example #7
def predict(model, x_global, x_local, x_ctx, box, temperature=.0):
    indices = util.left_pad([])
    #x0, x1, y0, y1 = box
    #coords = [0, (y0 + y1) / 2, (x0 + x1) / 2]
    likelihoods = []
    for i in range(MAX_WORDS):
        preds = model.predict([
            util.expand(x_global),
            util.expand(x_local),
            util.expand(indices),
            util.expand(x_ctx)
        ])
        preds = preds[0]
        indices = np.roll(indices, -1)
        if temperature > 0:
            indices[-1] = sample(preds, temperature)
        else:
            indices[-1] = np.argmax(preds, axis=-1)
        likelihoods.append(preds[indices[-1]])
    return words.words(indices), np.mean(likelihoods)
Example #8
def process(df_sk_id_curr, item):
    name, df = item
    print(f'--- {name} ---')
    cont = {}
    drop_cols = [column for column in df.columns if column.startswith('SK_ID')]
    for sk_id_curr in tqdm(df_sk_id_curr['SK_ID_CURR']):
        data = df[df['SK_ID_CURR'] == sk_id_curr].sort_values(
            SORT_KEYS[name]).tail(MAX_LEN)
        data = data.drop(drop_cols, axis=1)
        cont[sk_id_curr] = expand(data.values, MAX_LEN)
    dump(cont, f'../data/06_onehot_seq/{name}.joblib')
Example #9
    def load(self, cfg_file):
        json_obj = json.loads(open(cfg_file).read())
        self.attr['title'] = json_obj['title'] if 'title' in json_obj else 'welcome to soapy'
        if 'source_files' in json_obj:
            self.attr['source_files'] = expand(json_obj['source_files'])
        else:
            print('You need to specify source files with "source_files" attr.')
            sys.exit(0)
        self.attr['description'] = json_obj['description'] if 'description' in json_obj else 'description'
        self.attr['entry'] = json_obj['entry'] if 'entry' in json_obj else 'entry'
        self.attr['subtitle1'] = json_obj['subtitle1'] if 'subtitle1' in json_obj else 'subtitle1'
        self.attr['subtitle2'] = json_obj['subtitle2'] if 'subtitle2' in json_obj else 'subtitle2'
Example #10
def xargs_wrap(cmd, rble):
    """
    Do xargs wrapping to cmd, distributing args from file rble across
    command lines.
    """
    tcmd = cmd
    rval = []
    pending = False
    for line in rble:
        bline = line.strip()
        for item in bline.split(" "):
            tcmd = util.expand(re.sub('%', item + ' %', tcmd))
            pending = True

            if 240 < len(tcmd):
                tcmd = re.sub(r'\s*%\s*', '', tcmd)
                rval.append(tcmd)
                pending = False
                tcmd = cmd
    if pending:
        tcmd = re.sub(r'\s*%\s*', '', tcmd)
        rval.append(tcmd)
    return rval
Example #11
        i += 1
    return words
            

def expand_macro(arg: str) -> list:
    """Returns a list of whatever the macro expands into, or if not a macro, just the arg in a list."""
    # character macro
    if charroll := character.get_current_character_roll(arg):
        return split_macro(charroll)
    # normal macro
    if arg in mac.macros:
        return split_macro(mac.macros[arg])
    return [arg]

# expand everything
util.expand(args, [expand_macro, lambda arg: expand_delimiters(arg, delimiters)])

# interpret commas for grouping
i = 0
while i < len(args):
    if args[i] == comma:
        # find indices of all other commas at this depth, and where depth starts and ends
        comma_indices = [i]

        # find where this depth started
        depth = 0
        j = i
        while depth >= 0 and j > 0:
            j -= 1
            if args[j] == right_paren:
                depth += 1
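In this snippet `util.expand(args, [...])` appears to rewrite the argument list in place by applying the given expander functions to every argument. A speculative sketch of such a helper; the name, the in-place mutation, and the re-expansion loop are all assumptions drawn from the call site:

def expand(args, expanders):
    # Hypothetical: replace each argument in place with whatever the expander
    # functions turn it into, re-examining new items until nothing changes.
    i = 0
    while i < len(args):
        for expander in expanders:
            expansion = expander(args[i])
            if expansion != [args[i]]:
                args[i:i + 1] = expansion   # splice the expansion into the list
                break
        else:
            i += 1                          # no expander changed this argument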
Example #12
    for each in patterns:
        seqNums.append(each.squence)
    maxSeqs = u.maxSeq(seqNums)
    print ("The sequential patterns :")
    for i in maxSeqs:
        for sth in i:
            print "[",
            for ssth in sth:
                print ssth,
            print "]",
        print ""
    print >> ff, "The sequential patterns :"
    for i in maxSeqs:
        for sth in i:
            print >> ff, "[",
            for ssth in sth:
                print >> ff, ssth,
            print >> ff, "]",
        print >> ff, ""
    ff.close()
    flitedSeqs = u.fliter(maxSeqs)
    expandedSeqs = u.expand(maxSeqs)
    maxStages = u.genPlotDatas(maxSeqs)
    flitedStages = u.genPlotDatas(flitedSeqs)
    expandedStages = u.genPlotDatas(expandedSeqs)
    allStages = []
    allStages += [maxStages]
    allStages += [flitedStages]
    allStages += [expandedStages]
    u.drawStages(allStages)
Example #14
def train():
    TIMESTAMP = "{0:%Y-%m-%d-%H-%M/}".format(datetime.now())
    log.log_info('program start')
    data, num_good, num_bad = util.load_train_data(num_data // 2)
    log.log_debug('Data loading completed')

    # resample
    data, length = util.resample(data, 600)
    data = util.reshape(data, length)
    good_data_origin = data[:num_good, :]
    bad_data_origin = data[num_good:, :]

    # extract bad data for test and train
    permutation = list(np.random.permutation(len(bad_data_origin)))
    shuffled_bad_data = bad_data_origin[permutation, :]
    test_bad_data = shuffled_bad_data[:int(num_bad * 0.3), :]
    train_bad_data_origin = shuffled_bad_data[int(num_bad * 0.3):, :]
    # extract corresponding good data for test and train
    permutation = list(np.random.permutation(len(good_data_origin)))
    shuffled_good_data = good_data_origin[permutation, :]
    test_good_data = shuffled_good_data[:len(test_bad_data), :]
    train_good_data = shuffled_good_data[len(test_bad_data):, :]

    assert len(test_bad_data) == len(test_good_data)
    # construct test data
    test_y = np.array([1.] * len(test_good_data) + [0.] * len(test_bad_data), dtype=float).reshape(
        (len(test_bad_data) + len(test_good_data), 1))
    test_x = np.vstack((test_good_data, test_bad_data))

    # expand the number of bad data for train
    train_x = np.vstack((train_good_data, train_bad_data_origin))
    train_y = np.array([1.] * len(train_good_data) + [0.] * len(train_bad_data_origin), dtype=float).reshape(
        (len(train_bad_data_origin) + len(train_good_data), 1))

    train_x, train_y, num_expand = util.expand(train_x, train_y)

    # regularize
    for i in range(len(train_x)):
        train_x[i, :, 0] = util.regularize(train_x[i, :, 0])
        train_x[i, :, 1] = util.regularize(train_x[i, :, 1])
        train_x[i, :, 2] = util.regularize(train_x[i, :, 2])
    for i in range(len(test_x)):
        test_x[i, :, 0] = util.regularize(test_x[i, :, 0])
        test_x[i, :, 1] = util.regularize(test_x[i, :, 1])
        test_x[i, :, 2] = util.regularize(test_x[i, :, 2])

    # shuffle the training data
    train_x, train_y = util.shuffle_data(train_x, train_y)

    log.log_debug('prepare completed')
    log.log_info('convolution layers: ' + str(conv_layers))
    log.log_info('filters: ' + str(filters))
    log.log_info('fully connected layers: ' + str(fc_layers))
    log.log_info('learning rate: %f' % learning_rate)
    log.log_info('keep prob: ' + str(keep_prob))
    log.log_info('the number of expanding bad data: ' + str(num_expand))
    log.log_info('mini batch size: ' + str(mini_batch_size))

    if mini_batch_size != 0:
        assert mini_batch_size <= len(train_x)

    cnn = Cnn(conv_layers, fc_layers, filters, learning_rate)
    (m, n_W0, n_C0) = train_x.shape
    n_y = train_y.shape[1]

    # construct the computation graph
    cnn.initialize(n_W0, n_C0, n_y)
    cost = cnn.cost()
    optimizer = cnn.get_optimizer(cost)
    predict, accuracy = cnn.predict()

    init = tf.global_variables_initializer()
    saver = tf.train.Saver()

    with tf.Session() as sess:

        # log for tensorboard
        merged = tf.summary.merge_all()
        train_writer = tf.summary.FileWriter("resource/tsb/train/" + TIMESTAMP, sess.graph)
        test_writer = tf.summary.FileWriter("resource/tsb/test/" + TIMESTAMP)

        if enable_debug:
            sess = tf_debug.LocalCLIDebugWrapperSession(sess)

        sess.run(init)

        for i in range(1, num_epochs + 1):
            if mini_batch_size != 0:
                num_mini_batches = int(m / mini_batch_size)
                mini_batches = util.random_mini_batches(train_x, train_y, mini_batch_size)

                cost_value = 0
                for mini_batch in mini_batches:
                    (mini_batch_x, mini_batch_y) = mini_batch
                    _, temp_cost = sess.run([optimizer, cost], feed_dict={cnn.x: mini_batch_x, cnn.y: mini_batch_y,
                                                                          cnn.keep_prob: keep_prob})
                    cost_value += temp_cost
                cost_value /= num_mini_batches
            else:
                _, cost_value = sess.run([optimizer, cost],
                                         feed_dict={cnn.x: train_x, cnn.y: train_y, cnn.keep_prob: keep_prob})

            # disable dropout
            summary_train, train_accuracy = sess.run([merged, accuracy],
                                                     feed_dict={cnn.x: train_x, cnn.y: train_y,
                                                                cnn.keep_prob: 1})
            summary_test, test_accuracy = sess.run([merged, accuracy],
                                                   feed_dict={cnn.x: test_x, cnn.y: test_y, cnn.keep_prob: 1})

            train_writer.add_summary(summary_train, i - 1)
            test_writer.add_summary(summary_test, i - 1)

            if print_detail and (i % 10 == 0 or i == 1):
                info = '\nIteration %d\n' % i + \
                       'Cost: %f\n' % cost_value + \
                       'Train accuracy: %f\n' % train_accuracy + \
                       'Test accuracy: %f' % test_accuracy
                log.log_info(info)

            # stop when test>0.95 and train>0.99
            if test_accuracy >= 0.95 and train_accuracy >= 0.99:
                info = '\nIteration %d\n' % i + \
                       'Cost: %f\n' % cost_value + \
                       'Train accuracy: %f\n' % train_accuracy + \
                       'Test accuracy: %f' % test_accuracy
                log.log_info(info)
                saver.save(sess, "resource/model/" + TIMESTAMP)
                break
            saver.save(sess, "resource/model/" + TIMESTAMP)
        train_writer.close()
        test_writer.close()

    log.log_info('program end')
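`util.expand(train_x, train_y)` returns an enlarged training set plus a count that is later logged as "the number of expanding bad data", which suggests it oversamples the minority (bad, label 0) class. A rough sketch under that assumption; the project's real augmentation (it may perturb or add noise to the duplicated curves) is not shown here:

import numpy as np

def expand(train_x, train_y):
    # Hypothetical: duplicate randomly chosen bad samples (label 0) until the
    # two classes are balanced, and report how many samples were added.
    bad_idx = np.where(train_y[:, 0] == 0.)[0]
    good_idx = np.where(train_y[:, 0] == 1.)[0]
    num_expand = len(good_idx) - len(bad_idx)
    if num_expand <= 0:
        return train_x, train_y, 0
    extra = np.random.choice(bad_idx, num_expand, replace=True)
    train_x = np.concatenate([train_x, train_x[extra]], axis=0)
    train_y = np.concatenate([train_y, train_y[extra]], axis=0)
    return train_x, train_y, num_expand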
Example #15
                a, b = k
                return toBitList(a), toBitList(b)

            # All 4 are bit lists
            lf0, rf0 = helper(splitInHalf(pout0))
            lf1, rf1 = helper(splitInHalf(pout1))

            # Is a bit list
            cons = toBitList(''.join(map(util.byteToBitString, util.xor))[32:])

            # Are bit lists
            rPrime = util.xorBitList(rf0, rf1)
            cPrime = util.applyBitPermutation(util.pinv,
                                              util.xorBitList(rPrime, cons))

            e0 = util.expand(lf0)
            e1 = util.expand(lf1)

            co1 = []  # Inputs that go into S box for 1st plaintext
            co2 = []  # Inputs that go into S box for 2nd plaintext
            c = []
            for j in util.sBoxesForThisXor:
                co1.append(bitListToInt(getblock(j, e0, 6)))
                co2.append(bitListToInt(getblock(j, e1, 6)))
                c.append(bitListToInt(getblock(j, cPrime, 4)))

            # First guess a 6 bit value
            for k in range(64):
                # Try that for each S box relevant for this XOR value
                for i in range(len(util.sBoxesForThisXor)):
                    # If the input and output match
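This snippet is part of a DES differential-cryptanalysis routine, so `util.expand(lf0)` very likely applies the DES E-expansion, turning a 32-bit half block into the 48 bits that feed the S boxes (consistent with the 6-bit blocks taken from `e0` above). A sketch using the standard DES expansion table; the bit-list representation is assumed from the surrounding code:

# Standard DES expansion table E (1-based bit positions)
E_TABLE = [32, 1, 2, 3, 4, 5,
           4, 5, 6, 7, 8, 9,
           8, 9, 10, 11, 12, 13,
           12, 13, 14, 15, 16, 17,
           16, 17, 18, 19, 20, 21,
           20, 21, 22, 23, 24, 25,
           24, 25, 26, 27, 28, 29,
           28, 29, 30, 31, 32, 1]

def expand(half_block):
    # Hypothetical: expand a 32-bit half block (list of bits) to 48 bits.
    return [half_block[i - 1] for i in E_TABLE]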
Example #16
                    default=True,
                    help='Don\'t make a last-run file.')
parser.add_argument('--rerun',
                    dest='rerun',
                    action='store_true',
                    default=False,
                    help='Do nothing else but re-run the last deployment.')
parser.add_argument('--colorless',
                    dest='color',
                    action='store_false',
                    default=True,
                    help='Don\'t use any colors.')
parser.set_defaults(dry=False, copy=False)
args = parser.parse_args()

depot = util.expand(args.depot)

#configurations
configurations_file = os.path.join(depot, conf.CONFIGURATIONS_FILE_NAME)
configurations_file_exists = os.path.isfile(configurations_file)
if configurations_file_exists:
    configurations_parser = util.get_parser(configurations_file)
    configurations_parse_succes = not (configurations_parser is None)


def deploy():
    """
    Deploy SUS entirely
    """

    deploy_configurations()