def process(df_sk_id_curr, item):
    name, df = item
    print(f'--- {name} ---')
    cat = {}
    cont = {}
    for sk_id_curr in tqdm(df_sk_id_curr['SK_ID_CURR']):
        data = df[df['SK_ID_CURR'] == sk_id_curr].sort_values(
            SORT_KEYS[name]).tail(MAX_LEN)
        cat[sk_id_curr] = expand(
            data.select_dtypes('category').astype('int').values + 1, MAX_LEN)
        cont[sk_id_curr] = expand(
            data.select_dtypes('float32').values, MAX_LEN)
    dump(cat, f'../data/04_sequence/{name}_cat.joblib')
    dump(cont, f'../data/04_sequence/{name}_cont.joblib')
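# The `expand(values, MAX_LEN)` helper used above is not defined in this
# snippet. A minimal sketch of a pad-to-fixed-length helper with that call
# signature (an assumption, not the original implementation):
import numpy as np

def expand(values, max_len):
    """Left-pad a 2-D array with zero rows (or truncate) to exactly max_len rows."""
    values = np.asarray(values)
    if len(values) >= max_len:
        return values[-max_len:]
    pad = np.zeros((max_len - len(values), values.shape[1]), dtype=values.dtype)
    return np.vstack([pad, values])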
def subst_command(options, arglist):
    """
    Run the command for each filename in arglist.
    """
    for filename in arglist:
        cmd = util.expand(re.sub('%', filename, options.cmd))
        psys(cmd, options)
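# In these command-line helpers, `util.expand` most plausibly performs
# shell-style expansion of '~' and environment variables in the command
# string; the real implementation is not shown. A minimal sketch under that
# assumption:
import os

def expand(text):
    """Expand '~' and $VAR references in a command string."""
    return os.path.expandvars(os.path.expanduser(text))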
def predict(model, x_global, x_local, x_ctx, box, **params):
    max_words = params['max_words']
    # An entire batch must be run at once, but we only use the first slot in that batch
    indices = util.left_pad([words.START_TOKEN_IDX], **params)
    x_global = util.expand(x_global, 1)
    x_local = util.expand(x_local, 1)
    indices = util.expand(indices, 1)
    x_ctx = util.expand(x_ctx, 1)
    # Input is empty padding followed by start token
    output_words = []
    for i in range(1, max_words):
        preds = model.predict([x_global, x_local, indices, x_ctx])
        indices = np.roll(indices, -1, axis=1)
        indices[:, -1] = np.argmax(preds[:], axis=1)
    return words.words(indices[0])
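# Here `util.expand(x, 1)` appears to wrap a single example into a batch of
# size 1 so the model can be called on a full batch. A sketch under that
# assumption (not the original helper):
import numpy as np

def expand(x, n=1):
    """Stack n copies of x along a new leading (batch) axis."""
    return np.repeat(np.expand_dims(np.asarray(x), axis=0), n, axis=0)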
def zero_state(self, batch_size, dtype):
    with tf.variable_scope('init', reuse=self.reuse):
        read_vector_list = [
            expand(tf.tanh(learned_init(self.memory_vector_dim)),
                   dim=0, N=batch_size)
            for i in range(self.read_head_num)
        ]
        w_list = [
            expand(tf.nn.softmax(learned_init(self.memory_size)),
                   dim=0, N=batch_size)
            for i in range(self.read_head_num + self.write_head_num)
        ]
        controller_init_state = self.controller.zero_state(batch_size, dtype)
        if self.init_mode == 'learned':
            M = expand(tf.tanh(
                tf.reshape(
                    learned_init(self.memory_size * self.memory_vector_dim),
                    [self.memory_size, self.memory_vector_dim])),
                dim=0, N=batch_size)
        elif self.init_mode == 'random':
            M = expand(tf.tanh(
                tf.get_variable(
                    'init_M', [self.memory_size, self.memory_vector_dim],
                    initializer=tf.random_normal_initializer(mean=0.0, stddev=0.5))),
                dim=0, N=batch_size)
        elif self.init_mode == 'constant':
            M = expand(tf.get_variable(
                'init_M', [self.memory_size, self.memory_vector_dim],
                initializer=tf.constant_initializer(1e-6)),
                dim=0, N=batch_size)
        return NTMControllerState(controller_state=controller_init_state,
                                  read_vector_list=read_vector_list,
                                  w_list=w_list,
                                  M=M)
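# `expand(x, dim=0, N=batch_size)` above tiles a learned initial value across
# the batch dimension. A plausible sketch using standard TensorFlow 1.x-style
# ops (the helper itself is not shown, so this is an assumption):
import tensorflow as tf

def expand(x, dim, N):
    """Insert a new axis at `dim` and repeat x N times along it."""
    return tf.concat([tf.expand_dims(x, dim) for _ in range(N)], axis=dim)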
def iterate_command(options, arglist):
    """
    Run a command once for each of a sequence of numbers.

    Possible enhancements would be to handle low/high/step tuples, and to
    handle an arbitrary comma delimited list of values.
    """
    (low, high) = options.irange.split(':')
    for idx in range(int(low), int(high)):
        cmd = util.expand(re.sub('%', str(idx), options.cmd))
        psys(cmd, options)
def global_histogram(self, input):
    out = self.quantization.encode_nn(input)  # batch x 313 x imsize x imsize
    out = out.type(torch.FloatTensor)  # cast to a float tensor
    X_onehotsum = torch.sum(torch.sum(out, dim=3), dim=2)  # sum over spatial dims -> batch x 313
    X_hist = torch.div(
        X_onehotsum,
        util.expand(torch.sum(X_onehotsum, dim=1).unsqueeze(1), X_onehotsum))  # normalize to a 313-bin probability distribution
    return X_hist
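# `util.expand(a, b)` here broadcasts the per-image total (batch x 1) to the
# shape of X_onehotsum (batch x 313) so the division yields a per-row
# probability distribution. A minimal sketch under that assumption:
import torch

def expand(src, like):
    """Broadcast `src` to the shape of `like` (PyTorch expand_as)."""
    return src.expand_as(like)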
def predict(model, x_global, x_local, x_ctx, box, temperature=.0):
    indices = util.left_pad([])
    #x0, x1, y0, y1 = box
    #coords = [0, (y0 + y1) / 2, (x0 + x1) / 2]
    likelihoods = []
    for i in range(MAX_WORDS):
        preds = model.predict([
            util.expand(x_global),
            util.expand(x_local),
            util.expand(indices),
            util.expand(x_ctx)
        ])
        preds = preds[0]
        indices = np.roll(indices, -1)
        if temperature > 0:
            indices[-1] = sample(preds, temperature)
        else:
            indices[-1] = np.argmax(preds, axis=-1)
        likelihoods.append(preds[indices[-1]])
    return words.words(indices), np.mean(likelihoods)
def process(df_sk_id_curr, item):
    name, df = item
    print(f'--- {name} ---')
    cont = {}
    drop_cols = [column for column in df.columns if column.startswith('SK_ID')]
    for sk_id_curr in tqdm(df_sk_id_curr['SK_ID_CURR']):
        data = df[df['SK_ID_CURR'] == sk_id_curr].sort_values(
            SORT_KEYS[name]).tail(MAX_LEN)
        data = data.drop(drop_cols, axis=1)
        cont[sk_id_curr] = expand(data.values, MAX_LEN)
    dump(cont, f'../data/06_onehot_seq/{name}.joblib')
def load(self, cfg_file):
    json_obj = json.loads(open(cfg_file).read())
    self.attr['title'] = json_obj['title'] if 'title' in json_obj else 'welcome to soapy'
    if 'source_files' in json_obj:
        self.attr['source_files'] = expand(json_obj['source_files'])
    else:
        print('You need to specify source files with a "source_files" attr.')
        sys.exit(0)
    self.attr['description'] = json_obj['description'] if 'description' in json_obj else 'description'
    self.attr['entry'] = json_obj['entry'] if 'entry' in json_obj else 'entry'
    self.attr['subtitle1'] = json_obj['subtitle1'] if 'subtitle1' in json_obj else 'subtitle1'
    self.attr['subtitle2'] = json_obj['subtitle2'] if 'subtitle2' in json_obj else 'subtitle2'
def xargs_wrap(cmd, rble):
    """
    Do xargs wrapping to cmd, distributing args from file rble across
    command lines.
    """
    tcmd = cmd
    rval = []
    pending = False
    for line in rble:
        bline = line.strip()
        for item in bline.split(" "):
            tcmd = util.expand(re.sub('%', item + ' %', tcmd))
            pending = True
            if 240 < len(tcmd):
                tcmd = re.sub(r'\s*%\s*', '', tcmd)
                rval.append(tcmd)
                pending = False
                tcmd = cmd
    if pending:
        # flush the final, partially filled command line
        tcmd = re.sub(r'\s*%\s*', '', tcmd)
        rval.append(tcmd)
    return rval
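# Hypothetical usage of xargs_wrap (file name, command, and `options` are made
# up for illustration): distribute whitespace-separated tokens from a file
# across command lines, each kept under roughly 240 characters before the
# trailing '%' placeholder is stripped.
#
#   with open('names.txt') as names:
#       for cmd in xargs_wrap('echo %', names):
#           psys(cmd, options)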
        i += 1
    return words


def expand_macro(arg: str) -> list:
    """Returns a list of whatever the macro expands into,
    or if not a macro, just the arg in a list."""
    # character macro
    if charroll := character.get_current_character_roll(arg):
        return split_macro(charroll)
    # normal macro
    if arg in mac.macros:
        return split_macro(mac.macros[arg])
    return [arg]


# expand everything
util.expand(args, [expand_macro, lambda arg: expand_delimiters(arg, delimiters)])

# interpret commas for grouping
i = 0
while i < len(args):
    if args[i] == comma:
        # find indices of all other commas at this depth, and where depth starts and ends
        comma_indices = [i]
        # find where this depth started
        depth = 0
        j = i
        while depth >= 0 and j > 0:
            j -= 1
            if args[j] == right_paren:
                depth += 1
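# `util.expand(args, [expand_macro, ...])` above evidently rewrites the token
# list in place, running every token through each expander function (each of
# which returns a list of replacement tokens). A minimal sketch under that
# assumption:
def expand(tokens, expanders):
    """Run every token through each expander once, splicing results back in place."""
    for f in expanders:
        expanded = []
        for tok in tokens:
            expanded.extend(f(tok))
        tokens[:] = expanded
    return tokens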
for each in patterns:
    seqNums.append(each.squence)
maxSeqs = u.maxSeq(seqNums)

print "The sequential patterns :"
for i in maxSeqs:
    for sth in i:
        print "[",
        for ssth in sth:
            print ssth,
        print "]",
    print ""

print >> ff, "The sequential patterns :"
for i in maxSeqs:
    for sth in i:
        print >> ff, "[",
        for ssth in sth:
            print >> ff, ssth,
        print >> ff, "]",
    print >> ff, ""
ff.close()

flitedSeqs = u.fliter(maxSeqs)
expandedSeqs = u.expand(maxSeqs)
maxStages = u.genPlotDatas(maxSeqs)
flitedStages = u.genPlotDatas(flitedSeqs)
expandedStages = u.genPlotDatas(expandedSeqs)
allStages = []
allStages += [maxStages]
allStages += [flitedStages]
allStages += [expandedStages]
u.drawStages(allStages)
def train():
    TIMESTAMP = "{0:%Y-%m-%d-%H-%M/}".format(datetime.now())
    log.log_info('program start')
    data, num_good, num_bad = util.load_train_data(num_data // 2)
    log.log_debug('Data loading completed')

    # resample
    data, length = util.resample(data, 600)
    data = util.reshape(data, length)
    good_data_origin = data[:num_good, :]
    bad_data_origin = data[num_good:, :]

    # extract bad data for test and train
    permutation = list(np.random.permutation(len(bad_data_origin)))
    shuffled_bad_data = bad_data_origin[permutation, :]
    test_bad_data = shuffled_bad_data[:int(num_bad * 0.3), :]
    train_bad_data_origin = shuffled_bad_data[int(num_bad * 0.3):, :]
    # extract corresponding good data for test and train
    permutation = list(np.random.permutation(len(good_data_origin)))
    shuffled_good_data = good_data_origin[permutation, :]
    test_good_data = shuffled_good_data[:len(test_bad_data), :]
    train_good_data = shuffled_good_data[len(test_bad_data):, :]

    assert len(test_bad_data) == len(test_good_data)

    # construct test data
    test_y = np.array([1.] * len(test_good_data) + [0.] * len(test_bad_data), dtype=np.float).reshape(
        (len(test_bad_data) + len(test_good_data), 1))
    test_x = np.vstack((test_good_data, test_bad_data))

    # expand the number of bad data for train
    train_x = np.vstack((train_good_data, train_bad_data_origin))
    train_y = np.array([1.] * len(train_good_data) + [0.] * len(train_bad_data_origin), dtype=np.float).reshape(
        (len(train_bad_data_origin) + len(train_good_data), 1))
    train_x, train_y, num_expand = util.expand(train_x, train_y)

    # regularize
    for i in range(len(train_x)):
        train_x[i, :, 0] = util.regularize(train_x[i, :, 0])
        train_x[i, :, 1] = util.regularize(train_x[i, :, 1])
        train_x[i, :, 2] = util.regularize(train_x[i, :, 2])
    for i in range(len(test_x)):
        test_x[i, :, 0] = util.regularize(test_x[i, :, 0])
        test_x[i, :, 1] = util.regularize(test_x[i, :, 1])
        test_x[i, :, 2] = util.regularize(test_x[i, :, 2])

    # shuffle
    train_x, train_y = util.shuffle_data(train_x, train_y)
    log.log_debug('prepare completed')

    log.log_info('convolution layers: ' + str(conv_layers))
    log.log_info('filters: ' + str(filters))
    log.log_info('full connected layers: ' + str(fc_layers))
    log.log_info('learning rate: %f' % learning_rate)
    log.log_info('keep prob: ' + str(keep_prob))
    log.log_info('the number of expanding bad data: ' + str(num_expand))
    log.log_info('mini batch size: ' + str(mini_batch_size))
    if mini_batch_size != 0:
        assert mini_batch_size <= len(train_x)

    cnn = Cnn(conv_layers, fc_layers, filters, learning_rate)
    (m, n_W0, n_C0) = train_x.shape
    n_y = train_y.shape[1]

    # construct the computation graph
    cnn.initialize(n_W0, n_C0, n_y)
    cost = cnn.cost()
    optimizer = cnn.get_optimizer(cost)
    predict, accuracy = cnn.predict()
    init = tf.global_variables_initializer()
    saver = tf.train.Saver()

    with tf.Session() as sess:
        # log for tensorboard
        merged = tf.summary.merge_all()
        train_writer = tf.summary.FileWriter("resource/tsb/train/" + TIMESTAMP, sess.graph)
        test_writer = tf.summary.FileWriter("resource/tsb/test/" + TIMESTAMP)
        if enable_debug:
            sess = tf_debug.LocalCLIDebugWrapperSession(sess)
        sess.run(init)
        for i in range(1, num_epochs + 1):
            if mini_batch_size != 0:
                num_mini_batches = int(m / mini_batch_size)
                mini_batches = util.random_mini_batches(train_x, train_y, mini_batch_size)
                cost_value = 0
                for mini_batch in mini_batches:
                    (mini_batch_x, mini_batch_y) = mini_batch
                    _, temp_cost = sess.run([optimizer, cost],
                                            feed_dict={cnn.x: mini_batch_x, cnn.y: mini_batch_y,
                                                       cnn.keep_prob: keep_prob})
                    cost_value += temp_cost
                cost_value /= num_mini_batches
            else:
                _, cost_value = sess.run([optimizer, cost],
                                         feed_dict={cnn.x: train_x, cnn.y: train_y,
                                                    cnn.keep_prob: keep_prob})
            # disable dropout for evaluation
            summary_train, train_accuracy = sess.run([merged, accuracy],
                                                     feed_dict={cnn.x: train_x, cnn.y: train_y,
                                                                cnn.keep_prob: 1})
            summary_test, test_accuracy = sess.run([merged, accuracy],
                                                   feed_dict={cnn.x: test_x, cnn.y: test_y,
                                                              cnn.keep_prob: 1})
            train_writer.add_summary(summary_train, i - 1)
            test_writer.add_summary(summary_test, i - 1)
            if print_detail and (i % 10 == 0 or i == 1):
                info = '\nIteration %d\n' % i + \
                       'Cost: %f\n' % cost_value + \
                       'Train accuracy: %f\n' % train_accuracy + \
                       'Test accuracy: %f' % test_accuracy
                log.log_info(info)
            # stop when test accuracy >= 0.95 and train accuracy >= 0.99
            if test_accuracy >= 0.95 and train_accuracy >= 0.99:
                info = '\nIteration %d\n' % i + \
                       'Cost: %f\n' % cost_value + \
                       'Train accuracy: %f\n' % train_accuracy + \
                       'Test accuracy: %f' % test_accuracy
                log.log_info(info)
                saver.save(sess, "resource/model/" + TIMESTAMP)
                break
        saver.save(sess, "resource/model/" + TIMESTAMP)
        train_writer.close()
        test_writer.close()
    log.log_info('program end')
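# `util.expand(train_x, train_y)` is described above only by its comment
# ("expand the number of bad data for train") and its third return value. A
# minimal oversampling sketch under that assumption; the real helper may use a
# different augmentation strategy:
import numpy as np

def expand(x, y, factor=2):
    """Duplicate minority-class (label 0) samples `factor - 1` extra times."""
    bad_idx = np.where(y[:, 0] == 0.)[0]
    extra_x = np.repeat(x[bad_idx], factor - 1, axis=0)
    extra_y = np.repeat(y[bad_idx], factor - 1, axis=0)
    return np.vstack([x, extra_x]), np.vstack([y, extra_y]), len(extra_x)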
    a, b = k
    return toBitList(a), toBitList(b)

# All 4 are bit lists
lf0, rf0 = helper(splitInHalf(pout0))
lf1, rf1 = helper(splitInHalf(pout1))

# Is a bit list
cons = toBitList(''.join(map(util.byteToBitString, util.xor))[32:])

# Are bit lists
rPrime = util.xorBitList(rf0, rf1)
cPrime = util.applyBitPermutation(util.pinv, util.xorBitList(rPrime, cons))

e0 = util.expand(lf0)
e1 = util.expand(lf1)

co1 = []  # Inputs that go into S box for 1st plaintext
co2 = []  # Inputs that go into S box for 2nd plaintext
c = []
for j in util.sBoxesForThisXor:
    co1.append(bitListToInt(getblock(j, e0, 6)))
    co2.append(bitListToInt(getblock(j, e1, 6)))
    c.append(bitListToInt(getblock(j, cPrime, 4)))

# First guess a 6 bit value
for k in range(64):
    # Try that for each S box relevant for this XOR value
    for i in range(len(util.sBoxesForThisXor)):
        # If the input and output match
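# `util.expand(lf0)` in this snippet is presumably the DES E-expansion of a
# 32-bit half block to 48 bits before the S-box stage. A sketch under that
# assumption, using the standard DES E bit-selection table (the helper name
# and bit-list representation are assumptions):
E_TABLE = [32, 1, 2, 3, 4, 5,
           4, 5, 6, 7, 8, 9,
           8, 9, 10, 11, 12, 13,
           12, 13, 14, 15, 16, 17,
           16, 17, 18, 19, 20, 21,
           20, 21, 22, 23, 24, 25,
           24, 25, 26, 27, 28, 29,
           28, 29, 30, 31, 32, 1]

def expand(bits32):
    """Expand a 32-bit list to 48 bits via the (1-indexed) DES E table."""
    return [bits32[i - 1] for i in E_TABLE]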
                    default=True,
                    help='Don\'t make a last-run file.')
parser.add_argument('--rerun',
                    dest='rerun',
                    action='store_true',
                    default=False,
                    help='Do nothing else but re-run the last deployment.')
parser.add_argument('--colorless',
                    dest='color',
                    action='store_false',
                    default=True,
                    help='Don\'t use any colors.')
parser.set_defaults(dry=False, copy=False)

args = parser.parse_args()
depot = util.expand(args.depot)

# configurations
configurations_file = os.path.join(depot, conf.CONFIGURATIONS_FILE_NAME)
configurations_file_exists = os.path.isfile(configurations_file)
if configurations_file_exists:
    configurations_parser = util.get_parser(configurations_file)
    configurations_parse_succes = configurations_parser is not None


def deploy():
    """ Deploy SUS entirely """
    deploy_configurations()