def is_base_feed(dirpath):
    """Check if a directory is a feed directory.

    It is affirmative if and only if (1) it is a directory; (2) it contains
    at least one subdirectory named as a 4-digit string as the feed version;
    (3) under the feed version directory there is a directory named
    `versions`; and (4) under `versions` there are some all-numeric files
    or directories.

    Args:
        dirpath (str): Directory path to check

    Returns:
        bool for whether a directory looks like a feed directory
    """
    if not os.path.isdir(dirpath):
        return False  # not a directory
    ver_dir = (
        os.listdir(dirpath)
        | where(lambda name: len(name) == 4 and name.isdigit())  # 4-digit
        | select(lambda name: os.path.join(dirpath, name))
        | where(os.path.isdir)  # feed ver is a dir
        | select(lambda path: os.path.join(path, 'versions'))
        | where(os.path.isdir)  # data ver is a dir
        | where(lambda path: any(name.isdigit() for name in os.listdir(path)))  # all-digit
        | as_list)  # list of "feed_name/0001/versions"
    return bool(ver_dir)

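# Usage sketch for is_base_feed() (illustrative, not part of the original
# module): build the docstring's layout feed/0001/versions/0001 in a temp
# dir and probe it. Assumes `os` and the pipe operators above are in scope.
import os
import tempfile

with tempfile.TemporaryDirectory() as root:
    feed = os.path.join(root, "my_feed")
    os.makedirs(os.path.join(feed, "0001", "versions"))
    open(os.path.join(feed, "0001", "versions", "0001"), "w").close()
    print(is_base_feed(feed))  # True: 4-digit ver dir + versions/ + digit entry
    print(is_base_feed(root))  # False: "my_feed" is not a 4-digit name
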
def list_feed_subfeeds(feedroot, feedname, feedver, subfeednames=()):
    """Assume the feed data directory exists. Return the list of subfeed
    names in ascending order.

    Args:
        feedroot (str): root dir of the feed repository
        feedname (str): name of feed, also the directory under feedroot
        feedver (str): feed version, also the directory under feedname
        subfeednames (list): list of strings of partial subfeed names

    Returns:
        list of lists of strings, each a valid subfeed. If subfeednames is
        provided, only those under the provided partial subfeed name are
        returned
    """
    feeddir = os.path.join(feedroot, feedname, feedver)
    dirpath = os.path.join(feeddir, *subfeednames)
    if not os.path.isdir(dirpath):
        return []  # not a directory, or does not exist
    subfeeds = (
        os.walk(dirpath)
        | where(lambda rootdirfile: "versions" in rootdirfile[1])
        | select(lambda rootdirfile: rootdirfile[0][len(feeddir):])
        | sort
        | select(lambda dirname: list(filter(None, dirname.split(os.sep))))
        | where(lambda dirparts: "versions" not in dirparts)
        | as_list)
    return subfeeds

def finalize_data_preparation(total_seq, total_data_percent):
    total_data_size = int(len(total_seq) * total_data_percent / 100)
    training_data_size = int(total_data_size * 60 / 100)
    random.shuffle(total_seq)
    training_data = np.asarray(
        list(total_seq[:training_data_size] | select(lambda x: x[1])))
    training_labels = np.asarray(
        list(total_seq[:training_data_size] | select(lambda x: x[0])))
    test_data = np.asarray(
        list(total_seq[training_data_size:total_data_size]
             | select(lambda x: x[1])))
    test_labels = np.asarray(
        list(total_seq[training_data_size:total_data_size]
             | select(lambda x: x[0])))

    # VISUALIZATION
    print(f'training seq {len(training_data)}:')
    print(training_data[0])
    print(f'test seq {len(test_data)}:')
    print(test_data[0])

    return {
        'training_data': training_data,
        'training_labels': training_labels,
        'test_data': test_data,
        'test_labels': test_labels
    }

def write_to_file(output_file, prices):
    file_exists = os.path.isfile(output_file)
    with open(output_file, "a") as f:
        if not file_exists:
            f.write(",".join(sorted(prices[0].keys())) + "\n")
        for p in prices:
            # sort items by key so values line up with the header columns
            sorted_values = list(p.items()) | sort() | select(lambda x: x[1])
            f.write((sorted_values | select(str) | join(",")) + "\n")

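# Hypothetical call of write_to_file() with uniform dicts; `sort` and
# `join` are assumed to be pipe-style operators available in this project
# (a plain `join` pipe is not in every release of the pipe library). The
# header is the sorted key names and each row lists values in key order.
prices = [
    {"open": 1.0, "close": 1.2},
    {"open": 1.2, "close": 1.1},
]
write_to_file("prices.csv", prices)
# prices.csv now contains:
# close,open
# 1.2,1.0
# 1.1,1.2
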
def match_faces(self, faceencod, known_faces, tol):
    known_encodes = known_faces | select(lambda f: f["encod"]) | tolist
    matches = face_recognition.compare_faces(known_encodes, faceencod, tol)
    # Select only matched records
    return zip(matches, known_faces) \
        | where(lambda x: x[0]) \
        | select(lambda m: m[1]) \
        | tolist

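# Illustrative call, assuming the face_recognition package and a `matcher`
# instance of the class above whose known_faces entries look like
# {"name": ..., "encod": ...}; the frame path is made up.
import face_recognition

image = face_recognition.load_image_file("frame.jpg")
encodings = face_recognition.face_encodings(image)
if encodings:
    hits = matcher.match_faces(encodings[0], matcher.known_faces, tol=0.6)
    print([hit["name"] for hit in hits])
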
def balancedSignalGenerator(X, y, num_classes=12):
    class_map = {}
    for c in range(num_classes):
        class_map[c] = list(np.where(y == c)[0])
    D = range(num_classes) | select(
        lambda c: class_map[c] | pcycle | select(lambda i: (c, X[i]))
    ) | as_list
    while True:
        for c in D:
            # take one (label, signal) pair per class; calling next() twice
            # would pull the label and the data from different samples
            label, signal = next(c)
            yield label, signal

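# Sketch of drawing balanced samples; shapes are made up, `pcycle` is
# assumed to be a project-local pipe that cycles an iterable forever, and
# take/as_list come from the same pipe vocabulary used above.
import numpy as np

X_demo = np.random.randn(120, 8)
y_demo = np.repeat(np.arange(12), 10)
gen = balancedSignalGenerator(X_demo, y_demo, num_classes=12)
batch = gen | take(24) | as_list
print([label for label, _ in batch])  # labels cycle 0..11 twice
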
def test_right_or(self):
    ror_piped = (range(100)
                 | where(lambda x: x % 2 == 1)
                 | select(lambda x: x**2)
                 | select(lambda x: x - 1)
                 | where(lambda x: x < 50))
    or_pipe = (where(lambda x: x % 2 == 1)
               | select(lambda x: x**2)
               | select(lambda x: x - 1)
               | where(lambda x: x < 50))
    lror = list(ror_piped)
    lor = list(range(100) | or_pipe)
    self.assertEqual(lror, lor)

def __init__(self):
    self.face_database = os.environ.get("FACE_DATABASE", "")
    self.match_tol = float(os.environ.get("FACE_MATCH_TOL", 0.6))
    self.known_faces = self.load_known_faces()
    known_faces_names = self.known_faces | select(
        lambda f: f["name"]) | tolist
    logger.debug(f"known faces: {known_faces_names}")
    super().__init__()

def prepare_data(names, last_names, names_max_len, genders, dates,
                 total_data_percent):
    data_seq = normalize_merge_data(names, last_names, names_max_len,
                                    genders, dates)
    rev_data_seq = data_seq.copy()
    rev_data_seq.reverse()
    shifted_data_seq = data_seq.copy()
    shifted_data_seq.insert(0, shifted_data_seq.pop())
    # positive pairs: identity match plus four randomized copies
    total_seq = prepare_seq(data_seq, data_seq, 1)
    for _ in range(4):
        total_seq += prepare_seq(data_seq, (data_seq | select(randomize)), 1)
    # negative pairs: reversed and shifted sequences
    total_seq += prepare_seq(data_seq, rev_data_seq, 0)
    total_seq += prepare_seq(data_seq, shifted_data_seq, 0)
    # total_seq += list(map(lambda x: (x[0], [x[1][3], x[1][4],
    #     x[1][5], x[1][0], x[1][1], x[1][2]]), total_seq))
    return finalize_data_preparation(total_seq, total_data_percent)

def is_feed(dirpath):
    """Similar to is_base_feed(), but also covers feeds with subfeeds"""
    if not os.path.isdir(dirpath):
        return False  # not a directory
    ver_dir = (
        os.listdir(dirpath)
        | where(lambda name: len(name) == 4 and name.isdigit())  # 4-digit
        | select(lambda name: os.path.join(dirpath, name))
        | where(os.path.isdir)  # feed ver is a dir
        | as_list)  # list of "feed_name/0001"
    for dirname in ver_dir:
        for root, _dirs, files in os.walk(dirname):
            if root.rsplit(os.sep, 1)[-1] == 'versions' and \
                    any(name.isdigit() for name in files):  # all-digit files
                return True
    return False

def handle_msg(self, msg):
    matches = []
    for new_face in msg.faces:
        # Convert byte format back to a NumPy array
        new_face = np.frombuffer(new_face)
        logger.debug(f"type of new_face: {type(new_face)}")
        matches.extend(
            self.match_faces(new_face, self.known_faces, self.match_tol))
    if matches:
        titles = matches | select(lambda m: m["name"]) | tolist
        msg.matched_faces.extend(titles)
        logger.debug(f"match found: {titles}")
        yield True, msg
    else:
        logger.debug("New face found. Updating the database...")
        save_image_data_to_jpg(msg.raw_frame.image_bytes,
                               outpath=self.face_database)
        self.known_faces = self.load_known_faces()

def test_pipe():
    def fib():
        a, b = 0, 1
        while True:
            yield a
            a, b = b, a + b

    # Sum the even Fibonacci numbers below 4,000,000
    amount = fib() | where(lambda x: x % 2 == 0) | take_while(
        lambda x: x < 4000000) | add()
    print(amount)

    # Read a file, count how often each word occurs, then sort the words
    # by count in descending order
    with open('argparse.py') as f:
        fs = f.read()
        print(findall(r'\w+', fs))
        print(fs | Pipe(lambda x: findall(r'\w+', x))
              # | Pipe(lambda x: (i for i in x if i.strip()))
              | groupby(lambda x: x)
              | select(lambda x: (x[0], (x[1] | count)))
              | sort(key=lambda x: x[1], reverse=True))

def test_parallelepiped(self):
    par0 = (range(100)
            | where(lambda x: x % 2 == 1)
            | select(lambda x: x**2)
            | select(lambda x: x - 1)
            | where(lambda x: x < 50))
    par1 = (range(100)
            | where(lambda x: x % 2 == 1)
            | (Parallelepiped() | select(lambda x: x**2))
            | select(lambda x: x - 1)
            | where(lambda x: x < 50))
    par2 = (range(100)
            | where(lambda x: x % 2 == 1)
            | (Parallelepiped() | select(lambda x: x**2) | select(lambda x: x - 1))
            | where(lambda x: x < 50))
    l0 = list(par0)
    l1 = list(par1)
    l2 = list(par2)
    self.assertEqual(l0, l1)
    self.assertEqual(l0, l2)

from pipe import select, where

my_list = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
print(list(my_list | where(lambda x: x % 2 == 0)))

updated_list = my_list \
    | select(lambda x: x * 3) \
    | where(lambda x: x % 2 == 0)
print(list(updated_list))

def date_to_str_sequence(seq):
    return list(seq | select(lambda x: x.strftime("%Y%m%d")))

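# Quick check with datetime.date values (any object exposing strftime works):
import datetime

dates = [datetime.date(2020, 1, 31), datetime.date(2021, 12, 1)]
print(date_to_str_sequence(dates))  # ['20200131', '20211201']
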
discriminator.to_gpu()
if args.use_vectorizer:
    vectorizer.to_gpu()
if args.classifier_training_attribute_dataset is not None:
    classifier.to_gpu()

updater = kawaii_creator.updaters.Updater(
    generator=generator,
    discriminator=discriminator,
    xp=xp,
    batchsize=batchsize,
    generator_input_dimentions=GENERATOR_INPUT_DIMENTIONS)
if args.use_vectorizer:
    vectorizer_updater = kawaii_creator.updaters.VectorizerUpdater(vectorizer)
if args.classifier_training_attribute_dataset is not None:
    classifier_updater = kawaii_creator.updaters.ClassifierUpdater(classifier)

count_processed, sum_loss_discriminator, sum_loss_generator, sum_accuracy, \
    sum_loss_classifier, sum_accuracy_classifier = 0, 0, 0, 0, 0, 0

for batch in iterator | pipe.select(xp.array) | pipe.select(chainer.Variable):
    loss_generator = chainer.Variable(xp.zeros((), dtype=xp.float32))
    loss_discriminator = chainer.Variable(xp.zeros((), dtype=xp.float32))
    loss_vectorizer = chainer.Variable(xp.zeros((), dtype=xp.float32))
    loss_classifier = chainer.Variable(xp.zeros((), dtype=xp.float32))
    if args.generator_training:
        # forward
        generated, random_seed = updater.generate_random()
        discriminated_from_generated = updater.discriminator(generated)
        discriminated_from_dataset = updater.discriminator(batch)
        accuracy = updater.discriminator_accuracy(
            discriminated_from_generated=discriminated_from_generated,
            discriminated_from_dataset=discriminated_from_dataset)
        sum_accuracy += chainer.cuda.to_cpu(accuracy.data)
        # update generator
        loss_generator_each = updater.loss_generator(
            discriminated_from_generated=discriminated_from_generated)
        loss_generator += loss_generator_each

def test_fonctor(self):
    self.assertFEqual(
        ("a", "bb", "ccc") | select(lambda s: len(s)),
        (1, 2, 3))

def ConvertIndexToLabel(indexes):
    return indexes | select(lambda i: imageTypesInverted[i]) | as_list

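# Illustrative call; imageTypesInverted (index -> label name) is a
# module-level dict in the original code, so this sample mapping is made up:
imageTypesInverted = {0: "cat", 1: "dog", 2: "bird"}
print(ConvertIndexToLabel([2, 0, 1]))  # ['bird', 'cat', 'dog']
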
def balancedSignalGenerator(X, y, num_classes=12):
    class_map = {}
    for c in range(num_classes):
        class_map[c] = list(np.where(y == c)[0])
    D = range(num_classes) | select(
        lambda c: class_map[c] | pcycle | select(lambda i: (c, X[i]))
    ) | as_list
    while True:
        for c in D:
            # one (label, signal) pair per class per pass
            label, signal = next(c)
            yield label, signal

data = balancedSignalGenerator(X_train, y_train) | take(4200) | as_list
# note that we need as_list on the data
X_train_bal = data | select(lambda el: el[1]) | as_list | as_npy
y_train_bal = data | select(lambda el: el[0]) | as_list | as_npy

#%%
print(X_train_bal.shape, y_train_bal.shape)

#%% [markdown]
# And we are now balanced!

#%%
def labelDist(y, title):
    plt.figure()
    plt.hist(y, bins=12)
    plt.title(title)
    plt.xticks(range(12), imageTypes.keys(), rotation='vertical')

def test_pipe():
    print(range(5) | add)
    print(range(5) | where(lambda x: x % 2 == 0) | add)
    print(fibonacci() | where(lambda x: x % 2 == 0)
          | take_while(lambda x: x < 10000) | add)
    print(fibonacci() | select(lambda x: x ** 2)
          | take_while(lambda x: x < 100) | as_list)
    print(fibonacci() | take_while_idx(lambda x: x < 10) | as_list)

import argparse
import glob
import pathlib
import sys

import pipe

thisfilepath = pathlib.Path(__file__)
sys.path.append(str(thisfilepath.parent.parent.parent))
import chainer_progressive_gan
import train_conditional

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('dataset_glob')
    parser.add_argument('--edge', action='store_true')
    train_conditional.shared_args(parser)
    args = parser.parse_args()
    args.prefix = "edge2img" + ("_edge" if args.edge else "")
    paths = glob.glob(args.dataset_glob) | pipe.select(
        pathlib.Path) | pipe.as_list
    if args.edge:
        dataset = chainer_progressive_gan.datasets.Edge2ImgDataset(
            paths, resize=(args.resize, args.resize))
    else:
        dataset = chainer_progressive_gan.datasets.Sketch2ImgDataset(
            paths, resize=(args.resize, args.resize))
    train_conditional.main(args, dataset)

def work(in_train_arch, in_test_arch, in_train_csv, in_test_csv, out_h5):
    from pypipes import unzip, as_key, del_key, getitem, setitem
    from nppipes import (genfromtxt, place, astype, as_columns,
                         label_encoder, fit_transform, transform, stack)
    from nppipes import take as np_take
    from numpy.core.defchararray import strip
    from numpy import s_, mean, in1d, putmask
    from collections import Counter
    from h5pipes import h5new

    @P.Pipe
    def replace_missing_with(iterable, ftor):
        from numpy import isnan
        for item in iterable:
            for i in range(item.shape[1]):
                mask = isnan(item[:, i])
                value = ftor(item[~mask, i])
                item[mask, i] = value
            yield item

    missing_cidx = [11, 14, 16, 28, 33, 34, 35, 36, 37, 46, 51, 60, 68]
    unseen_nominal_cidx = [2, 12, 38, 69, 74]
    seen_nominal_cidx = [0, 1, 4, 5, 6, 13, 15, 17, 18, 19, 20, 21, 22,
                         23, 24, 25, 26, 27, 29, 30, 31, 32, 39, 40, 41,
                         42, 43, 44, 45, 47, 48, 49, 50, 52, 53, 54, 55,
                         56, 57, 58, 59, 61, 62, 63, 64, 65, 66, 67, 70,
                         71, 72, 73, 75, 76, 77]
    nominal_cidx = seen_nominal_cidx + unseen_nominal_cidx

    data = (
        in_train_arch
        | unzip(in_train_csv)
        | genfromtxt(delimiter=',', dtype=str)
        | place(lambda d: d == '', 'nan')
        | as_key('train')
        | as_key('train_col_names', lambda d: strip(d['train'][0], '"'))
        | as_key('train_labels', lambda d: d['train'][1:, 0].astype(int))
        | as_key('train_X', lambda d: d['train'][1:, 1:-1])
        | as_key('train_y', lambda d: d['train'][1:, -1].astype(int))
        | del_key('train')
        | as_key('test', lambda d: in_test_arch
                 | unzip(in_test_csv)
                 | genfromtxt(delimiter=',', dtype=str)
                 | place(lambda d: d == '', 'nan')
                 | P.first)
        | as_key('test_col_names', lambda d: strip(d['test'][0], '"'))
        | as_key('test_labels', lambda d: d['test'][1:, 0].astype(int))
        | as_key('test_X', lambda d: d['test'][1:, 1:])
        | del_key('test')
        | as_key('train_X', lambda d: (d['train_X'],)
                 | np_take(missing_cidx, axis=1)
                 | astype(float)
                 | replace_missing_with(mean)
                 | astype(str)
                 | setitem(d['train_X'].copy(), s_[:, missing_cidx])
                 | P.first)
        | as_key('label_encoders', lambda d: len(nominal_cidx)
                 | label_encoder
                 | P.as_tuple)
        | as_key('train_X', lambda d: (d['train_X'],)
                 | np_take(nominal_cidx, axis=1)
                 | as_columns
                 | fit_transform(d['label_encoders'])
                 | stack(axis=1)
                 | setitem(d['train_X'].copy(), s_[:, nominal_cidx])
                 | P.first)
        | as_key('test_X', lambda d: (d['test_X'],)
                 | np_take(seen_nominal_cidx, axis=1)
                 | as_columns
                 | transform(d['label_encoders'][:-len(unseen_nominal_cidx)])
                 | stack(axis=1)
                 | setitem(d['test_X'].copy(), s_[:, seen_nominal_cidx])
                 | P.first)
        | as_key('test_X', lambda d: (d['test_X'],)
                 | np_take(unseen_nominal_cidx, axis=1)
                 | as_key('test_unseen_nominals_features')
                 | as_key('test_unseen_nominals',
                          lambda d2: zip(d2['test_unseen_nominals_features'].T,
                                         d['label_encoders'][-len(unseen_nominal_cidx):])
                          | P.select(lambda t: list(set(t[0]) - set(t[1].classes_)))
                          | P.as_list)
                 | as_key('train_most_common_nominals',
                          lambda d2: zip(d['train_X'][:, unseen_nominal_cidx].T.astype(int),
                                         d['label_encoders'][-len(unseen_nominal_cidx):])
                          | P.select(lambda t: t[1].inverse_transform(t[0]))
                          | P.select(lambda s: Counter(s).most_common(1)[0][0])
                          | P.as_list)
                 | as_key('test_corrected_features',
                          lambda d2: zip(d2['test_unseen_nominals_features'].copy().T,
                                         d2['test_unseen_nominals'],
                                         d2['train_most_common_nominals'])
                          | P.select(lambda t: putmask(t[0], in1d(t[0], t[1]), t[2]) or t[0].T)
                          | stack(axis=1)
                          | P.first)
                 | getitem('test_corrected_features')
                 | as_columns
                 | transform(d['label_encoders'][-len(unseen_nominal_cidx):])
                 | stack(axis=1)
                 | setitem(d['test_X'].copy(), s_[:, unseen_nominal_cidx])
                 | P.first)
        | del_key('label_encoders')
        | as_key('test_X', lambda d: (d['test_X'],)
                 | np_take(missing_cidx, axis=1)
                 | astype(float)
                 | replace_missing_with(mean)
                 | astype(str)
                 | setitem(d['test_X'].copy(), s_[:, missing_cidx])
                 | P.first)
        | P.first
    )
    # print(data.keys())

    (
        (out_h5,)
        | h5new
        | as_key('train_X', lambda _: data['train_X'].astype(float))
        | as_key('train_y', lambda _: data['train_y'].astype(float))
        | as_key('test_X', lambda _: data['test_X'].astype(float))
        | as_key('train_labels', lambda _: data['train_labels'])
        | as_key('test_labels', lambda _: data['test_labels'])
        | P.first
    )
    return

def to_padded_int_sequence(seq, padding_max_len):
    padded_int_seq = pad_sequences(
        list(seq | select(lambda x: list(x | select(ord)))),
        maxlen=padding_max_len)
    return padded_int_seq

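# Usage sketch, assuming pad_sequences is Keras'
# (tensorflow.keras.preprocessing.sequence.pad_sequences): each string is
# mapped to its ordinal codes, then left-padded with zeros by default.
print(to_padded_int_sequence(["ab", "abcd"], padding_max_len=4))
# [[  0   0  97  98]
#  [ 97  98  99 100]]
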
# -*- coding: utf-8 -*-
import argparse
import csv
import os

import pipe
import texmex_python

parser = argparse.ArgumentParser()
parser.add_argument("csv_path")
parser.add_argument("fvec_path")
args = parser.parse_args()

assert not os.path.exists(args.fvec_path)
writer = texmex_python.Writer(args.fvec_path, 'f')
with open(args.csv_path) as fr:
    reader = csv.reader(fr)
    for vec in reader | pipe.select(lambda vec: list(map(float, vec))):
        writer.write(vec)

def __init__(self, tsv_path):
    # split() without an argument also drops the trailing newline, which
    # split(" ") would leave attached to the last field
    self._data = numpy.array([
        line.split() | pipe.select(lambda x: x == "1") | pipe.select(int) | pipe.as_list
        for line in open(tsv_path)
    ])

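# What the comprehension yields for one line (a made-up row of "1"/"0"
# flags), using the same pipe calls as above:
row = "1 0 0 1\n".split() | pipe.select(lambda x: x == "1") | pipe.select(int) | pipe.as_list
print(row)  # [1, 0, 0, 1]
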
from pipe import select, where, chain, traverse, groupby, dedup

arr = [1, 2, 3, 4, 5]
print(list(map(lambda x: x * 2, filter(lambda x: x % 2 == 0, arr))))
print(list(arr | where(lambda x: x % 2 == 0) | select(lambda x: x * 2)))
print(list(arr | select(lambda x: x * 2)))

nested = [[1, 2, [3]], [4, 5]]
print(list(nested | chain))
print(list(nested | traverse))

fruits = [
    {"name": "apple", "price": [2, 5]},
    {"name": "orange", "price": 4},
    {"name": "grape", "price": 5},
]
print(list(fruits | select(lambda fruit: fruit["price"]) | traverse))

print(
    list((1, 2, 3, 4, 5, 6, 7, 8, 9)
         | groupby(lambda x: "Even" if x % 2 == 0 else "Odd")
         | select(lambda x: {x[0]: list(x[1])})))