Example #1
    def fit_transform(self, database):
        def func(col):
            mean, std = np.nanmean(col.data), np.nanstd(col.data)
            col.data = (col.data - mean) / std
            np.nan_to_num(col.data, copy=False)

        parallel(func, database.num_columns)
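Note: the snippets on this page come from different projects, so "parallel" is not one single function. For the call pattern used above, parallel(func, items), a minimal sketch of such a helper (an illustrative assumption, not the original utility) might be:

from multiprocessing.pool import ThreadPool

def parallel(func, items, workers=None):
    # Apply func to every item concurrently and return the results in input order.
    # A thread pool is assumed here so that func may mutate shared objects
    # (e.g. the column data above) in place.
    with ThreadPool(workers) as pool:
        return pool.map(func, list(items))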
Example #2
def crawl_pkgs_meta(packages, target_dir, workers):
    pkgs_dict = LazyBucketDict(target_dir)
    args_list = [(name, pkgs_dict) for name in packages]
    if workers > 1:
        utils.parallel(save_pkg_meta, zip(*args_list), workers=workers)
    else:
        [save_pkg_meta(*args) for args in args_list]
    pkgs_dict.save()
Example #3
    def fit_transform(self, database):
        todo_list = []
        for table in database.tables.values():
            for cat_col in table.cat_columns:
                for num_col in table.num_columns:
                    if not cat_col.keyindex.is_one:
                        todo_list.append((cat_col, num_col))

        def func(args):
            cat_col, num_col = args
            mean, std, skew, kurt = moment_group_by(cat_col.keyindex,
                                                    num_col.data)
            mean[0] = std[0] = skew[0] = kurt[0] = np.nan
            return mean[cat_col.data], std[cat_col.data], \
                   skew[cat_col.data], kurt[cat_col.data]

        rets = parallel(func, todo_list)

        for (cat_col, num_col), (mean, std, skew,
                                 kurt) in zip(todo_list, rets):
            self.engine.cache_column(
                cat_col.table.name,
                f'nMean({num_col.name})GroupBy({cat_col.name})', 'num', mean)
            self.engine.cache_column(
                cat_col.table.name,
                f'nStd({num_col.name})GroupBy({cat_col.name})', 'num', std)
            self.engine.cache_column(
                cat_col.table.name,
                f'nSkew({num_col.name})GroupBy({cat_col.name})', 'num', skew)
            self.engine.cache_column(
                cat_col.table.name,
                f'nKurt({num_col.name})GroupBy({cat_col.name})', 'num', kurt)
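moment_group_by above is project-specific; as an assumed sketch of its semantics (per-group mean, std, skewness and kurtosis, indexable by group id), using plain integer group codes as a stand-in for the real keyindex object:

import numpy as np
from scipy import stats as sps

def moment_group_by(group_ids, values):
    # Assumption: group_ids are integer codes aligned with values; returns arrays of
    # per-group statistics indexable by group id, with NaN for empty groups.
    group_ids = np.asarray(group_ids)
    values = np.asarray(values, dtype=float)
    n_groups = int(group_ids.max()) + 1
    mean, std, skew, kurt = (np.full(n_groups, np.nan) for _ in range(4))
    for g in range(n_groups):
        vals = values[group_ids == g]
        if vals.size:
            mean[g] = np.nanmean(vals)
            std[g] = np.nanstd(vals)
            skew[g] = sps.skew(vals, nan_policy='omit')
            kurt[g] = sps.kurtosis(vals, nan_policy='omit')
    return mean, std, skew, kurt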
Example #4
    def snapshot(self, community):
        home = self.home / community.country
        tarname = '{}-{}.tar.bz2'.format(
            '-'.join(
                sorted(
                    set(pathlib.PurePath(e.path).stem
                        for e in self.endpoints))), int(time()))
        home.mkdir(parents=True, exist_ok=True)
        tar = tarfile.open(str(home / tarname), mode='w:bz2')
        with Cache(home / 'timestamps.json') as cache:
            resources = ((resource, cache.get(resource.url))
                         for resource in chain(*(s.resources(self.endpoints)
                                                 for s in community)))

            total_waited_time = 0
            for resource, request, timestamp, total in utils.parallel(
                    resources, Snapshot.fetch):
                total_waited_time += total
                cache.set(resource.url, timestamp)
                filename = '{}/{}'.format(resource.server, resource.endpoint)
                info = tarfile.TarInfo(name=filename)
                info.size = len(request.content)
                data = io.BytesIO(request.content)
                tar.addfile(info, fileobj=data)
        tar.close()
        return total_waited_time
Example #5
    def fit_transform(self, database):
        # Build keyindex
        def func(col):
            order = col.table.order
            order_data = order.data if order is not None else None
            # TODO
            # set_time = True if order is not None and order.type == 'time' else False
            set_time = False
            col.keyindex = build_key_index(col.data, order_data,
                                           col.block['unique'], set_time)

        parallel(func, database.cat_columns)

        # Remove all order attributes
        for table in database.tables.values():
            order = table.order
            if order is not None and order.type == 'order':
                table.drop_column(order.name)
Example #6
    def fit_transform(self, database):
        todo_list = database.attr_columns

        def func(col):
            return is_dup_column(col.type, col.data)

        rets = parallel(func, todo_list)
        for col, is_drop in zip(todo_list, rets):
            if is_drop:
                col.table.drop_column(col.name)
Example #7
def step_opt(args: Namespace) -> int:
    args.opt_bc_list = []
    for opt in args.opt_levels or ["0"]:
        ret = parallel(
            f"timeout 1m opt {{}} -O{opt} {args.opt_flags} -o {{.}}-O{opt}.bc",
            args.bc_list,
        )
        if ret > 0 and args.exit_on_error:
            return ret
        args.opt_bc_list.extend(
            [os.path.splitext(x)[0] + f"-O{opt}.bc" for x in args.bc_list]
        )
    return 0
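Here (and in the later step_strip, step_souper and step_dis examples) parallel takes a shell command template plus a file list and returns a non-zero value when commands fail; the {} and {.} placeholders follow the GNU parallel convention (the input path, and the path without its extension). A minimal sketch under those assumptions:

import os
import subprocess
from concurrent.futures import ThreadPoolExecutor

def parallel(cmd_template, files, workers=os.cpu_count()):
    # Substitute each file into the template and run the commands concurrently.
    # Returns the number of commands that exited with a non-zero status.
    def run(path):
        stem = os.path.splitext(path)[0]
        cmd = cmd_template.replace('{.}', stem).replace('{}', path)
        return subprocess.run(cmd, shell=True).returncode != 0
    with ThreadPoolExecutor(max_workers=workers) as pool:
        return sum(pool.map(run, files))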
Example #8
    def run(self, dataset, test=False):
        mode = 'test' if test else 'train'
        U.log(f'Running algorithm in {mode} mode.')

        def _extract(user):
            return pd.DataFrame(self.extractor(user, self.meta, test))

        grouped = dataset.groupby('installation_id', sort=False)
        users = (g for _, g in grouped)
        if self.pbar:
            users = tqdm(users, total=grouped.ngroups)
        datasets = U.parallel(_extract, users, num_workers=self.num_workers)
        dataset = pd.concat(datasets, axis=0)
        dataset = dataset.reset_index(drop=True)
        return dataset
Example #9
def main():
    dump_dir = sys.argv[1]
    workers = int(os.environ.get('WORKERS', "1"))
    pypi_fetcher_dir = os.environ.get('pypi_fetcher')
    print(f'Index directory: {pypi_fetcher_dir}')
    assert isdir(pypi_fetcher_dir)
    for bucket in LazyBucketDict.bucket_keys():
        pypi_dict = LazyBucketDict(f"{pypi_fetcher_dir}/pypi")
        dump_dict = LazyBucketDict(dump_dir, restrict_to_bucket=bucket)
        print(f"Prune bucket {bucket}")
        prune_entries(bucket, pypi_dict, dump_dict)
        pypi_dict.save()
        dump_dict.save()
        print(f"Calculating jobs for bucket {bucket}")
        jobs = list(get_jobs(bucket, pypi_dict, dump_dict))
        if not jobs:
            continue
        print(f"Starting batch with {len(jobs)} jobs")
        func = mine_wheel_metadata_full_download
        if workers > 1:

            def f(job):
                return exec_or_return_exc(func, job)

            result = parallel(f, (jobs, ), workers=workers)
        else:
            result = [exec_or_return_exc(func, job) for job in jobs]
        for r in result:
            if isinstance(r, Exception):
                continue
            name = r.job.name
            ver = r.job.ver
            pyver = r.job.pyver
            fn = r.job.filename
            if name not in dump_dict:
                dump_dict[name] = {}
            if pyver not in dump_dict[name]:
                dump_dict[name][pyver] = {}
            if ver not in dump_dict[name][pyver]:
                dump_dict[name][pyver][ver] = {}
            dump_dict[name][pyver][ver][fn] = {}
            for key in ('requires_dist', 'provides_extras',
                        'requires_external', 'requires_python'):
                val = getattr(r, key)
                if val:
                    dump_dict[name][pyver][ver][fn][key] = val
        compress(dump_dict)
        dump_dict.save()
Example #10
    def forward(self, img):
        if self.quantize:
            img = np.float_(np.int_(img * 255.0))
            img_parallel = parallel(img, stride=1)
            for i in range(img_parallel.shape[1]):
                patch = img_parallel[:, i].reshape((8, 8))
                #patch = (patch-patch.mean())/(10)
                img_parallel[:, i] = convolve2d(patch, self.param,
                                                mode='same').reshape(
                                                    img_parallel[:, i].shape)
            img_parallel = 0.5 * np.sign(img_parallel + self.bias) + 0.5
            img_ht = np.where(
                unparallel_grad(img_parallel, img, stride=1) > 32.0,
                np.ones_like(img), np.zeros_like(img))
        else:
            # Assumed fallback: without quantization, return the input unchanged
            # so that img_ht is always defined.
            img_ht = img

        return img_ht
Example #11
def extract_event_data(df, features=DEFAULT_EVENT_FEATUERS,
                       num_workers=cpu_count(), pbar=False,
                       **opts):

    parse_row = EventParser(features)
    event_data = df.event_data
    if pbar:
        event_data = tqdm(event_data, desc='Processing events')
    df = pd.DataFrame(parallel(parse_row, event_data, num_workers))
    df = fillna(df, 'game_time', method='mean')
    df = fillna(df, 'coordinates.x', method='mode')
    df = fillna(df, 'coordinates.y', method='mode')
    df = fillna(df, 'coordinates.stage_height', method='mode')
    df = fillna(df, 'coordinates.stage_width', method='mode')
    df = fillna(df, 'description', method='mode', fallback='none')
    df = fillna(df, 'media_type', method='const', value='none')
    df = fillna(df, 'identifier', method='const', value='none')
    df = fillna(df, 'duration', method='mean')
    df = fillna(df, 'total_duration', method='mean')
    return df
Example #12
    def update_cache_columns(self):
        """ 更新 cache_columns 到表
        筛除 NaN 过多和重复值过多的列
        目前只支持新增 num 属性列
    """
        def is_drop(args):
            return True if is_nan_column(*args) or is_dup_column(
                *args) else False
            # return False

        self._cache_columns = [(k, v) for k, v in self._cache_columns.items()]
        self._cache_columns.sort()
        ret = parallel(is_drop, [v for k, v in self._cache_columns])

        rest_table2data = {}
        for ((tname, attr),
             (typ, data)), drop_flag in zip(self._cache_columns, ret):
            if not drop_flag:
                self.database.tables[tname].add_column(attr, typ, data)
        self._cache_columns = {}
Example #13
def main():
    workers = int(os.environ.get('WORKERS', "1"))
    pypi_fetcher_dir = os.environ.get('pypi_fetcher', '/tmp/pypi_fetcher')
    ensure_pypi_fetcher(pypi_fetcher_dir)
    init_db()
    build_base(store=os.environ.get('STORE', None))
    P = Package
    with Measure('Get processed pkgs from DB'):
        processed = set((p.name, p.version)
                        for p in P.select(P.name, P.version).distinct())
        print(f"DB contains {len(processed)} pkgs at this time")
    for bucket in LazyBucketDict.bucket_keys():
        with Measure("getting jobs"):
            jobs = get_jobs(pypi_fetcher_dir, bucket, processed, amount=1000)
            if not jobs:
                continue
        with Measure('batch'):
            if workers > 1:
                pool_results = utils.parallel(extract_requirements, (jobs, ),
                                              workers=workers,
                                              use_processes=False)
            else:
                pool_results = [extract_requirements(args) for args in jobs]
        results = []
        for i, res in enumerate(pool_results):
            if isinstance(res, Exception):
                print(f"Problem with {jobs[i].name}:{jobs[i].version}")
                if isinstance(res, sp.CalledProcessError):
                    print(res.stderr)
                traceback.print_exception(res, res, res.__traceback__)
            else:
                for r in res:
                    results.append(r)
        sleep(1)
        with db.atomic():
            with Measure('bulk insert'):
                Package.bulk_create([Package(**r) for r in results])
        if os.environ.get('CLEANUP', None):
            cleanup()
Example #14
    def fit_transform(self, database):
        for table in sorted(database.tables.values()):
            id_col = table.id
            if id_col is None:
                todo_list = table.cat_columns

                def func(col):
                    return pd.Series(col.data).nunique(dropna=True)

                n_unique = np.array(parallel(func, todo_list), dtype=np.int32)
                idx = np.where(n_unique == table.n_lines)[0]
                if len(idx) == 1:
                    table.info[todo_list[idx[0]].name] = 'id'
                elif len(idx) > 1:
                    for col in todo_list[idx[1:]]:
                        table.drop_column(col.name)
                    logging.warning(
                        'More than one column in table %s has unique values: %s'
                        ', we have dropped the unnecessary ones.' %
                        (table.name, todo_list[idx][0].global_name))
                    table.info[todo_list[idx[0]].name] = 'id'
            else:
                assert pd.Series(id_col.data).nunique(dropna=True) == table.n_lines, \
                    '%s is not an id column' % id_col.global_name
Example #15
def CW_attack_fast(img_0,
                   mean_cat_attack,
                   cov_cat_attack,
                   pi_cat_attack,
                   mean_grass_attack,
                   cov_grass_attack,
                   pi_grass_attack,
                   mean_cat_defense,
                   cov_cat_defense,
                   pi_cat_defense,
                   mean_grass_defense,
                   cov_grass_defense,
                   pi_grass_defense,
                   original_img,
                   truth,
                   l=5,
                   target_index=1,
                   stride=8,
                   alpha=0.0001,
                   display_iter=300,
                   title='',
                   path='./Outputs',
                   preprocessing=[None, None],
                   attack_type='blackbox'):
    iter_num = 0
    parallel_img_0 = parallel(img_0, stride=stride)
    img_k = img_0
    W_cat, w_cat, w_0_cat = get_parameters(mean_cat_attack, cov_cat_attack,
                                           pi_cat_attack)
    W_grass, w_grass, w_0_grass = get_parameters(mean_grass_attack,
                                                 cov_grass_attack,
                                                 pi_grass_attack)

    while iter_num < 300:
        iter_num += 1
        parallel_img_k = parallel(img_k, stride=stride)
        if attack_type == 'whitebox' and preprocessing[0] != None:
            parallel_img_k = preprocessing[0].forward(parallel_img_k)
            parallel_img_0 = preprocessing[0].forward(parallel_img_0)

        current_grad = gradient_CW(patch_vec_k=parallel_img_k,
                                   patch_vec_0=parallel_img_0,
                                   mean_cat=mean_cat_attack,
                                   cov_cat=cov_cat_attack,
                                   pi_cat=pi_cat_attack,
                                   mean_grass=mean_grass_attack,
                                   cov_grass=cov_grass_attack,
                                   pi_grass=pi_grass_attack,
                                   W_cat=W_cat,
                                   w_cat=w_cat,
                                   w_0_cat=w_0_cat,
                                   W_grass=W_grass,
                                   w_grass=w_grass,
                                   w_0_grass=w_0_grass,
                                   l=l,
                                   target_index=target_index)
        grad = unparallel_grad(current_grad, img_0, stride=stride)
        img_k_1 = np.clip(img_k - alpha * grad, 0, 1)
        change = np.linalg.norm((img_k_1 - img_k))
        img_k = img_k_1

        if (iter_num) % display_iter == 0:
            print("\n")
            display_image(img_perturbed=img_k_1,
                          mean_cat=mean_cat_defense,
                          cov_cat=cov_cat_defense,
                          pi_cat=pi_cat_defense,
                          mean_grass=mean_grass_defense,
                          cov_grass=cov_grass_defense,
                          pi_grass=pi_grass_defense,
                          original_img=original_img,
                          truth=truth,
                          title=title + 'iter_' + str(iter_num),
                          stride=stride,
                          preprocessing=preprocessing[1],
                          path=path)

            print(' Change:{}'.format(change))
        if change < 0.001 and stride == 8:
            print("\n\nMax Iteration:" + str(iter_num))
            break
        elif change < 0.01 and stride == 1:
            print("\n\nMax Iteration:" + str(iter_num))
            break

    return img_k_1
Example #16
	def test_fapkc_encryption(Ring, block_size, stream_length, test_uncompiled=False, print_data=False):
		print("FAPKC encryption / decryption test")
		print(" algebra:", Ring, ", data block size =", block_size, ", stream length =", stream_length)
		
		Automaton = automaton_factory(Ring)
		ConstVector = Automaton.base_const_vector
		
		
		for memory_size in range(1, 33):
			print()
			print(" memory_size =", memory_size)
			text = [ConstVector.random(block_size) for i in range(stream_length)]
			print("  generating FAPKC0 key pair")
			start_time = time()
			encrypt, decrypt = Automaton.fapkc0(block_size=block_size, memory_size=memory_size)
			print("   time:", int(time() - start_time))
			
			if test_uncompiled:
				print("  encryption/decryption test")
				print("  encrypt... length =", stream_length)
				start_time = time()
				cipher_1 = list(encrypt(text))
				print("   time:", int(time() - start_time))
				if print_data:
					print(''.join(['{:02x}'.format(int(_x)) for _x in cipher_1]))
				
				print("  decrypt... length =", stream_length)
				start_time = time()
				text_1 = list(decrypt(cipher_1))
				print("   time:", int(time() - start_time))
				if print_data:
					print(''.join(['  '] * memory_size + ['{:02x}'.format(int(_x)) for _x in text]))
					print(''.join(['{:02x}'.format(int(_x)) for _x in text_1]))
				
				assert text_1[memory_size:] == text[:-memory_size]
			
			compiler = Compiler()
			with parallel(0):
				print("  compiling encrypt automaton")
				start_time = time()
				encrypt.compile('encrypt', compiler)
				print("   time:", int(time() - start_time))
				print("  compiling decrypt automaton")
				start_time = time()
				decrypt.compile('decrypt', compiler)
				print("   time:", int(time() - start_time))
			print("  code generation")
			code = compiler.compile()		
			encrypt = encrypt.wrap_compiled('encrypt', code)
			decrypt = decrypt.wrap_compiled('decrypt', code)
			print("   time:", int(time() - start_time))
			
			print("  testing compiled automata")
			with code:
				print("  encrypt... length =", stream_length)
				start_time = time()
				cipher_2 = list(encrypt(text))
				print("   time:", int(time() - start_time))
				if print_data:
					print(''.join(['{:02x}'.format(int(_x)) for _x in cipher_2]))
				
				print("  decrypt... length =", stream_length)
				start_time = time()
				text_2 = list(decrypt(cipher_2))
				print("   time:", int(time() - start_time))
				if print_data:
					print(''.join(['  '] * memory_size + ['{:02x}'.format(int(_x)) for _x in text]))
					print(''.join(['{:02x}'.format(int(_x)) for _x in text_2]))
Example #17
def step_strip(args: Namespace) -> int:
    ret = parallel(f"opt {{}} -strip-debug -strip -o {{.}}-strip.bc", args.bc_list)
    if ret > 0 and args.exit_on_error:
        return ret
    args.bc_list = [os.path.splitext(x)[0] + "-strip.bc" for x in args.bc_list]
    return 0
Example #18
	def automaton_test_suite(verbose=False):
		if verbose: print("running test suite")
		
		Automaton = automaton_factory(BooleanRing.get_algebra())
		Vector = Automaton.base_const_vector
		zero_v = Vector.zero(8)
		
		'''
		print()
		print("Testing nonlinear automata")
		for memory_size in range(1, 5):
			print()
			print("test for memory size", memory_size)
			print(" generating automata...")
			ls, li = Automaton.nonlinear_nodelay_wifa_pair(block_size=8, memory_size=memory_size)
			
			print(" compiling automata...")
			compiler = Compiler()
			with parallel(0):
				ls.compile('ls', compiler)
				li.compile('li', compiler)
			code = compiler.compile()
			ls = ls.wrap_compiled('ls', code)
			li = li.wrap_compiled('li', code)
			
			xi = [Vector.random(8) for _i in range(1024)]
			print(" xi =", ''.join(['{:02x}'.format(int(_x)) for _x in xi]))
			
			y = list(ls(xi))
			print(" y  =", ''.join(['{:02x}'.format(int(_x)) for _x in y]))
			
			xo = list(li(y))
			print(" xo =", ''.join(['{:02x}'.format(int(_x)) for _x in xo]))
			
			assert xi == xo
			print(" ok", memory_size)
		
		print()
		print("Testing linear automata")
		for memory_size in range(1, 5):
			print()
			print("test for memory size", memory_size)
			print(" generating automata...")
			ls, li = Automaton.linear_delay_wifa_pair(block_size=8, memory_size=memory_size)
			
			print(" compiling automata...")
			compiler = Compiler()
			with parallel(0):
				ls.compile('ls', compiler)
				li.compile('li', compiler)
			code = compiler.compile()
			ls = ls.wrap_compiled('ls', code)
			li = li.wrap_compiled('li', code)
			
			xi = [Vector.random(8) for _i in range(1024)]
			print(" xi =", ''.join(['{:02x}'.format(int(_x)) for _x in xi]))
			
			y = list(ls(xi + [Vector.random(8) for _i in range(memory_size)]))
			print(" y  =", ''.join(['{:02x}'.format(int(_x)) for _x in y]))
			
			xo = list(li(y))[memory_size:]
			print(" xo =", ''.join(['{:02x}'.format(int(_x)) for _x in xo]))
			
			assert xi == xo
			print(" ok", memory_size)
		'''
		
		print()
		print("Testing FAPKC0")
		for memory_size in range(1, 5):
			print()
			print("test for memory size", memory_size)
			print(" generating automata...")
			ls, li = Automaton.fapkc0(block_size=8, memory_size=memory_size)
			
			print(" composing identity automaton...")
			ll = ls @ li
			ll.optimize()
			print(ll.output_transition)
			print(ll.state_transition)
			
			print(" compiling automata...")
			compiler = Compiler()
			with parallel(0):
				ls.compile('ls', compiler)
				li.compile('li', compiler)
				ll.compile('ll', compiler)
			code = compiler.compile()
			ls = ls.wrap_compiled('ls', code)
			li = li.wrap_compiled('li', code)
			ll = ll.wrap_compiled('ll', code)
			
			xi = [Vector.random(8) for _i in range(1024)]
			print(" xi =", ''.join(['{:02x}'.format(int(_x)) for _x in xi]))
			
			y = list(ls(xi + [Vector.random(8) for _i in range(memory_size)]))
			print(" y  =", ''.join(['{:02x}'.format(int(_x)) for _x in y]))
			
			xo = list(li(y))[memory_size:]
			print(" xo =", ''.join(['{:02x}'.format(int(_x)) for _x in xo]))
			
			assert xi == xo, "Encryption / decryption test failed."
			
			print(" testing identity automaton...")
			xr = list(ll(xi + [Vector.random(8) for _i in range(memory_size)]))[memory_size:]
			assert xi == xr, "Identity automaton test failed."
			
			print(" ok")
			
		
		#quit()
		
		#Automaton.fapkc0(memory_size=6)
		
		'''
		for i in (2, 3, 4, 5, 16, 64, 128, 512, 1024):
			if verbose: print()
			if verbose: print("test ModularRing(size={})".format(i))
			ring = ModularRing.get_algebra(size=i)
			if verbose: print(" automaton test")
			test_automaton_composition(ring)
		'''
		
		if verbose: print()
		if verbose: print("test BooleanRing()")
		ring = BooleanRing.get_algebra()
		if verbose: print(" automaton test")
		test_automaton_composition(ring)
		
		'''
		for i in (2, 3, 4, 5, 16, 64, 128, 512, 1024):
			if verbose: print()
			if verbose: print("test GaloisRing(size={})".format(i))
			field = GaloisField.get_algebra(size=i)
			if verbose: print(" automaton test")
			test_automaton_composition(field)
		
		assert BinaryRing.get_algebra(exponent=1)(1) != RijndaelRing(1)
		
		for i in (2, 3, 4, 5, 8, 9, 10):
			if verbose: print()
			if verbose: print("test BinaryRing(exponent={})".format(i))
			field = BinaryRing.get_algebra(exponent=i)
			if verbose: print(" automaton test")
			test_automaton_composition(field)
		'''
		
		if verbose: print()
		if verbose: print("test RijndaelField()")
		field = RijndaelField
		if verbose: print(" automaton test")
		test_automaton_composition(field)
Example #19
    def fit_transform(self, database):
        def func(col):
            np.nan_to_num(col.data, copy=False)

        parallel(func, database.num_columns)
Example #20
    x_draw = np.linspace(0, big_zero + 1000, 500)
    y_draw = utils.f_2(x_draw, params[0], params[1], params[2])

    print(params, 'stop=', stop)
    file = open('final_df/r6(20180916~20180930)all/params.txt', 'w')
    file.write(str(params[0]) + ',' + str(params[1]) + ',' + str(params[2]) + '\n' + str(big_zero))
    file.close()

    print('========================')
    plt.plot(x_draw, y_draw, 'g-', label='fitting curve')
    plt.legend(loc='best')
    plt.show()

    # All weight and time processing is done; now start the parallel iterative computation:
    print(data.shape)
    data = utils.parallel(data, params, big_zero)[0]
    user_ts = utils.parallel(data, params, big_zero)[1]
    goal_ts = utils.parallel(data, params, big_zero)[2]

    # Take the compensation term into account to update the ts_pts values:
    print('ts is updating...')
    data = utils.parallel_ts(data, user_ts, goal_ts)

    print('DATA is saving...')
    data.to_csv('final_df/r6(20180916~20180930)all/data_round6.csv', index=False)
    print('==========================================')

    print('school rank1 is saving...')
    frame1 = result.school_rank(data, user_ts)
    frame1.to_csv('final_df/r6(20180916~20180930)all/school_rank_round6.csv')
    print('==========================================')
Example #21
def step_souper(args: Namespace) -> int:
    return parallel("souper {} > {.}.souper", args.bc_list)
Example #22
def step_dis(args: Namespace) -> int:
    return parallel("llvm-dis", args.bc_list) + parallel("llvm-dis", args.opt_bc_list)
Example #23
def generate_encrypt_aes_128_fsm(key):
    print("Composing AES round prefix automaton...")
    #print(" generate_clock:", [str(_x) for _x in generate_clock_fsm.output_transition], [str(_x) for _x in generate_clock_fsm.state_transition])
    #print(" delay 16:", [str(_x) for _x in delay_16_fsm.output_transition], [str(_x) for _x in delay_16_fsm.state_transition])

    aes_encrypt_round_prefix_fsm = generate_clock_fsm @ delay_16_fsm
    #print(" unoptimized:", [_x.circuit_size() for _x in aes_encrypt_round_prefix_fsm.output_transition], [_x.circuit_size() for _x in aes_encrypt_round_prefix_fsm.state_transition])
    aes_encrypt_round_prefix_fsm.optimize()
    #print(" optimized:", [str(_x) for _x in aes_encrypt_round_prefix_fsm.output_transition], [str(_x) for _x in aes_encrypt_round_prefix_fsm.state_transition])

    print("Composing AES round suffix automaton...")
    print(
        " sub_bytes:",
        [_x.circuit_size() for _x in encrypt_sub_bytes_fsm.output_transition],
        [_x.circuit_size() for _x in encrypt_sub_bytes_fsm.state_transition])
    print("           ", [
        len(_x.variables_set())
        for _x in encrypt_sub_bytes_fsm.output_transition
    ], [
        len(_x.variables_set())
        for _x in encrypt_sub_bytes_fsm.state_transition
    ])
    print(
        " shift_rows:",
        [_x.circuit_size() for _x in encrypt_shift_rows_fsm.output_transition],
        [_x.circuit_size() for _x in encrypt_shift_rows_fsm.state_transition])
    print("            ", [
        len(_x.variables_set())
        for _x in encrypt_shift_rows_fsm.output_transition
    ], [
        len(_x.variables_set())
        for _x in encrypt_shift_rows_fsm.state_transition
    ])
    print(" mix_columns:", [
        _x.circuit_size() for _x in encrypt_mix_columns_fsm.output_transition
    ], [_x.circuit_size() for _x in encrypt_mix_columns_fsm.state_transition])
    print("             ", [
        len(_x.variables_set())
        for _x in encrypt_mix_columns_fsm.output_transition
    ], [
        len(_x.variables_set())
        for _x in encrypt_mix_columns_fsm.state_transition
    ])
    print(" delay_16:",
          [_x.circuit_size() for _x in delay_16_fsm.output_transition],
          [_x.circuit_size() for _x in delay_16_fsm.state_transition])
    print("          ",
          [len(_x.variables_set()) for _x in delay_16_fsm.output_transition],
          [len(_x.variables_set()) for _x in delay_16_fsm.state_transition])
    print(" remove_clock:",
          [_x.circuit_size() for _x in remove_clock_fsm.output_transition],
          [_x.circuit_size() for _x in remove_clock_fsm.state_transition])
    print(
        "              ",
        [len(_x.variables_set()) for _x in remove_clock_fsm.output_transition],
        [len(_x.variables_set()) for _x in remove_clock_fsm.state_transition])

    with parallel():
        aes_encrypt_round_suffix_fsm = encrypt_sub_bytes_fsm @ encrypt_shift_rows_fsm @ encrypt_mix_columns_fsm @ delay_16_fsm @ remove_clock_fsm
        print(" unoptimized:", [
            _x.circuit_size()
            for _x in aes_encrypt_round_suffix_fsm.output_transition
        ], [
            _x.circuit_size()
            for _x in aes_encrypt_round_suffix_fsm.state_transition
        ])
        aes_encrypt_round_suffix_fsm.optimize()
        print(" optimized:", [
            _x.circuit_size()
            for _x in aes_encrypt_round_suffix_fsm.output_transition
        ], [
            _x.circuit_size()
            for _x in aes_encrypt_round_suffix_fsm.state_transition
        ])
        print(" ", [
            _x.circuit_size()
            for _x in aes_encrypt_round_suffix_fsm.output_transition
        ], [
            _x.circuit_size()
            for _x in aes_encrypt_round_suffix_fsm.state_transition
        ])

    print("Calculating AES 128 key automaton...")
    argument = vector(Automaton.x[_i] for _i in range(18))
    history = deque(
        vector(Automaton.s[_j, _i] for _i in range(8)) for _j in range(1, 17))
    generator = add_round_key_128(key, [argument], history)
    result = next(generator)
    exhaust(generator)
    add_round_key_128_fsm = Automaton(output_transition=result,
                                      state_transition=history[0])
    add_round_key_128_fsm.optimize()
    print(
        " ",
        [_x.circuit_size() for _x in add_round_key_128_fsm.output_transition],
        [_x.circuit_size() for _x in add_round_key_128_fsm.state_transition])

    print("Composing AES single round automaton...")
    encrypt_aes_128_fsm = aes_encrypt_round_prefix_fsm @ add_round_key_128_fsm @ aes_encrypt_round_suffix_fsm
    encrypt_aes_128_fsm.optimize()
    print(" ",
          [_x.circuit_size() for _x in encrypt_aes_128_fsm.output_transition],
          [_x.circuit_size() for _x in encrypt_aes_128_fsm.state_transition])
Example #24
	def test_homomorphic_encryption(Ring, block_size, memblock_size, length):
		print("Gonzalez-Llamas homomorphic encryption test")
		print(" algebra:", Ring, ", data block size:", block_size, ", memory block size:", memblock_size, ", stream length:", length)
		
		Automaton = automaton_factory(Ring)
		Vector = Automaton.base_vector
		ConstVector = Automaton.base_const_vector
		
		x = Vector([Automaton.x[_i] for _i in range(block_size)])
		s_1 = Vector([Automaton.s[1, _i] for _i in range(memblock_size)])
		s_2 = Vector([Automaton.s[2, _i] for _i in range(memblock_size)])
		s_3 = Vector([Automaton.s[3, _i] for _i in range(memblock_size)])
		
		variables = list(x) + list(s_1) + list(s_2) + list(s_3)
		
		def automaton_input():
			for i in range(length):
				yield ConstVector.random(block_size)
		
		for i in range(1, 5):
			print()
			print(" round", i)
			print("  generating automata...")
			memory_size = i + 4
			#mixer, unmixer = Automaton.linear_nodelay_wifa_pair(block_size=block_size, memory_size=memory_size)
			mixer, unmixer = Automaton.fapkc0(block_size=block_size, memory_size=memory_size)
			plain_automaton = Automaton(Vector.random(dimension=block_size, variables=variables, order=3), Vector.random(dimension=memblock_size, variables=variables, order=3))
			
			print("  optimizing automata...")
			start_time = time()
			print(f"   mixer: {mixer.output_transition.circuit_size()} {mixer.state_transition.circuit_size()} {mixer.output_transition.dimension} {mixer.state_transition.dimension}")
			mixer.optimize()
			print(f"          {mixer.output_transition.circuit_size()} {mixer.state_transition.circuit_size()}")
			print(f"   unmixer: {unmixer.output_transition.circuit_size()} {unmixer.state_transition.circuit_size()} {unmixer.output_transition.dimension} {unmixer.state_transition.dimension}")
			unmixer.optimize()
			print(f"            {unmixer.output_transition.circuit_size()} {unmixer.state_transition.circuit_size()}")
			print(f"   plain: {plain_automaton.output_transition.circuit_size()} {plain_automaton.state_transition.circuit_size()} {plain_automaton.output_transition.dimension} {plain_automaton.state_transition.dimension}")
			plain_automaton.optimize()
			print(f"          {plain_automaton.output_transition.circuit_size()} {plain_automaton.state_transition.circuit_size()}")
			print("   time:", int(time() - start_time))
			
			print("  composing automata...")
			start_time = time()
			homo_automaton = mixer @ plain_automaton @ unmixer
			print("   time:", int(time() - start_time))
			print("  mixing states")
			start_time = time()
			homo_automaton.mix_states()
			print("   time:", int(time() - start_time))
			
			print("  optimizing automata...")
			start_time = time()
			print(f"   homomorphic: {homo_automaton.output_transition.circuit_size()} {homo_automaton.state_transition.circuit_size()} {homo_automaton.output_transition.dimension} {homo_automaton.state_transition.dimension}")
			print(f"                {[_circuit.circuit_size() for _circuit in homo_automaton.output_transition]} {[_circuit.circuit_size() for _circuit in homo_automaton.state_transition]}")
			homo_automaton.optimize()
			print(f"                {homo_automaton.output_transition.circuit_size()} {homo_automaton.state_transition.circuit_size()}")
			print(f"                {[_circuit.circuit_size() for _circuit in homo_automaton.output_transition]} {[_circuit.circuit_size() for _circuit in homo_automaton.state_transition]}")
			print("   time:", int(time() - start_time))
			
			print("  compiling automata...")
			start_time = time()
			compiler = Compiler()
			
			#try:
			#	Ring.compile_tables('RijndaelField', compiler)
			#except AttributeError:
			#	pass
			
			with parallel(0):
				mixer.compile('m', compiler)
				unmixer.compile('u', compiler)
				plain_automaton.compile('p', compiler)
				homo_automaton.compile('h', compiler)
			code = compiler.compile()
			
			#Path('automaton_' + str(i) + '.bc').write_bytes(code.modules[0].as_bitcode())
			
			mixer = mixer.wrap_compiled('m', code)
			unmixer = unmixer.wrap_compiled('u', code)
			plain_automaton = plain_automaton.wrap_compiled('p', code)
			homo_automaton = homo_automaton.wrap_compiled('h', code)
			print("   time:", int(time() - start_time))
			
			print("  encryption/decryption test...")
			text = list(automaton_input())
			start_time = time()
			with code:
				result1 = list(homo_automaton(text))
				result2 = list(mixer(plain_automaton(unmixer(text))))
			print("   actual:   ", ''.join(['{:02x}'.format(int(_ch)) for _ch in result1]))
			print("   predicted:", ''.join(['{:02x}'.format(int(_ch)) for _ch in result2]))
			assert result1 == result2
			print("   time:", int(time() - start_time))
			
			Path(f'homomorphic_{i}.ll').write_text(str(compiler))
Example #25
	print(ls.state_transition.circuit_size(), [_x.circuit_size() for _x in ls.state_transition])
	print()
	print(li.output_transition.circuit_size(), [_x.circuit_size() for _x in li.output_transition])
	print(li.state_transition.circuit_size(), [_x.circuit_size() for _x in li.state_transition])
	
	ns, ni = Automaton.nonlinear_nodelay_wifa_pair(block_size=8, memory_size=5)
	
	print()
	print(ns.output_transition.circuit_size(), [_x.circuit_size() for _x in ns.output_transition])
	print(ns.state_transition.circuit_size(), [_x.circuit_size() for _x in ns.state_transition])
	print()
	print(ni.output_transition.circuit_size(), [_x.circuit_size() for _x in ni.output_transition])
	print(ni.state_transition.circuit_size(), [_x.circuit_size() for _x in ni.state_transition])
	
	straight = ns @ ls
	inverse = li @ ni
	
	print()
	print(straight.output_transition.circuit_size(), [_x.circuit_size() for _x in straight.output_transition])
	print(straight.state_transition.circuit_size(), [_x.circuit_size() for _x in straight.state_transition])
	print()
	print(inverse.output_transition.circuit_size(), [_x.circuit_size() for _x in inverse.output_transition])
	print(inverse.state_transition.circuit_size(), [_x.circuit_size() for _x in inverse.state_transition])
	'''


	with parallel():
		#test_fapkc_encryption(BooleanRing.get_algebra(), 8, 64, print_data=True)
		test_homomorphic_encryption(BooleanRing.get_algebra(), 8, 8, 128)

Example #26
def main():
    # settings related to performance/parallelization
    amount_buckets = int(os.environ.get('AMOUNT_BUCKETS', "256"))
    limit_names = set(
        filter(lambda n: bool(n),
               os.environ.get('LIMIT_NAMES', "").split(',')))
    max_minutes = int(os.environ.get('MAX_MINUTES', "0"))
    bucket_jobs = int(os.environ.get('BUCKET_JOBS', "0"))
    start_bucket = int(os.environ.get('BUCKET_START', "0"))
    workers = int(os.environ.get('WORKERS', multiprocessing.cpu_count() * 2))

    # general settings
    dump_dir = os.environ.get('DUMP_DIR', "./sdist")
    extractor_src = os.environ.get("EXTRACTOR_SRC")
    if not extractor_src:
        raise Exception(
            "Set env variable 'EXTRACTOR_SRC to {mach-nix}/lib/extractor'")
    min_free_gb = int(os.environ.get('MIN_FREE_GB', "0"))
    py_vers_short = os.environ.get('PYTHON_VERSIONS',
                                   "27,36,37,38,39,310").strip().split(',')
    pypi_fetcher_dir = os.environ.get('PYPI_FETCHER', '/tmp/pypi_fetcher')
    store = os.environ.get('STORE', None)

    deadline_total = time() + max_minutes * 60 if max_minutes else None

    # cache build time deps, otherwise first job will be slow
    with Measure("ensure build time deps"):
        build_base(extractor_src, py_vers_short, store=store)

    garbage_collected = False

    for idx, bucket in enumerate(LazyBucketDict.bucket_keys()):
        # calculate per bucket deadline if MAX_MINUTES is used
        if deadline_total:
            amount = min(amount_buckets, 256 - start_bucket)
            deadline = time() + (deadline_total - time()) / amount
        else:
            deadline = None
        if idx < start_bucket or idx >= start_bucket + amount_buckets:
            continue
        pkgs_dict = LazyBucketDict(dump_dir, restrict_to_bucket=bucket)
        pypi_index = LazyBucketDict(f"{pypi_fetcher_dir}/pypi",
                                    restrict_to_bucket=bucket)
        # load error data
        error_dict = LazyBucketDict(dump_dir + "-errors",
                                    restrict_to_bucket=bucket)
        decompress(error_dict.by_bucket(bucket))
        with Measure('Get processed pkgs'):
            print(
                f"DB contains {len(list(pkgs_dict.keys()))} pkgs at this time for bucket {bucket}"
            )
        with Measure("decompressing data"):
            decompress(pkgs_dict.by_bucket(bucket))
        # purge data for old python versions and packages which got deleted from pypi
        with Measure("purging packages"):
            purge(pypi_index, pkgs_dict, bucket, py_vers_short)
        with Measure("getting jobs"):
            jobs = get_jobs(pypi_index,
                            error_dict,
                            pkgs_dict,
                            bucket,
                            py_vers_short,
                            limit_num=bucket_jobs,
                            limit_names=limit_names)
            if not jobs:
                continue
            compute_drvs(jobs, extractor_src, store=store)

        # ensure that all the build time dependencies are cached before starting,
        # otherwise jobs might time out
        if garbage_collected:
            with Measure("ensure build time deps"):
                build_base(extractor_src, py_vers_short, store=store)
        with Measure('executing jobs'):
            if workers > 1:
                pool_results = utils.parallel(extract_requirements,
                                              (jobs, (deadline, ) * len(jobs),
                                               (len(jobs), ) * len(jobs),
                                               (store, ) * len(jobs)),
                                              workers=workers,
                                              use_processes=False)
            else:
                pool_results = [
                    extract_requirements(args, deadline, store)
                    for args in jobs
                ]

        # filter out exceptions
        results = []
        for i, res in enumerate(pool_results):
            if not isinstance(res, Exception):
                for r in res:
                    results.append(r)

        # insert new data
        for pkg in sorted(results,
                          key=lambda pkg:
                          (pkg.name, pkg.version, sort_key_pyver(pkg.py_ver))):
            py_ver = ''.join(filter(lambda c: c.isdigit(), pkg.py_ver))
            if pkg.error:
                target = error_dict
            else:
                target = pkgs_dict
            insert(py_ver,
                   pkg.name,
                   pkg.version,
                   pkg_to_dict(pkg),
                   target,
                   error=pkg.error)

        # compress and save
        with Measure("compressing data"):
            compress(pkgs_dict.by_bucket(bucket))
            compress(error_dict.by_bucket(bucket))
        print("finished compressing data")
        with Measure("saving data"):
            pkgs_dict.save()
            error_dict.save()

        # collect garbage if free space < MIN_FREE_GB
        if shutil.disk_usage(store or "/nix/store").free / (1000**
                                                            3) < min_free_gb:
            with Measure("collecting nix store garbage"):
                sp.run(
                    f"nix-collect-garbage {f'--store {store}' if store else ''}",
                    capture_output=True,
                    shell=True)
                garbage_collected = True

        # stop execution if deadline occurred
        if deadline_total and time() > deadline_total:
            print(
                f"Deadline occurred. Stopping execution. Last Bucket was {bucket}"
            )
            break
Example #27
def all(countries=None):
    """Scraps servers for all communities at once in parallel"""
    action = lambda c: (c, servers(c))
    return utils.parallel(countries or communities(), action)