async def list(self, ctx: commands.Context, category=None):
    """ Show status of CTF challenges. Use `>chal ls [category]!` to post to channel. """
    ctf = self._get_ctf(ctx)
    public = False
    if category and '!' in category:
        category = category.replace('!', '')
        public = True
    if not category:
        if isinstance(ctx.channel, discord.TextChannel):
            category = ctx.channel.name.split(f'{ctf.name}_')[1]
        else:
            category = 'all'
    if category not in ctf.tags:
        category = 'all'

    solved, unsolved = ctf.challenge_summary(category)
    desc = f'{category} challenges\n\n'
    if solved:
        desc += f'# solved\n{solved}\n'
    if unsolved:
        desc += f'# unsolved\n{unsolved}'

    for chunk in chunkify(desc, 1980):
        # emb = discord.Embed(title=f'{category} challenges', description=chunk, colour=4387968)
        # await ctx.message.author.send(embed=emb)
        if public:
            await ctx.send(f'```md\n{chunk}```')
        else:
            await ctx.message.author.send(f'```md\n{chunk}```')

def find_repetitions(ciphers):
    for cipher in ciphers:
        chunks = [c for c in chunkify(cipher, 32)]
        counter = Counter(chunks)
        block, repetitions = counter.most_common()[0]
        if repetitions > 1:
            return repetitions, block, cipher

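# Hypothetical helper, not shown in this section: find_repetitions (and several of
# the snippets below) call chunkify(seq, n) as if it yielded successive fixed-length
# slices, e.g. chunkify(cipher, 32) for one 16-byte AES block per hex chunk and
# chunkify(desc, 1980) for Discord-sized pieces of text. A minimal sketch under that
# assumption:
def chunkify(seq, n):
    """Yield consecutive length-n slices of seq; the last slice may be shorter."""
    for i in range(0, len(seq), n):
        yield seq[i:i + n]

# e.g. list(chunkify('deadbeef', 2)) == ['de', 'ad', 'be', 'ef']
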
def predict(self, x):
    self.eval()
    chunks = chunkify(x, self.max_len, self.min_len)
    logits = self.forward(chunks)
    mean_logits = logits.mean(dim=0)
    index = torch.where(mean_logits > 0)[0]
    result = [self.tags[i] for i in index]
    return result

def __init__(self, file_path, max_len, min_len):
    df = pd.read_csv(file_path)
    df_dict = df.to_dict("records")
    new_df = [
        set_body(row.copy(), chunk)
        for row in df_dict
        for chunk in chunkify(row["body"], max_len, min_len)
    ]
    self.data = pd.DataFrame(new_df).set_index("title")

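# Hypothetical sketch only; the three-argument chunkify(text, max_len, min_len) used
# by predict() and the dataset __init__ above is not shown here. One plausible
# reading (an assumption, not the repo's definition) is fixed-size splitting with a
# minimum-length filter:
def chunkify(text, max_len, min_len):
    """Split text into pieces of at most max_len characters,
    dropping any piece shorter than min_len."""
    pieces = [text[i:i + max_len] for i in range(0, len(text), max_len)]
    return [p for p in pieces if len(p) >= min_len]
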
async def getsource(self, ctx: Context, command: str):
    cmd = self.bot.all_commands.get(command)
    if cmd is None:
        await ctx.send(f"Could not find `{command}`")
        return

    for chunk in chunkify(textwrap.dedent(inspect.getsource(cmd.callback))):
        await ctx.send(wrap(chunk, lang='py'))

def do_prediction(intbl,
                  pbms,
                  gene_names,
                  filteropt="p-value",
                  filterval=0.0001,
                  spec_ecutoff=0.4,
                  nonspec_ecutoff=0.35,
                  num_threads=None):
    """
    intbl: preprocessed table
    filteropt: p-value or z-score
    filterval: # TFs for opt z-score and p-val cutoff for p-value
    """
    # intbl: #rowidx,seq,val,diff,t,pbmname,escore_seq
    start_time = time.time()  # move the comment here for testing

    predfiles = [config.PREDDIR + "/" + pbm for pbm in pbms]  # os.listdir(preddir)
    preds = utils.chunkify(predfiles,
                           config.PCOUNT)  # chunks the predfiles for each process

    if filteropt == "p-value":
        filterval = float(filterval)
    else:  # z-score
        filterval = int(filterval)

    # collect the short2long_map -- shared, so only one i/o
    emap = pd.read_csv("%s/index_short_to_long.csv" % (config.ESCORE_DIR),
                       header=0,
                       index_col=0,
                       sep=',',
                       dtype='Int32')  # pd.DataFrame
    emap = np.array(emap[emap.columns[0]]) - 1  # emap[emap.columns[0]].to_numpy() - 1

    # --- PARALLEL PART ---
    # need to use manager here
    # shared_ready_sum = mp.Manager().Value('i', 0)

    # prepare all parameters but predlist
    predict_partial = ft.partial(
        predict, **{
            'dataset': intbl,
            'emap': emap,
            'filteropt': filteropt,
            'filterval': filterval,
            'spec_ecutoff': spec_ecutoff,
            'nonspec_ecutoff': nonspec_ecutoff,
            'num_threads': num_threads
        })

    with cc.ProcessPoolExecutor(config.PCOUNT) as executor:
        res = executor.map(predict_partial, preds)

    return postprocess(res, predfiles, gene_names, filteropt, filterval)

def init(block, key, format_):
    if format_ == 'x':
        key = [chr(int(k, 16)) for k in chunkify(key, 2)]
        block = [chr(int(b, 16)) for b in chunkify(block, 2)]
    key = [ord(k) for k in key]
    keysize = len(key) * 8
    if len(block) != 16:
        raise BlockSizeError('Block must be of length 16 bytes')
    if not valid_key(key, keysize):
        raise KeySizeError('Key must be either 128, 192 or 256 bit')
    state = [ord(b) for b in block]
    state = [state[i:i + 4] for i in range(0, len(state) - 3, 4)]
    nk = keysize // 32
    nr = nk + 6
    roundkey = key_expand(key, nr, nk)
    return state, roundkey, nr

def fetch_from_ensembl_genomes():
    """Queries MySQL servers hosted by Ensembl Genomes

    To connect via Terminal (e.g. to debug), run:
    mysql --user=anonymous --host=mysql-eg-publicsql.ebi.ac.uk --port=4157 -A
    """
    global time_ensembl
    t0 = time_ms()
    logger.info('Entering fetch_from_ensembl_genomes')
    connection = db_connect(host='mysql-eg-publicsql.ebi.ac.uk',
                            user='******',
                            port=4157)
    logger.info('Connected to Ensembl Genomes database')
    cursor = connection.cursor()

    db_map = {}
    org_map = {}

    # Get a list of databases we want to query for karyotype data
    cursor.execute('show databases like "%core_%"')
    for row in cursor.fetchall():
        db = row[0]
        if 'collection' in db:
            continue
        name_slug = db.split('_core')[0].replace('_', '-')
        db_map[db] = name_slug
    db_tuples = [item for item in db_map.items()]

    cursor.close()

    # Take the list of DBs we want to query for karyotype data,
    # split it into 100 smaller lists,
    # then launch a new thread for each of those small new DB lists
    # to divide up the work of querying remote DBs.
    num_threads = 100
    db_tuples_lists = chunkify(db_tuples, num_threads)
    with ThreadPoolExecutor(max_workers=num_threads) as pool:
        for result in pool.map(query_ensembl_karyotype_db, db_tuples_lists):
            for db_tuple in result:
                name_slug, asm_data = db_tuple
                if name_slug in org_map:
                    org_map[name_slug].append(asm_data)
                else:
                    org_map[name_slug] = [asm_data]

    logger.info('before exiting with clause')
    time_ensembl += time_ms() - t0
    return org_map

def fetch_from_ucsc():
    """Queries MySQL instances hosted by UCSC Genome Browser

    To connect via Terminal (e.g. to debug), run:
    mysql --user=genome --host=genome-mysql.soe.ucsc.edu -A
    """
    global time_ucsc
    t0 = time_ms()
    logger.info('Entering fetch_from_ucsc')
    connection = db_connect(host='genome-mysql.soe.ucsc.edu', user='******')
    logger.info('Connected to UCSC database')
    cursor = connection.cursor()

    db_map = {}
    org_map = {}

    cursor.execute('use hgcentral')
    cursor.execute('''
        SELECT name, scientificName FROM dbDb
        WHERE active = 1
    ''')
    rows = cursor.fetchall()

    for row in rows:
        db = row[0]
        # e.g. Homo sapiens -> homo-sapiens
        name_slug = row[1].lower().replace(' ', '-')
        db_map[db] = name_slug
    db_tuples = [item for item in db_map.items()]

    # Take the list of DBs we want to query for cytoBandIdeo data,
    # split it into 30 smaller lists,
    # then launch a new thread for each of those small new DB lists
    # to divide up the work of querying remote DBs.
    num_threads = 30
    db_tuples_lists = chunkify(db_tuples, num_threads)
    with ThreadPoolExecutor(max_workers=num_threads) as pool:
        for result in pool.map(query_ucsc_cytobandideo_db, db_tuples_lists):
            if result is None:
                continue
            asm_data = result
            if name_slug in org_map:
                org_map[name_slug].append(asm_data)
            else:
                org_map[name_slug] = [asm_data]

    time_ucsc += time_ms() - t0
    return org_map

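# Note that utils.chunkify(predfiles, config.PCOUNT) in do_prediction and
# chunkify(db_tuples, num_threads) in the two fetch_* functions above pass a worker
# count rather than a chunk size: per the surrounding comments, the list is split
# into that many smaller lists, one per process or thread. A rough sketch of a
# helper with that contract (an assumption, not the projects' actual utils code):
import math

def chunkify(items, count):
    """Split items into at most `count` contiguous sublists of roughly equal size."""
    size = max(1, math.ceil(len(items) / count))
    return [items[i:i + size] for i in range(0, len(items), size)]
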
def _delete_old_lines(self, should_delete):
    """
    Remove outdated lines from the database.

    :param should_delete: lines to be deleted
    """
    lines_ref = self._firestore.collection(u'lines')

    # split the should_delete into chunks of size 500
    # and then batch delete them
    for chunk in chunkify(should_delete, FIRESTORE_BATCH_MAXIMUM_SIZE):
        batch = self._firestore.batch()
        for line in chunk:
            batch.delete(lines_ref.document(line))
            logger.info(f'deleting outdated line {line}.')
        batch.commit()

def do_prediction(intbl,
                  pbms,
                  gene_names,
                  filteropt="p-value",
                  filterval=0.0001,
                  spec_ecutoff=0.4,
                  nonspec_ecutoff=0.35):
    """
    intbl: preprocessed table
    filteropt: p-value or z-score
    filterval: # TFs for opt z-score and p-val cutoff for p-value
    """
    # intbl: #rowidx,seq,val,diff,t,pbmname,escore_seq
    start_time = time.time()  # move the comment here for testing

    predfiles = [config.PREDDIR + "/" + pbm for pbm in pbms]  # os.listdir(preddir)
    preds = utils.chunkify(predfiles,
                           config.PCOUNT)  # chunks the predfiles for each process

    # need to use manager here
    shared_ready_sum = mp.Manager().Value('i', 0)

    pool = mp.Pool(processes=config.PCOUNT)
    if filteropt == "p-value":
        filterval = float(filterval)
    else:  # z-score
        filterval = int(filterval)

    async_pools = [
        pool.apply_async(predict,
                         (preds[i], intbl, shared_ready_sum, filteropt, filterval,
                          spec_ecutoff, nonspec_ecutoff))
        for i in range(0, len(preds))
    ]

    total = len(predfiles)
    while not all([p.ready() for p in async_pools]):
        time.sleep(2)

    res = [p.get() for p in async_pools]
    pool.terminate()

    colnames, datavalues = postprocess(res, gene_names, filteropt, filterval)
    return colnames, datavalues

async def run(self, ctx: Context, *, string: str = ''):
    """Runs python code in all codeblocks in the message."""
    env = {
        'bot': self.bot,
        'ctx': ctx,
        'channel': ctx.channel,
        'author': ctx.author,
        'guild': ctx.guild,
        'message': ctx.message,
        '_': self.last_return,
        **globals()
    }

    for code in re.findall(self._code_re, string):
        output = await self.run_code(code.strip(), env)
        for chunk in chunkify(output, newlines=True, wrapped=True):
            await ctx.send(chunk)

async def on_command_error(self, ctx: Context, error: Exception):
    if isinstance(error, commands.CommandInvokeError):
        e = error.original
        if await bot.is_owner(ctx.author):
            tb = traceback.format_exception(type(error), error, error.__traceback__)
            for chunk in chunkify(tb, newlines=True, wrapped=True):
                await ctx.send(chunk)
            return

        msg = await ctx.send(
            "We're sorry, something went wrong. Would you like to submit a bug report?"
        )
        if await confirm(bot, msg, ctx.author):
            await ctx.send('Thank you, a bug report has been sent')
            await report_bug(ctx, e)
        else:
            await msg.edit(content="We're sorry, something went wrong")

    elif isinstance(error, commands.CommandNotFound):
        if not ctx.invoked_with:
            return
        cmds = [
            name for name, command in bot.all_commands.items()
            if not command.hidden
        ]
        match = difflib.get_close_matches(ctx.invoked_with, cmds, 1)
        if match:
            await ctx.send(
                f"Sorry I don't know what `{ctx.invoked_with}` is, did you perhaps mean `{match[0]}`?"
            )
        else:
            await ctx.send(f"Sorry I don't know what `{ctx.invoked_with}` is.")

    elif isinstance(error, commands.UserInputError):
        bot.help_command.context = ctx
        signature = bot.help_command.get_command_signature(ctx.command)
        await ctx.send(error.args[0] + f'\nUsage: `{signature}`')

    elif isinstance(error, commands.CommandError):
        await ctx.send(error.args[0])

    else:
        raise error

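# Hypothetical sketch of the newline-aware variant that run() and on_command_error()
# call as chunkify(output, newlines=True, wrapped=True); the real helper is not shown
# in this section. Assumptions: the input may be a string or a list of lines
# (traceback.format_exception returns a list), newlines=True makes chunks break on
# line boundaries, and wrapped=True wraps each chunk in a code fence while keeping it
# under Discord's 2000-character message limit.
def chunkify(text, limit=2000, newlines=False, wrapped=False):
    if not isinstance(text, str):
        text = ''.join(text)
    fence = '```'
    if wrapped:
        limit -= 2 * len(fence) + 1  # leave room for the fence added below
    if newlines:
        pieces, current = [], ''
        for line in text.splitlines(keepends=True):
            if current and len(current) + len(line) > limit:
                pieces.append(current)
                current = ''
            current += line
        if current:
            pieces.append(current)
    else:
        pieces = [text[i:i + limit] for i in range(0, len(text), limit)]
    if wrapped:
        pieces = [f'{fence}\n{piece}{fence}' for piece in pieces]
    return pieces
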
def _save(self, lines):
    """
    Save lines scraped from the website.

    :param lines: lines to be saved
    """
    lines_ref = self._firestore.collection(u'lines')

    # split the lines in chunks and batch update them in the database
    for chunk in chunkify(lines, FIRESTORE_BATCH_MAXIMUM_SIZE):
        batch = self._firestore.batch()
        for line in chunk:
            batch.set(lines_ref.document(line.code), {
                u'code': line.code,
                u'name': line.name,
                u'timetable_url': line.url,
                u'cities': list(line.cities),
                u'file_hash': line.file_hash
            }, merge=True)
            logging.info(f'saving line with code {line.code}')
        batch.commit()

def get_tracks(self):
    """Collects track metadata from spotify and uploads it to our database.

    Retrieves a list of album ids from the "albums" table in our database.
    Then, we split this list into chunks and assign celery tasks to each one.
    This allows our program to distribute (e.g., multi-process) tasks across a
    set of workers rather than needing to complete each chunk and its
    associated tasks in sequential order.
    """
    time = datetime.utcnow()
    time = time.replace(minute=0, second=0, microsecond=0)

    query = f"""
        SELECT DISTINCT album_id
        FROM (
            SELECT album_id, max(year) as year, max(track_count) as track_count
            FROM {config.db_schema}.{config.tables.get('albums')}
            WHERE track_count < 23 and year < 2020
            GROUP BY album_id
            ORDER BY year DESC, track_count DESC) as innerQ
        LIMIT 200000;
    """
    with config.engine.connect() as conn:
        album_ids = pd.read_sql(query, con=conn).album_id.values.tolist()

    # split the album ids list into chunks to create more memory efficient tasks
    album_ids = chunkify(album_ids)

    # create the async tasks
    tasks = group(
        (albums_tracks.s(ids, i) | tracks_audio_features.s() | push_tracks.s())
        for i, ids in enumerate(album_ids)
    )
    tracks_flow = (  # noqa: F841
        (tasks | drop_dup_tracks.si() | flow_complete.si(self.name, time))
        .delay()
        .get()
    )

def encode(data, format_type='b'):
    index2, index3, index4 = indexes(format_type)
    chunk_size = 6 if format_type == 'x' else 3
    b64 = ''
    chunks = [c for c in chunkify(data, chunk_size)]
    for chunk in chunks:
        byte1 = chunk[0:index2]
        byte2 = chunk[index2:index3]
        byte3 = chunk[index3:index4]
        ch1 = ch2 = ch3 = ch4 = ''
        ch1, ch2, bb1 = encode_byte(byte1, format_type, 2, 3, 4)
        if byte2:
            ch2, ch3, bb1 = encode_byte(byte2, format_type, 4, 15, 2, b1=bb1)
            if byte3:
                ch3, ch4, _ = encode_byte(byte3, format_type, 6, 63, 0, b1=bb1)
            else:
                ch4 = '='
        else:
            ch3 = '=='
        b64 += ch1 + ch2 + ch3 + ch4
    return b64

import aes
import base64
from utils import chunkify, readfile

if __name__ == '__main__':
    data = ''.join(readfile('testdata/7.txt'))
    data = base64.decode(data)
    data = chunkify(data, 16)
    key = 'YELLOW SUBMARINE'
    decrypted = ''
    for d in data:
        decrypted += aes.decrypt(d, key)
    print(decrypted)

def to_byte(data):
    for line in data:
        yield ''.join(chr(int(c, 16)) for c in chunkify(line.strip(), 2))

import xor_crypto
from utils import chunkify

if __name__ == '__main__':
    data = '1b37373331363f78151b7f2b783431333d78397828372d363c78373e783a393b3736'
    # hex -> byte string
    data = ''.join(chr(int(c, 16)) for c in chunkify(data, 2))
    key = xor_crypto.find_key(data, 1, 2)
    print(xor_crypto.decrypt(data, key))

async def roll(self, ctx, *args):
    """Rolls dice (supports algebraic notation, such as !roll 3d5+10)"""

    def parseint(x, default=0):
        try:
            return int(x or default)
        except ValueError:
            return x

    repatt = (  # !roll 3d5+10 check these dubs
        r'(?P<dice>\d+(?=[dD]))?'    # 3
        r'([Dd](?=\d))?'             # d
        r'(?P<sides>\d+)?'           # 5
        r'(?P<mod>\s?[-\+]\s?\d+)?'  # +10
        r'(?P<comment>.*)'           # check these dubs
    )

    use_default_roll = True
    dice, sides, mod = (DEFAULT_ROLL_DICE_COUNT, DEFAULT_ROLL_SIDES,
                        DEFAULT_ROLL_MODIFIER)
    footer = 'Syntax: !roll 1000, !roll 3d5+7, !roll 11d9 check em'

    match = None
    if args:
        args = ' '.join(args).strip()
        match = re.match(repatt, args)

    if match:
        grps = match.groupdict()

        # Unpack comment
        comment = grps['comment'].strip()
        if comment:
            author = ctx.message.author
            # fall back to the username when the author has no nickname
            name = getattr(author, 'nick', None) or author.name
            footer = f'{name}: {comment}'

        has_arithmetic = any(x is not None
                             for x in (grps['dice'], grps['sides'], grps['mod']))
        if has_arithmetic:
            use_default_roll = False

            # Unpack arithmetic
            # Sides is compulsory for a match
            sides = parseint(grps['sides'], 1)
            # mod defaults to 0
            mod = parseint(grps['mod'], 0)
            # dice should default to 1
            dice = parseint(grps['dice'], 1)

            # Check if expression is too long (too much math)
            if len(str(dice) + str(sides) + str(mod)) > 20:
                await ctx.send(f"That's just way too much work {BIRB}")
                return

    # Calc output: roll the dice, then apply the modifier once to the total
    res = sum(random.randint(1, sides) for _ in range(dice)) + mod

    # Format output
    if is_hot_time():
        # suppose res == 12345; transform to '||12||||34||||5||'
        bigrammed = lambda s: chunkify(s, 2)
        res = ''.join(f'||{pair}||' for pair in bigrammed(str(res)))

    # Format input into algebraic notation or 0-99
    formatted = '{D}d{S}{P}{M}'.format(
        D='' if dice == 1 else dice,
        S=sides or 0,
        P=('+' if mod > 0 else '-') if mod else '',
        M=abs(mod) if mod else '')
    if use_default_roll:
        # Assign to fallback formats
        formatted = '0-99'

    # Build embed and set comment or tip
    emb = Embed(description=f"Rolling {formatted}: **{res}**")
    if footer:
        emb.set_footer(text=footer)
    await ctx.send(embed=emb)

print("sentences_a length:", len(sentences_a)) _, batch_catchphrase_embedding_a = encoder(**encoded_batch_catchphrase_a) # [7, 768] _, batch_catchphrase_embedding_b = encoder(**encoded_batch_catchphrase_b) # [13,768] _, sentence_embedding_a = encoder(**encoded_sentence_a) # [1, 768] _, sentence_embedding_b = encoder(**encoded_sentence_b) # [1, 768] elif exp=="mean": sentence_indices_a = tokenizer(sentences_a, truncation=True, return_tensors="pt", padding='max_length', max_length=512 * 12) sentence_indices_b = tokenizer(sentences_b, truncation=True, return_tensors="pt", padding='max_length', max_length=512 * 12) _, batch_catchphrase_embedding_a = encoder(**encoded_batch_catchphrase_a) # [7, 768] _, batch_catchphrase_embedding_b = encoder(**encoded_batch_catchphrase_b) # [13,768] chunk_indices_a = chunkify(sentence_indices_a) chunk_indices_b = chunkify(sentence_indices_b) chunk_embeddings_a = encode_chunks(chunk_indices_a, encoder) chunk_embeddings_b = encode_chunks(chunk_indices_b, encoder) #################### Aggregation ###################### sentence_embedding_a = torch.mean(chunk_embeddings_a, dim=0).unsqueeze(0) sentence_embedding_b = torch.mean(chunk_embeddings_b, dim=0).unsqueeze(0) del sentence_indices_a,sentence_indices_b,_, del chunk_indices_a, chunk_indices_b, chunk_embeddings_a, chunk_embeddings_b left_left = torch.cdist(sentence_embedding_a, batch_catchphrase_embedding_a, p=2.0) # [1, 768]*[7, 768]=[1, 7] left_right = torch.cdist(sentence_embedding_a, batch_catchphrase_embedding_b,