Example #1
    def sync_structure(self):
        media_data = find_media_files()
        self.media_data = sync_files_with_db(media_data)

        if not self.media_data.get('albums'):
            logger.info("No new photos to upload")
            return

        root_node_uri = self.api.get_root_node()

        # Create new folders
        for folder_path, files_bundle in self.media_data['albums'].items():
            folders = folder_path.strip('/').split('/')

            prev_node_uri = root_node_uri

            for cnt, f in enumerate(folders, start=1):

                if cnt == len(folders):
                    nt = 'Album'
                else:
                    nt = 'Folder'

                current_node_uri = self.api.get_or_create_node(
                    prev_node_uri, f, node_type=nt
                )

                if cnt == len(folders):
                    files_bundle['album_uri'] = current_node_uri

                prev_node_uri = current_node_uri

        # Insert the new photos into the DB and link them to their album URIs
        for folder_path, files_bundle in self.media_data['albums'].items():
            with db.atomic():
                photos_insert_to_db = []
                for f in files_bundle['files']:
                    photos_insert_to_db.append(
                        {
                            'local_path': f,
                            'local_md5': get_md5(f),
                            'status': 'pending'
                        }
                    )

                if photos_insert_to_db:
                    logger.info("\tInserting to DB: %d",
                                len(photos_insert_to_db))

                    for photos_insert_to_db_chunk in chunks(
                            photos_insert_to_db, 300):
                        Photo.insert_many(photos_insert_to_db_chunk).execute()

                for files_chunk in chunks(files_bundle['files'], 300):
                    Photo.update(
                        ext_album_key=files_bundle['album_uri']
                    ).where(
                        (Photo.local_path << files_chunk)
                    ).execute()
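Every example on this page leans on a `chunks` helper that the page itself never shows (some snippets reference it as `utils.chunks`). A minimal sketch of a compatible implementation, assuming it takes a sequence plus an optional chunk size and yields consecutive slices; several of the crypto examples below call it without a size, so a default of 16 is assumed here:

def chunks(seq, size=16):
    # Yield consecutive slices of seq of length size (the last one may be shorter).
    for i in range(0, len(seq), size):
        yield seq[i:i + size]

Because this sketch is a generator, callers that need indexing or a length wrap it in list(...), which matches how the examples use it.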
Example #2
def generate_hdf5(out_file, dset):
    def load_img(index):
        img = dset.img_dir + '/' + dset.images[index]
        img = Image.open(img).convert('RGB')
        img = transform(img)
        return img

    transform = imagenet_transform('test')

    feat_extractor = tmodels.resnet18(pretrained=True)
    feat_extractor.fc = nn.Sequential()
    feat_extractor.eval().cuda()

    image_feats = []
    for chunk in tqdm.tqdm(utils.chunks(range(len(dset.images)), 512),
                           total=len(dset.images) // 512):
        imgs = list(map(load_img, chunk))
        with torch.no_grad():  # inference only; no autograd graph needed
            feats = feat_extractor(torch.stack(imgs).cuda()).data.cpu()
        image_feats.append(feats)
    image_feats = torch.cat(image_feats, 0).numpy()
    print(image_feats.shape)

    hf = h5py.File(out_file, 'w')
    hf.create_dataset('feats', data=image_feats)
    hf.close()
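As a quick usage check, the features written by generate_hdf5 can be read back with h5py; the file name below is illustrative:

import h5py

with h5py.File('image_feats.h5', 'r') as hf:  # illustrative path
    feats = hf['feats'][:]
print(feats.shape)  # (num_images, 512) for the resnet18 backbone above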
Example #3
def cutnpaste(oracle):
    # find the breakpoint for padding
    pad_len = 0
    for pad in range(32, 100):
        oracle_chunks = list(chunks(oracle("A" * pad), 16))
        if oracle_chunks[1] == oracle_chunks[2]:
            pad_len = pad % 16
            break
    payload = "A" * (pad_len) + "admin" + "\v" * 11
    payload_chunk = list(chunks(oracle(payload)))[1]

    cut_payload = "A" * (3 + pad_len)
    cut_chunks = list(chunks(oracle(cut_payload)))[:-1]
    cut_chunks.append(payload_chunk)

    print(decrypt(flatten(cut_chunks)))
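`oracle`, `decrypt`, and `flatten` are helpers from the surrounding challenge code and are not shown; `oracle` is presumably an ECB cookie oracle and `decrypt` its inverse. Only `flatten` can be reconstructed from the usage above with any confidence; a minimal sketch, assuming the oracle returns raw bytes:

def flatten(chunk_list):
    # Reassemble a list of ciphertext chunks into a single byte string.
    return b"".join(bytes(chunk) for chunk in chunk_list)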
Example #4
def cbc_padding_oracle_attack(ciphertext):
    blocks = list(chunks(ciphertext))
    plaintext = b""

    # Pairs of blocks, the second one is decrypted
    for o1, o2 in zip(blocks, blocks[1:]):
        decryptedchunk = bytearray(len(o1))
        plainchunk = bytearray(len(o1))
        c1 = bytearray(o1)
        c2 = bytearray(o2)

        # Indexes of characters to decrypt
        indexes = list(reversed(range(0, len(c1))))
        for index in indexes:
            # How many characters are already decrypted
            solved_count = len(c1) - index - 1
            for solved in indexes[:solved_count]:
                # Set the characters so they result in "proper" padding
                c1[solved] = decryptedchunk[solved] ^ (solved_count + 1)
            for byte in range(0, 256):
                c1[index] = byte
                if (validate_padding(bytes(c1 + c2))):
                    decryptedchunk[index] = byte ^ (solved_count + 1)
                    plainchunk[index] = decryptedchunk[index] ^ o1[index]
                    break
                else:
                    continue
        plaintext += bytes(plainchunk)
    return plaintext
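`validate_padding` is the server-side oracle and is not shown here. On the oracle side it typically reduces to a strict PKCS#7 check on the decrypted data; a sketch of that check only (the decryption step under the secret key is omitted):

def pkcs7_is_valid(data, block_size=16):
    # Strict PKCS#7: the last byte gives the pad length, and every pad byte must equal it.
    if not data or len(data) % block_size:
        return False
    pad = data[-1]
    return 0 < pad <= block_size and data[-pad:] == bytes([pad]) * pad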
Example #5
def aes_detect(encryption_function):
    plaintext = b"A" * 100
    ciphertext = encryption_function(plaintext)
    ciphertext_chunks = list(chunks(ciphertext, 16))
    if len(ciphertext_chunks) != len(set(ciphertext_chunks)):
        print("ECB detected")
    else:
        print("CBC detected")
Example #6
def crack_aes(aes_function):
    b_size = 0
    # last bytes are going to be padding bytes + last character of ciphertext
    # a cycle in the last bytes is the block size
    last_bytes = [
        cipher[-2]
        for cipher in map(lambda i: aes_function(b"A" * i), range(1, 100))
    ]
    last_two_bytes = last_bytes[-2:]
    for possible_block_size in range(2, 100):
        if last_bytes[-(2 + possible_block_size):-(
                possible_block_size)] == last_two_bytes:
            b_size = possible_block_size
            break
    print(f"Block size: {b_size}")

    # given two same plaintext blocks, check if the ciphertext is also the same
    ciphertext_chunks = list(chunks(aes_function(b"A" * 2 * b_size), b_size))
    if ciphertext_chunks[0] == ciphertext_chunks[1]:
        print("ECB detected...")
    else:
        print("First two blocks do not match, aborting...")
        return

    mapping = {}
    secret = b""

    byte_index = 0
    block_index = 0

    def cipher_block(plaintext, idx):
        return list(chunks(aes_function(plaintext), b_size))[idx]

    while True:
        padding = b_size - 1 - (byte_index % (b_size))
        leak_query = (b"A" * padding)
        leak_cipher = cipher_block(leak_query, block_index)

        if leak_cipher in mapping:
            secret += mapping[leak_cipher]

        else:
            found = False
            for byte in range(0, 256):
                test_block = leak_query + secret + bytes([byte])
                test_cipher = cipher_block(test_block, block_index)
                if test_cipher == leak_cipher:
                    mapping[test_cipher] = bytes([byte])
                    secret += bytes([byte])
                    print(f"\r{secret}", end="")
                    found = True
                    break
            if not found:
                print("\nFailed to find next byte")
                return

        byte_index += 1
        block_index = byte_index // b_size
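crack_aes assumes an oracle of the shape ECB(attacker_input || secret) with no random prefix. A sketch of such an oracle it could be run against, with an illustrative secret, a throwaway key and standard PKCS#7 padding (again via the cryptography package):

import os
from cryptography.hazmat.backends import default_backend
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes

_KEY = os.urandom(16)
_SECRET_SUFFIX = b"byte-at-a-time ECB target"  # illustrative secret

def suffix_oracle(attacker_input):
    data = attacker_input + _SECRET_SUFFIX
    pad = 16 - (len(data) % 16)  # standard PKCS#7: always pad
    encryptor = Cipher(algorithms.AES(_KEY), modes.ECB(),
                       backend=default_backend()).encryptor()
    return encryptor.update(data + bytes([pad]) * pad) + encryptor.finalize()

crack_aes(suffix_oracle)  # prints the recovered secret byte by byte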
Example #7
    async def lyrics(self, ctx, *, song):
        data = await self.bot.session.get(f"https://some-random-api.ml/lyrics?title={quote_plus(song)}")
        song = await data.json()
        embed = discord.Embed(title=f"Lyrics for {song['title']}", description=song['author'])
        embed.set_footer(text="Lyrics provided by some-random-api")
        lyrics_list = list(chunks(song['lyrics'], 1000))
        for ly in lyrics_list:
            embed.add_field(name=zws, value=ly, inline=False)
        await ctx.send(embed=embed)
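zws is an assumed module-level constant: Discord embed field names may not be empty, so a zero-width space is commonly used as a blank-looking name:

zws = "\u200b"  # zero-width space used as an "empty" embed field name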
Example #8
    def find_breakpoint():
        for pad in range(32, 100):
            encrypted_chunks = list(chunks(oracle(b"A" * pad)))
            # found a duplicating block
            if len(encrypted_chunks) > len(set(encrypted_chunks)):
                for i in range(1, len(encrypted_chunks)):
                    if encrypted_chunks[i - 1] == encrypted_chunks[i]:
                        # index of chunk containing the last bits of the prefix
                        last_prefix = i - 2
                        # how many bytes we need to fill out that chunk
                        fill_bytes = pad - 32
                        return (last_prefix, fill_bytes)
Example #9
def bruteforce():
    base_ciphertext = list(chunks(create_cookie("XadminXtrue")))

    for x in range(0, 256):
        for y in range(0, 256):
            c = bytearray()
            c[:] = base_ciphertext[1]
            c[0] = x
            c[6] = y

            if (check_admin(base_ciphertext[0] + c + base_ciphertext[2] + base_ciphertext[3])):
                print("Obtained admin rights")
                exit() 
Example #10
def decrypt_aes_cbc(ciphertext, key, iv):
    assert len(iv) == CHUNK_SIZE

    cipher = Cipher(algorithms.AES(key),
                    modes.ECB(),
                    backend=default_backend())
    plaintext = b""
    for chunk in chunks(ciphertext, CHUNK_SIZE):
        decryptor = cipher.decryptor()
        decrypted = decryptor.update(chunk) + decryptor.finalize()
        plaintext += fixed_xor(decrypted, iv)
        iv = chunk

    return plaintext
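fixed_xor and CHUNK_SIZE come from the surrounding challenge code; a minimal sketch consistent with how they are used here, with the 16-byte AES block size as the assumed constant:

CHUNK_SIZE = 16  # AES block size

def fixed_xor(a, b):
    # XOR two equal-length byte strings.
    assert len(a) == len(b)
    return bytes(x ^ y for x, y in zip(a, b))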
Example #11
def get_risks(positions, pricing_environment, valuation_datetime, domain, headers):
    """Return pricing results of given positions.
    positions_list: list of basic positions(live, and possibly expiring positions)"""

    start = timer()
    positions_trade_id = list(positions.tradeId.unique())
    trade_batches = list(utils.chunks(positions_trade_id, MAX_PRICING_TRADES_NUM))
    pricing_result = []
    pricing_diagnostics = []
    for trade_batch in trade_batches:
        params = {
            'requests': ['price', 'delta', 'gamma', 'vega', 'theta', 'rho_r'],
            'tradeIds': trade_batch,
            'pricingEnvironmentId': pricing_environment,
            'valuationDateTime': '{}T00:00:00'.format(valuation_datetime),
            'timezone': None
        }
        res = utils.call_request(domain, 'pricing-service', 'prcPrice', params, headers)
        pricing_result.extend(res.get('result') if 'result' in res else [])
        pricing_diagnostics.extend(res.get('diagnostics') if 'diagnostics' in res else [])
    end = timer()
    print('pricing all trades take ' + str(end - start) + ' seconds')
    if len(positions_trade_id) > 0 and len(pricing_result) > 0:
        start = timer()
        price_data = pd.DataFrame(pricing_result, dtype='double')
        risk_df = price_data
        # fill missing elements of cash flows
        # (fillna(inplace=True) on a column slice would not modify risk_df)
        risk_df[['vol', 'q']] = risk_df[['vol', 'q']].fillna(0)
        # risk_df['underlyerPrice'].fillna(-1, inplace=True)  # impossible value
        diagnostics_df = pd.DataFrame(pricing_diagnostics)
        if not diagnostics_df.empty:
            diagnostics_df.rename(columns={'key': 'positionId'}, inplace=True)
            diagnostics_df = diagnostics_df[~diagnostics_df.positionId.duplicated(keep='last')]
            risk_df = risk_df.merge(diagnostics_df, on='positionId', how='outer')
        risk_df['message'] = ''
        # pointless in jkzx
        # multi_asset_position_ids = list(positions[positions.productType.isin(_PRODUCT_TYPE_SPREADS)].tradeId.unique())
        # risk_df['message'] = risk_df.apply(lambda row: get_message(row, multi_asset_position_ids), axis=1)

        pe_description = utils.get_pricing_env_description(pricing_environment, domain, headers)
        risk_df['pricing_environment'] = pe_description
        risk_df.set_index('positionId', inplace=True)
        for prc in ['qs', 'vols', 'underlyerPrices', 'deltas', 'gammas', 'vegas']:
            if prc not in risk_df.columns:
                risk_df[prc] = np.nan
        end = timer()
        print('normalize risk results take ' + str(end - start) + ' seconds')
        return risk_df
    else:
        raise RuntimeError('Failed to price trades.')
Example #12
def encrypt_aes_cbc(plaintext, key, iv):
    assert len(iv) == CHUNK_SIZE

    cipher = Cipher(algorithms.AES(key),
                    modes.ECB(),
                    backend=default_backend())
    ciphertext = b""
    for chunk in chunks(plaintext, CHUNK_SIZE):
        if len(chunk) < CHUNK_SIZE:
            # only a short final chunk is padded here; an exact multiple of CHUNK_SIZE is encrypted as-is
            chunk = pkcs7padding(chunk, CHUNK_SIZE)
        encryptor = cipher.encryptor()
        encrypted = encryptor.update(fixed_xor(chunk, iv))
        iv = encrypted
        ciphertext += encrypted

    return ciphertext
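pkcs7padding is likewise not shown; a sketch matching how it is used here and in the oracles below (an already-full block receives a whole extra block of padding, as PKCS#7 prescribes):

def pkcs7padding(chunk, block_size):
    # Pad a (possibly short) final block up to block_size using PKCS#7.
    pad = block_size - (len(chunk) % block_size)
    return bytes(chunk) + bytes([pad]) * pad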
Example #13
def aes_oracle(plaintext):

    secret = """Um9sbGluJyBpbiBteSA1LjAKV2l0aCBteSByYWctdG9wIGRvd24gc28gbXkg
aGFpciBjYW4gYmxvdwpUaGUgZ2lybGllcyBvbiBzdGFuZGJ5IHdhdmluZyBq
dXN0IHRvIHNheSBoaQpEaWQgeW91IHN0b3A/IE5vLCBJIGp1c3QgZHJvdmUg
YnkK
"""
    plaintext = RANDOM_PREFIX + plaintext
    plaintext += base64.b64decode(secret)
    plain_chunks = list(chunks(plaintext, 16))

    last_chunk = pkcs7padding(plain_chunks[-1], 16)
    padded_chunks = plain_chunks[:-1] + [last_chunk]
    padded_plaintext = b""
    for chunk in padded_chunks:
        padded_plaintext += chunk

    return encrypt_aes_ecb(padded_plaintext, STATIC_AES_KEY)
Example #14
def main():
    st = datetime.now()
    print('#####' * 5)
    print('Game Price History Populator FIRED at', st)
    print('#####' * 5)
    # get all games with ITAD id
    # '-' is not processed game
    # 'not_found' is processed but no ID
    # (no price history or not applicable, like ps4 exclusive)
    data = list(col_gamespot.find({'itad_id': {'$nin': ['-', 'not_found']}}))
    print('#### Scraping price for %d games' % len(data))

    base_price_url = 'http://isthereanydeal.com/ajax/game/price?plain=%s'

    urls = []
    refs = {}
    for row in data:
        refs[row['itad_id']] = row
        urls.append(base_price_url % row['itad_id'])

    all_chunks = list(chunks(urls, 50))
    for index, chunk in enumerate(all_chunks):
        print('##### Processing chunk', index, 'out of', len(all_chunks))
        for response in grequests.map((grequests.get(u) for u in chunk)):
            meta = refs.get(response.url.split('=')[-1], {})
            game_nm = meta.get('name')
            try:
                df = grab_price_history(response)
                df = df[[col for col in df.columns if col not in ['certainty', 'emphasis']]]
                df.columns = [re.sub('[^A-Za-z0-9]+', '', col).lower() for col in df.columns]
                df['date'] = df.date.apply(lambda d: parse(d))
                data = [v for k, v in df.T.to_dict().items()]
                set_q = meta
                set_q['price_history'] = data
                col_price.update({'_id': meta['_id']},
                                 {'$set': set_q},
                                 upsert=True)
                print('[SUCCESS] Upserted: %s' % game_nm)
            except Exception as e:
                with open('gen_price_error_log', 'a') as h:
                    print('[ERROR]' + game_nm + '\t' + str(e))
                    h.write(game_nm + '\t' + str(e) + '\n')
                continue
Example #15
def sync_files_with_db(files_tree):
    photos_to_upload = {root_path: {} for root_path in files_tree.keys()}
    total_new_photos = 0

    photos = set()
    for root_path, folders in files_tree.items():
        for folder, files_bundle in folders.items():
            files = files_bundle['files']

            logger.info('Album: %s', folder)
            logger.info("\tTotal photos: %d", len(files))

            for paths_chunk in chunks(files, 300):
                _photos = (Photo.select(Photo.local_path).where(
                    (Photo.local_path << paths_chunk)))

                photos.update(set(_photos))

            db_photos = {_p.local_path for _p in photos}

            logger.info("\tPhotos exist in DB: %d", len(db_photos))

            local_photos = set(files)
            new_photos = local_photos - db_photos

            if new_photos:
                photos_to_upload[root_path][folder] = {
                    'files': list(new_photos),
                    'album_uri': None,
                }
                total_new_photos += len(new_photos)
            else:
                logger.info(
                    "All photos already exist in DB. Upload skipped\n\n")

    del files_tree

    if total_new_photos:
        logger.info("Total new photos to upload: %d", total_new_photos)

    return photos_to_upload
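Examples 1, 15 and 20 all go through a Peewee model named Photo (the << operator is Peewee's IN clause). The model itself is not shown; a sketch with the fields these examples touch, where the SQLite backend and the field options are assumptions:

from peewee import CharField, Model, SqliteDatabase

db = SqliteDatabase('photos.db')  # assumed backend

class Photo(Model):
    local_path = CharField(unique=True)
    local_md5 = CharField(null=True)
    status = CharField(default='pending')
    ext_album_key = CharField(null=True)

    class Meta:
        database = db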
Example #16
    def organize_data(self):
        process_list = []
        results_list = []
        queue = Queue()

        # assign each core a slice of the time range, in chronological order, using a custom chunks helper
        _range = list(
            range(0,
                  len(self.data) - self.lookback - self.lookahead,
                  self.lookahead))
        _list = list(chunks(_range, int(len(_range) / self.core) + 1))

        X_tmp = np.zeros(shape=(1, self.lookback, len(self.columns)))
        y_tmp = np.zeros(shape=(1, self.lookahead, 2))
        ordered_list = [None] * self.core

        for i in range(self.core):
            p = Process(target=queue_wrapper,
                        args=[
                            queue, self.time_window, i, _list[i],
                            self.lookback, self.lookahead
                        ])
            process_list.append(p)
            p.start()

        for i in range(self.core):
            results_list.append(queue.get())

        for p in process_list:
            p.join()

        for item in results_list:
            ordered_list[item[1]] = item[0]

        # [[X_tmp, y_tmp], [X_tmp, t_tmp], ...] follow the timestamp order
        for item in ordered_list:
            X_tmp = np.concatenate((item[0], X_tmp), axis=0)
            y_tmp = np.concatenate((item[1], y_tmp), axis=0)

        self.X = X_tmp[:-1, ]
        self.y = y_tmp[:-1, ]
Example #17
    def generate_features(self, out_file):

        data = self.train_data + self.val_data + self.test_data
        transform = imagenet_transform('test')
        feat_extractor = tmodels.resnet18(pretrained=True)
        feat_extractor.fc = nn.Sequential()
        feat_extractor.eval().cuda()

        image_feats = []
        image_files = []
        for chunk in tqdm.tqdm(utils.chunks(data, 512),
                               total=len(data) // 512):
            files, attrs, objs = zip(*chunk)
            imgs = list(map(self.loader, files))
            imgs = list(map(transform, imgs))
            feats = feat_extractor(torch.stack(imgs, 0).cuda())
            image_feats.append(feats.data.cpu())
            image_files += files
        image_feats = torch.cat(image_feats, 0)
        print('features for %d images generated' % (len(image_files)))

        torch.save({'features': image_feats, 'files': image_files}, out_file)
Example #18
def aes_oracle(plaintext):
    rng = random.Random()
    prefix = random_bytearray(rng.randint(5, 10))
    suffix = random_bytearray(rng.randint(5, 10))
    plaintext = prefix + plaintext + suffix

    plain_chunks = list(chunks(plaintext, 16))
    last_chunk = pkcs7padding(plain_chunks[-1], 16)
    padded_chunks = plain_chunks[:-1] + [last_chunk]
    padded_plaintext = b""
    for chunk in padded_chunks:
        padded_plaintext += chunk

    aes_key = random_bytearray(16)

    if (rng.randint(0, 1) == 0):  # ECB mode
        print("Encrypting in ECB mode...")
        return encrypt_aes_ecb(padded_plaintext, aes_key)
    else:  # CBC mode
        aes_iv = random_bytearray(16)
        print("Encrypting in CBC mode...")
        return encrypt_aes_cbc(padded_plaintext, aes_key, aes_iv)
Example #19
    def generate_features(self, out_file, model):
        '''
        Inputs
            out_file: Path to save features
            model: String of extraction model
        '''
        # data = self.all_data
        data = ospj(self.root, 'images')
        files_before = glob(ospj(data, '**', '*.jpg'), recursive=True)
        files_all = []
        for current in files_before:
            parts = current.split('/')
            if "cgqa" in self.root:
                files_all.append(parts[-1])
            else:
                files_all.append(os.path.join(parts[-2], parts[-1]))
        transform = dataset_transform('test', self.norm_family)
        feat_extractor = get_image_extractor(arch=model).eval()
        feat_extractor = feat_extractor.to(device)

        image_feats = []
        image_files = []
        for chunk in tqdm(chunks(files_all, 512),
                          total=len(files_all) // 512,
                          desc=f'Extracting features {model}'):

            files = chunk
            imgs = list(map(self.loader, files))
            imgs = list(map(transform, imgs))
            feats = feat_extractor(torch.stack(imgs, 0).to(device))
            image_feats.append(feats.data.cpu())
            image_files += files
        image_feats = torch.cat(image_feats, 0)
        print('features for %d images generated' % (len(image_files)))

        torch.save({'features': image_feats, 'files': image_files}, out_file)
Example #20
def clean_hidden_files_from_db():
    photos_in_db = Photo.select(Photo.local_path)

    logger.info("Total photos in DB: %d", photos_in_db.count())

    hidden_files_to_remove = []

    for p in photos_in_db:
        file_name = os.path.basename(p.local_path)
        if file_name.startswith('.'):
            logger.info("Hidden file found: %s", file_name)

            hidden_files_to_remove.append(p.local_path)

    logger.info("Total hidden files count: %d", len(hidden_files_to_remove))

    if hidden_files_to_remove:
        with db.atomic():
            for files_chunk in chunks(hidden_files_to_remove, 300):
                _removed_cnt = Photo.delete().where(
                    (Photo.local_path << files_chunk)
                ).execute()

                logger.info("Removed: %d", _removed_cnt)
Example #21
def create_graph_batch(config,
                       graph,
                       batch_data,
                       initial_pos_vel_known,
                       shuffle=True,
                       return_only_unpadded=True,
                       multistep=False,
                       start_episode=None):
    input_graph_lst, target_graph_lst = [], []
    random_episode_idx_starts = []
    for data in batch_data:
        input_graphs, target_graphs, exp_id = graph_to_input_and_targets_single_experiment(
            config,
            graph,
            data,
            initial_pos_vel_known,
            return_only_unpadded=return_only_unpadded)
        if not shuffle:
            input_graph_lst.append(input_graphs)
            target_graph_lst.append(target_graphs)
        else:
            input_graph_lst.append((input_graphs, exp_id))
            target_graph_lst.append((target_graphs, exp_id))

    if not shuffle:
        input_graph_lst = list(input_graph_lst)
        target_graph_lst = list(target_graph_lst)

        input_graph_lst = list(chunks(input_graph_lst,
                                      config.train_batch_size))
        target_graph_lst = list(
            chunks(target_graph_lst, config.train_batch_size))

        #input_graph_lst = [graph for lst in input_graph_lst for graph in lst]
        #target_graph_lst = [graph for lst in target_graph_lst for graph in lst]

        return input_graph_lst, target_graph_lst, 0

    if not multistep:
        "flatten lists"
        input_graph_lst = [(lst, tpl_e2) for tpl_e1, tpl_e2 in input_graph_lst
                           for lst in tpl_e1]
        target_graph_lst = [(lst, tpl_e2)
                            for tpl_e1, tpl_e2 in target_graph_lst
                            for lst in tpl_e1]

        "shuffle lists"
        shuffled_list = list(zip(input_graph_lst, target_graph_lst))

        random.shuffle(shuffled_list)
        """ ensure that no batch has input/output graph with the same experiment id """
        input_batches, target_batches = ensure_batch_has_no_sample_with_same_exp_id(
            config, shuffled_list)
    else:
        input_batches = []
        target_batches = []
        inp_ids = []
        targ_ids = []

        for tupl_inp, tupl_targ in zip(input_graph_lst, target_graph_lst):
            minimum_exp_leng = len(tupl_inp[0]) - 1
            """ we want at least n_prediction samples: """
            if start_episode is None:
                random_start_episode_idx = random.randint(
                    0, minimum_exp_leng - config.n_predictions)
                exp_id = tupl_inp[1]
                random_episode_idx_starts.append(
                    (exp_id, random_start_episode_idx))
            else:
                random_start_episode_idx = start_episode
            """ also take n_prediction samples from input graphs because we need the control input from these: """
            input_graph = tupl_inp[
                0][random_start_episode_idx:random_start_episode_idx +
                   config.n_predictions]
            # targets are shifted by one, i.e. same indexing rule as for input graphs applies here
            target_graphs = tupl_targ[
                0][random_start_episode_idx:random_start_episode_idx +
                   config.n_predictions]
            """ just a sanity check"""
            inp_ids.append(tupl_inp[1])
            targ_ids.append(tupl_targ[1])

            input_batches.append(input_graph)
            target_batches.append(target_graphs)

        assert all(len(x) == config.n_predictions
                   for x in target_batches), "not all lists have equal length"
        assert all(inp_ids[i] == targ_ids[i] for i in range(len(inp_ids)))
        input_batches = [input_batches]
        target_batches = [target_batches]

    return input_batches, target_batches, random_episode_idx_starts
Example #22
def eod_classic_scenarios_report(positions, all_sub_companies, ip, headers, valuation_date, pricing_environment):
    sub_positions = positions[
        ['positionId', 'tradeId', 'asset.underlyerInstrumentId', 'asset.underlyerMultiplier', 'counterPartyName',
         'bookName']]
    sub_positions.rename(columns={'bookName': 'subsidiary', 'asset.underlyerInstrumentId': 'underlyerInstrumentId',
                                  'asset.underlyerMultiplier': 'multiplier', 'counterPartyName': 'partyName'},
                         inplace=True)
    sub_positions.fillna({'multiplier': 1}, inplace=True)

    instrument_ids = list(set(sub_positions['underlyerInstrumentId']))
    instrument_scenario_shock_dict = get_all_instrument_scenario_shock_dict(instrument_ids, headers)
    # api prcSpotScenarios takes lots of time due to too many trades and too many scenarios, use thread pool
    # but we need to decide which combination of pool size and batch size is the optimum solution
    # pool = Pool(POOL_SIZE)
    scenarios = []
    for scenario_type in CLASSIC_SCENARIOS:
        scenario_group = sub_positions.groupby('underlyerInstrumentId')
        print('pricing scenario '+scenario_type+' with '+str(len(scenario_group))+' instruments')
        cnt = 0
        for instrument, positions_df in sub_positions.groupby('underlyerInstrumentId'):
            cnt = cnt + 1
            print('\t'+str(cnt)+': pricing '+instrument+' with '+str(len(positions_df))+' positions')
            shock = get_scenario_shock(scenario_type, instrument, instrument_scenario_shock_dict)
            trade_batches = list(utils.chunks(list(positions_df['tradeId'].dropna().unique()), MAX_PRICING_TRADES_NUM))
            for trade_batch in trade_batches:
                res = price_scenarios(trade_batch, shock, scenario_type, ip, headers, valuation_date, pricing_environment)
                scenarios.extend(res)
            #pool.apply_async(func=price_scenarios, args=(trades, shock, scenario_type, ip, headers),
            #                 callback=lambda res: scenarios.extend(res))
    # pool.close()
    # pool.join()
    scenarios_df = json_normalize(scenarios)
    scenarios_df = scenarios_df[scenarios_df.positionId.isin(sub_positions.positionId)]
    scenarios_df.rename(
        columns={'scenarioResult.delta': 'delta', 'scenarioResult.gamma': 'gamma', 'scenarioResult.theta': 'theta',
                 'scenarioResult.vega': 'vega', 'scenarioResult.rhoR': 'rhoR',
                 'scenarioResult.pnlChange': 'pnlChange', 'scenarioResult.underlyerPrice': 'underlyerPrice'},
        inplace=True)
    scenarios_df = scenarios_df[['positionId', 'scenarioType', 'delta', 'gamma', 'theta', 'vega', 'rhoR', 'pnlChange',
                                 'underlyerPrice']].fillna(0)
    scenarios_df.replace([np.inf, -np.inf, 'Infinity', '-Infinity', 'NaN', 'nan'], np.nan, inplace=True)
    scenarios_df.fillna(0, inplace=True)
    scenarios_df = scenarios_df.merge(sub_positions, on='positionId', how='left')
    scenarios_df['deltaCash'] = scenarios_df['delta'] * scenarios_df['underlyerPrice']
    scenarios_df['gammaCash'] = scenarios_df['gamma'] * scenarios_df['underlyerPrice'] * scenarios_df[
        'underlyerPrice'] / 100

    scenarios_df['delta'] = scenarios_df.apply(lambda x: np.float64(x['delta']) / np.float64(x['multiplier']), axis=1)
    scenarios_df['gamma'] = scenarios_df['gamma'] * scenarios_df['underlyerPrice'] / scenarios_df['multiplier'] / 100
    scenarios_df['theta'] = scenarios_df['theta'] / 365
    scenarios_df['vega'] = scenarios_df['vega'] / 100
    scenarios_df['rho'] = scenarios_df['rhoR'] / 100

    reports = []
    columns = ['scenarioType', 'underlyerInstrumentId', 'delta', 'deltaCash', 'gamma', 'gammaCash', 'theta', 'vega',
               'rho', 'pnlChange']

    key = ['scenarioType', 'underlyerInstrumentId']
    all_market_scenarios = scenarios_df[columns].groupby(key).sum().reset_index()
    all_market_scenarios['reportType'] = 'MARKET'

    key = ['scenarioType', 'subsidiary', 'underlyerInstrumentId']
    subsidiary_scenarios = scenarios_df[columns + ['subsidiary']].groupby(key).sum().reset_index()
    subsidiary_scenarios['reportType'] = 'SUBSIDIARY'

    key = ['scenarioType', 'partyName', 'underlyerInstrumentId']
    party_scenarios = scenarios_df[columns + ['partyName']]
    party_scenarios['isSubsidiary'] = party_scenarios.apply(
        lambda row: is_subsidiary(row['partyName'], all_sub_companies), axis=1)
    party_scenarios = party_scenarios[~party_scenarios.isSubsidiary].groupby(key).sum().reset_index()
    party_scenarios[party_scenarios.select_dtypes(include=['number']).columns] *= -1
    party_scenarios['reportType'] = 'PARTY'
    party_scenarios.drop('isSubsidiary', axis=1, inplace=True)

    reports.extend(all_market_scenarios.to_dict(orient='records'))
    reports.extend(subsidiary_scenarios.to_dict(orient='records'))
    reports.extend(party_scenarios.to_dict(orient='records'))

    return reports
Example #23
def eod_spot_scenarios_by_market_report(positions, ip, headers, pe_description,
                                        all_sub_companies, valuation_date,
                                        pricing_environment):
    sub_positions = positions[[
        'positionId', 'bookName', 'asset.underlyerInstrumentId',
        'asset.underlyerMultiplier', 'counterPartyName'
    ]]
    sub_positions.rename(columns={
        'bookName': 'subsidiary',
        'asset.underlyerInstrumentId': 'underlyerInstrumentId',
        'asset.underlyerMultiplier': 'multiplier',
        'counterPartyName': 'partyName'
    },
                         inplace=True)
    sub_positions.fillna({'multiplier': 1}, inplace=True)
    trades = list(positions['tradeId'].dropna().unique())

    scenarios = []
    trade_batches = list(utils.chunks(trades, MAX_PRICING_TRADES_NUM))
    for trade_batch in trade_batches:
        print("\tstart pricing " + str(len(trade_batch)) + " trades")
        start = timer()
        scenario_batch = price_scenarios(trade_batch, ip, headers,
                                         valuation_date, pricing_environment)
        end = timer()
        print('\tpricing takes:' + str(end - start) + ' seconds')
        scenarios.extend(scenario_batch)
    print('finish pricing all scenarios')
    start = timer()
    scenarios_df = json_normalize(scenarios)
    end = timer()
    print('\tconvert scenario results takes:' + str(end - start) + ' seconds')
    start = timer()
    scenarios_df = scenarios_df[scenarios_df.positionId.isin(
        sub_positions.positionId)]
    scenarios_df.rename(columns={
        'scenarioResult.delta': 'delta',
        'scenarioResult.gamma': 'gamma',
        'scenarioResult.theta': 'theta',
        'scenarioResult.vega': 'vega',
        'scenarioResult.rhoR': 'rhoR',
        'scenarioResult.pnlChange': 'pnlChange',
        'scenarioResult.underlyerPrice': 'underlyerPrice'
    },
                        inplace=True)
    scenarios_df = scenarios_df[[
        'positionId', 'scenarioId', 'delta', 'gamma', 'theta', 'vega', 'rhoR',
        'pnlChange', 'underlyerPrice'
    ]].fillna(0)
    scenarios_df.replace(
        [np.inf, -np.inf, 'Infinity', '-Infinity', 'NaN', 'nan'],
        np.nan,
        inplace=True)
    scenarios_df.fillna(0, inplace=True)
    end = timer()
    print('\tpostprocess scenario results takes:' + str(end - start) +
          ' seconds')
    start = timer()
    scenarios_df = scenarios_df.merge(sub_positions,
                                      on='positionId',
                                      how='left')
    end = timer()
    print('\tmerge scenario results takes:' + str(end - start) + ' seconds')
    start = timer()
    scenarios_df[
        'deltaCash'] = scenarios_df['delta'] * scenarios_df['underlyerPrice']
    scenarios_df['gammaCash'] = scenarios_df['gamma'] * scenarios_df[
        'underlyerPrice'] * scenarios_df['underlyerPrice'] / 100

    scenarios_df['delta'] = scenarios_df.apply(
        lambda x: np.float64(x['delta']) / np.float64(x['multiplier']), axis=1)
    scenarios_df['gamma'] = scenarios_df['gamma'] * scenarios_df[
        'underlyerPrice'] / scenarios_df['multiplier'] / 100
    scenarios_df['theta'] = scenarios_df['theta'] / 365
    scenarios_df['vega'] = scenarios_df['vega'] / 100
    scenarios_df['rhoR'] = scenarios_df['rhoR'] / 100
    end = timer()
    print('\tcalc scenario results takes:' + str(end - start) + ' seconds')
    start = timer()
    all_market_scenarios = scenarios_df[[
        'scenarioId', 'underlyerInstrumentId', 'delta', 'deltaCash', 'gamma',
        'gammaCash', 'theta', 'vega', 'rhoR', 'pnlChange'
    ]].groupby(['scenarioId', 'underlyerInstrumentId']).sum().reset_index()
    subsidiary_scenarios = scenarios_df[[
        'scenarioId', 'underlyerInstrumentId', 'subsidiary', 'delta',
        'deltaCash', 'gamma', 'theta', 'vega', 'rhoR', 'gammaCash', 'pnlChange'
    ]].groupby(['scenarioId', 'subsidiary',
                'underlyerInstrumentId']).sum().reset_index()
    counter_party_scenarios = scenarios_df[[
        'scenarioId', 'underlyerInstrumentId', 'partyName', 'delta',
        'deltaCash', 'gamma', 'theta', 'vega', 'rhoR', 'gammaCash', 'pnlChange'
    ]].groupby(['scenarioId', 'partyName',
                'underlyerInstrumentId']).sum().reset_index()
    counter_party_scenarios[counter_party_scenarios.select_dtypes(
        include=['number']).columns] *= -1
    end = timer()
    print('\tgroup scenario results takes:' + str(end - start) + ' seconds')
    start = timer()

    reports = []
    key = ['underlyerInstrumentId']
    for instrument, scenario in all_market_scenarios.groupby(key):
        reports.append({
            'reportType':
            'MARKET',
            'reportName':
            SPOT_SCENARIOS_BY_MARKET_REPORT_ + pe_description,
            'instrumentId':
            instrument,
            'scenarios':
            scenario.drop(key, axis=1).to_dict(orient='records')
        })

    key = ['subsidiary', 'underlyerInstrumentId']
    for (subsidiary,
         instrument), scenario in subsidiary_scenarios.groupby(key):
        reports.append({
            'reportType':
            'SUBSIDIARY',
            'reportName':
            SPOT_SCENARIOS_BY_SUBSIDIARY_REPORT_ + pe_description,
            'contentName':
            subsidiary,
            'instrumentId':
            instrument,
            'scenarios':
            scenario.drop(key, axis=1).to_dict(orient='records')
        })

    key = ['partyName', 'underlyerInstrumentId']
    for (party_name,
         instrument), scenario in counter_party_scenarios.groupby(key):
        if all_sub_companies is not None and party_name in all_sub_companies:
            continue
        reports.append({
            'reportType':
            'PARTY',
            'reportName':
            SPOT_SCENARIOS_BY_COUNTER_PARTY_REPORT_ + pe_description,
            'contentName':
            party_name,
            'instrumentId':
            instrument,
            'scenarios':
            scenario.drop(key, axis=1).to_dict(orient='records')
        })
    end = timer()
    print('\tgenerate scenario reports takes:' + str(end - start) + ' seconds')
    return reports
Example #24
def generate_summaries_or_translations(
    data_dir: str,
    out_dir: str,
    model_path: str,
    config_path: str,
    batch_size: int = 8,
    device: str = DEFAULT_DEVICE,
    fp16=False,
    task="summarization",
    prefix=None,
    max_source_length=1024,
    max_target_length=142,
    eval_beams=5,
    eval_max_gen_length=142,
    n_obs=-1,
    type_path="test",
    num_return_sequences=1,
    distill=None,
    num_layers=None,
    do_encoder=False,
    do_decoder=False,
    **generate_kwargs,
):

    out_dir = Path(out_dir)
    save_path = out_dir.joinpath(
        f"rank_{utils.distributed_utils.get_rank()}_output.json")

    if num_return_sequences > eval_beams:
        eval_beams = num_return_sequences

    ### Define BART model
    # Config from "https://s3.amazonaws.com/models.huggingface.co/bert/facebook/bart-large-cnn/config.json
    # Vocab modified to 50265 to be consistent with facebook/bart-large default
    config = BartConfig(**json.load(open(config_path, "r")))
    config.fp16 = fp16
    model = BartForConditionalGeneration.from_pretrained(
        model_path, config=config).to(device)

    # if distilling, change model
    if distill == "sft":
        model = distill_sft(model, num_layers, do_encoder, do_decoder)

    if fp16:
        model = model.half()
    model.eval()

    tokenizer = BartTokenizer.from_pretrained('facebook/bart-large-cnn')
    logger.info(f"Inferred tokenizer type: {tokenizer.__class__}"
                )  # if this is wrong, check config.model_type.

    start_time = time.time()
    # update config with task specific params
    use_task_specific_params(model, task)
    if prefix is None:
        prefix = prefix or getattr(model.config, "prefix", "") or ""

    ds = Seq2SeqDataset(tokenizer,
                        data_dir,
                        max_source_length,
                        max_target_length,
                        type_path=type_path,
                        n_obs=n_obs,
                        prefix=prefix)

    # I set shuffle=True for a more accurate progress bar.
    # If all the longest samples are first, the prog bar estimate is too high at the beginning.
    is_distributed = True if utils.distributed_utils.get_world_size(
    ) > 1 else False
    sampler = ds.make_sortish_sampler(batch_size,
                                      distributed=is_distributed,
                                      add_extra_examples=False,
                                      shuffle=True)
    data_loader = DataLoader(ds,
                             sampler=sampler,
                             batch_size=batch_size,
                             collate_fn=ds.collate_fn)

    results = []
    with torch.no_grad():
        for batch in tqdm(data_loader):
            t0 = time.time()

            summaries = model.generate(
                input_ids=batch["input_ids"].to(device),
                attention_mask=batch["attention_mask"].to(device),
                use_cache=True,
                num_return_sequences=num_return_sequences,
                num_beams=eval_beams,
                max_length=eval_max_gen_length,
                num_beam_groups=1,
                output_scores=False,
                return_dict_in_generate=False,
                encoder_no_repeat_ngram_size=0,
                diversity_penalty=0.0,
                **generate_kwargs,
            )
            preds = tokenizer.batch_decode(summaries,
                                           skip_special_tokens=True,
                                           clean_up_tokenization_spaces=False)
            ids = batch["ids"]
            if num_return_sequences > 1:
                preds = chunks(
                    preds, num_return_sequences
                )  # batch size chunks, each of size num_return_seq

            eval_time = time.time() - t0
            for i, pred in enumerate(preds):
                store_time = eval_time if i == 0 else None  #only store latency for element 0 of every batch
                results.append(
                    dict(pred=pred, id=ids[i].item(), eval_time=store_time))

    save_json(results, save_path)
    runtime = int(time.time() - start_time)  # seconds
    num_replicas = sampler.num_replicas if is_distributed else 1
    n_obs = len(results)
    return results, num_replicas, dict(n_obs=n_obs,
                                       eval_only_runtime=runtime,
                                       seconds_per_sample=round(
                                           runtime / n_obs, 4))
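save_json is a small helper from the surrounding utilities; a hypothetical sketch of what the call above assumes:

import json

def save_json(content, path):
    # Dump the list of prediction dicts produced above as JSON.
    with open(path, "w") as f:
        json.dump(content, f, indent=2)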
Example #25
def create_tfrecords_from_dir(config,
                              source_path,
                              dest_path,
                              discard_varying_number_object_experiments=True,
                              n_sequences_per_batch=10,
                              test_size=0.2,
                              pad_to=None,
                              use_fixed_rollout=None):
    """
    specify pad_to to e.g. 15 if all experiments should be padded to length 15 (simply copies the last valid element n times s.t. resulting experiment length is 15).
    Sets gripper velocity and object velocities to zero for the padded samples.

    specify use_fixed_rollout to e.g. 7 to remove all experiments that have more or less rollout steps.

    :param source_path:
    :param dest_path:
    :param name:
    :return:
    """

    assert (pad_to is not None and use_fixed_rollout is None) or (pad_to is None and use_fixed_rollout is not None) or \
           (pad_to is None and use_fixed_rollout is None), "either pad_to and use_fixed_rollout are both None or just one is set (and not both)"

    print('-------------- DATA WILL BE PADDED TO {} --------------'.format(
        pad_to))
    use_object_seg_data_only_for_init = config.use_object_seg_data_only_for_init
    depth_data_provided = config.depth_data_provided
    use_compression = config.use_tfrecord_compression

    file_paths = get_all_experiment_file_paths_from_dir(source_path)
    """ filter out experiments that have a different rollout length than wanted """
    if use_fixed_rollout is not None:
        file_paths = [
            path for path in file_paths if len(path) == use_fixed_rollout
        ]

    train_paths, test_paths = train_test_split(file_paths, test_size=test_size)
    filenames_split_train = list(chunks(train_paths, n_sequences_per_batch))
    filenames_split_test = list(chunks(test_paths, n_sequences_per_batch))

    filenames = filenames_split_train + filenames_split_test
    train_ids = ["train"] * len(filenames_split_train)
    test_ids = ["test"] * len(filenames_split_test)
    identifiers = np.concatenate([train_ids, test_ids])

    if use_compression:
        options = tf.python_io.TFRecordOptions(
            tf.python_io.TFRecordCompressionType.GZIP)
    else:
        options = None

    for i, queue in enumerate(zip(filenames, identifiers)):
        all_batches = queue[0]
        name = queue[1]

        loaded_batch = load_all_experiments_from_dir(all_batches)

        filename = os.path.join(
            dest_path,
            name + str(i + 1) + '_of_' + str(len(filenames)) + '.tfrecords')
        print('Writing', filename)

        identifier = "_object_full_seg_rgb"
        depth = None
        if depth_data_provided:
            identifier = "_object_full_seg_rgb_depth"

        with tf.python_io.TFRecordWriter(path=filename,
                                         options=options) as writer:
            for experiment in loaded_batch.values():

                if not experiment:
                    continue

                if discard_varying_number_object_experiments:
                    skip = check_if_skip(experiment)
                    if skip:
                        continue
                """ add gripper velocity """
                for key, value in experiment.items():
                    if key == 0:
                        vel = np.zeros(shape=3, dtype=np.float64)
                    else:
                        vel = (experiment[key - 1]['gripperpos'] -
                               experiment[key]['gripperpos']) * 240.0
                    value['grippervel'] = vel

                experiment_length = len(experiment)

                if pad_to is not None:
                    len_to_pad = pad_to - experiment_length
                    for i in range(len_to_pad):
                        last_element_to_copy = experiment[experiment_length -
                                                          1]  # zero indexed
                        pad_element = last_element_to_copy.copy()
                        pad_element['grippervel'] = np.zeros(
                            pad_element['grippervel'].shape)
                        objvelocities = pad_element['objvel'].tolist()
                        for k, v in objvelocities.items():
                            objvelocities[k] = np.zeros(v.shape)
                        pad_element['objvel'] = np.asarray(objvelocities)
                        experiment[experiment_length + i] = pad_element

                number_of_total_objects = get_number_of_segment(
                    experiment[0]['seg'])
                n_manipulable_objects = number_of_total_objects - 2  # container and gripper subtracted (background is removed)
                experiment_id = int(experiment[0]['experiment_id'])

                # all data objects are transformed s.t. for each data a list consisting of 'experiment_length' ndarrays returned,
                # if data is multi-dimensional (e.g. segments for each object per times-step), ndarrays are stacked along first
                # dimension
                if depth_data_provided:
                    objects_segments, gripperpos, grippervel, objpos, objvel, img, seg, depth = add_experiment_data_to_lists(
                        experiment,
                        identifier,
                        use_object_seg_data_only_for_init=
                        use_object_seg_data_only_for_init,
                        depth_data_provided=depth_data_provided)
                    depth = [_bytes_feature(i.tostring()) for i in depth]
                else:
                    objects_segments, gripperpos, grippervel, objpos, objvel, img, seg = add_experiment_data_to_lists(
                        experiment,
                        identifier,
                        use_object_seg_data_only_for_init=
                        use_object_seg_data_only_for_init,
                        depth_data_provided=depth_data_provided)
                if pad_to is not None and experiment_length < pad_to:
                    assert not np.any(
                        grippervel[experiment_length:]
                    ), "padded gripperpositions are not zero although they should be"
                    assert not np.any(
                        objvel[experiment_length:]
                    ), "padded objvelocities are not zero although they should be"
                    np.testing.assert_array_equal(img[experiment_length - 1],
                                                  img[experiment_length])

                imgs = [_bytes_feature(i.tostring()) for i in img]
                segs = [_bytes_feature(i.tostring()) for i in seg]
                gripperpositions = [
                    _bytes_feature(i.tostring()) for i in gripperpos
                ]
                grippervelocities = [
                    _bytes_feature(i.tostring()) for i in grippervel
                ]

                # concatenate all object positions/velocities into an ndarray per experiment step
                objpos = [_bytes_feature(i.tostring()) for i in objpos]

                objvel = [_bytes_feature(i.tostring()) for i in objvel]

                objects_segments = [
                    _bytes_feature(i.tostring()) for i in objects_segments
                ]

                feature_list = {
                    'img':
                    tf.train.FeatureList(feature=imgs),
                    'seg':
                    tf.train.FeatureList(feature=segs),
                    'gripperpos':
                    tf.train.FeatureList(feature=gripperpositions),
                    'grippervel':
                    tf.train.FeatureList(feature=grippervelocities),
                    'objpos':
                    tf.train.FeatureList(feature=objpos),
                    'objvel':
                    tf.train.FeatureList(feature=objvel),
                    'object_segments':
                    tf.train.FeatureList(feature=objects_segments)
                }
                if depth_data_provided:
                    feature_list['depth'] = tf.train.FeatureList(feature=depth)

                feature_lists = tf.train.FeatureLists(
                    feature_list=feature_list)

                example = tf.train.SequenceExample(feature_lists=feature_lists)
                example.context.feature[
                    'experiment_length'].int64_list.value.append(
                        len(experiment))
                example.context.feature[
                    'unpadded_experiment_length'].int64_list.value.append(
                        experiment_length)
                example.context.feature[
                    'experiment_id'].int64_list.value.append(experiment_id)
                example.context.feature[
                    'n_total_objects'].int64_list.value.append(
                        number_of_total_objects)
                example.context.feature[
                    'n_manipulable_objects'].int64_list.value.append(
                        n_manipulable_objects)

                writer.write(example.SerializeToString())
Example #26
    def __init__(self):
        super(Encoder, self).__init__()
        self.activation = nn.ReLU(inplace=True)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        # VGG16-BN feature layers: 13 conv layers, each followed by a batch norm
        conv_channels = [(3, 64), (64, 64), (64, 128), (128, 128), (128, 256),
                         (256, 256), (256, 256), (256, 512), (512, 512),
                         (512, 512), (512, 512), (512, 512), (512, 512)]
        self.encoder_conv = [
            nn.Conv2d(in_ch,
                      out_ch,
                      kernel_size=(3, 3),
                      stride=(1, 1),
                      padding=(1, 1)) for in_ch, out_ch in conv_channels
        ]
        self.encoder_batchnorm = [
            nn.BatchNorm2d(out_ch,
                           eps=1e-05,
                           momentum=0.1,
                           affine=True,
                           track_running_stats=True) for _, out_ch in conv_channels
        ]

        # initialize encoder
        encoder_state_dict = model_zoo.load_url(
            'https://download.pytorch.org/models/vgg16_bn-6c64b313.pth')
        # each group of 6 tensors is one VGG conv block:
        # conv weight/bias + batchnorm weight/bias/running_mean/running_var
        encoder_weights = chunks([
            item[1] for item in list(encoder_state_dict.items())
            if 'features' in item[0]
        ], 6)
        for n, item in enumerate(encoder_weights):
            self.encoder_conv[n].weight.data = item[0]
            self.encoder_conv[n].bias.data = item[1]
            self.encoder_batchnorm[n].weight.data = item[2]
            self.encoder_batchnorm[n].bias.data = item[3]
            self.encoder_batchnorm[n].running_mean.data = item[4]
            self.encoder_batchnorm[n].running_var.data = item[5]

        self.encoder_conv = nn.ModuleList(self.encoder_conv)
        self.encoder_batchnorm = nn.ModuleList(self.encoder_batchnorm)
Example #27
def run(debug=False):
    """
    Gets project image from cytomine

    Args:
        debug (bool): If true will save annotations individually and plot any error

    Example:
      python main.py --cytomine_host 'localhost-core' --cytomine_public_key 'dadb7d7a-5822-48f7-ab42-59bce27750ae' --cytomine_private_key 'd73f4602-51d2-4d15-91e4-d4cc175d65fd' --cytomine_id_project 187 --cytomine_id_image_instance 375 --cytomine_id_software 228848

      python main.py --cytomine_host 'localhost-core' --cytomine_public_key 'b6ebb23c-00ff-427b-be24-87b2a82490df' --cytomine_private_key '6812f09b-3f33-4938-82ca-b23032d377fd' --cytomine_id_project 154 --cytomine_id_image_instance 3643

      python main.py --cytomine_host 'localhost-core' --cytomine_public_key 'd2be8bd7-2b0b-40c3-9e81-5ad5765568f3' --cytomine_private_key '6dfe27d7-2ad1-4ca2-8ee9-6321ec3f1318' --cytomine_id_project 197 --cytomine_id_image_instance 2140 --cytomine_id_software 2633

      docker run --gpus all -it --rm --mount type=bind,source=/home/giussepi/Public/environments/Cytomine/cyto_CRLM/,target=/CRLM,bind-propagation=private --network=host ttt --cytomine_host 'localhost-core' --cytomine_public_key 'd2be8bd7-2b0b-40c3-9e81-5ad5765568f3' --cytomine_private_key '6dfe27d7-2ad1-4ca2-8ee9-6321ec3f1318' --cytomine_id_project 197 --cytomine_id_image_instance 31296 --cytomine_id_software 79732
    """

    parser = ArgumentParser(prog="Cytomine Python client example")

    # Cytomine connection parameters
    parser.add_argument('--cytomine_host',
                        dest='host',
                        default='demo.cytomine.be',
                        help="The Cytomine host")
    parser.add_argument('--cytomine_public_key',
                        dest='public_key',
                        help="The Cytomine public key")
    parser.add_argument('--cytomine_private_key',
                        dest='private_key',
                        help="The Cytomine private key")
    parser.add_argument('--cytomine_id_project',
                        dest='id_project',
                        help="The project from which we want the images")
    parser.add_argument('--cytomine_id_software',
                        dest='id_software',
                        help="The software to be used to process the image")
    parser.add_argument('--cytomine_id_image_instance',
                        dest='id_image_instance',
                        help="The image to which the annotation will be added")

    params, _ = parser.parse_known_args(sys.argv[1:])

    with CytomineJob.from_cli(sys.argv[1:]) as cytomine:
        # TODO: To be tested on TITANx
        img = ImageInstance().fetch(params.id_image_instance)
        download_image(img)
        process_wsi_and_save(get_container_image_path(img))
        new_annotations = generate_polygons(get_container_image_path(img),
                                            adapt_to_cytomine=True)
        annotation_collection = None

        for label_key in new_annotations:
            # Sending annotation batches to the server
            for sub_list in chunks(new_annotations[label_key],
                                   ANNOTATION_BATCH):
                if not debug:
                    annotation_collection = AnnotationCollection()

                for exterior_points in sub_list:
                    if debug:
                        annotation_collection = AnnotationCollection()

                    annotation_collection.append(
                        Annotation(location=Polygon(
                            exterior_points.astype(int).reshape(
                                exterior_points.shape[0],
                                exterior_points.shape[2]).tolist()).wkt,
                                   id_image=params.id_image_instance,
                                   id_project=params.id_project,
                                   id_terms=[CYTOMINE_LABELS[label_key]]))

                    if debug:
                        try:
                            annotation_collection.save()
                        except Exception as e:
                            print(
                                exterior_points.astype(int).reshape(
                                    exterior_points.shape[0],
                                    exterior_points.shape[2]).tolist())
                            plt.plot(*Polygon(
                                exterior_points.astype(int).reshape(
                                    exterior_points.shape[0], exterior_points.
                                    shape[2])).exterior.coords.xy)
                            plt.show()
                            # raise(e)
                            print(e)
                        finally:
                            time.sleep(1)

                if not debug:
                    annotation_collection.save()
                    time.sleep(ANNOTATION_SLEEP_TIME)

        # Adding pie chart labels data as image property
        # TODO: Change delete_results_file to True for final test on titanX
        num_pixels_per_label = get_pie_chart_data(
            get_container_image_path(img), delete_results_file=False)

        for percentage, label_ in zip(num_pixels_per_label, Label.names):
            Property(img, key=label_, value='{}%'.format(percentage)).save()

        remove_image_local_copy(img)

        cytomine.job.update(statusComment="Finished.")
Example #28
    def cipher_block(plaintext, idx):
        return list(chunks(aes_function(plaintext), b_size))[idx]
Example #29
def get_chunked_arr(q_arr, num=10):
    chunk_size = math.ceil(len(q_arr) / num)

    chunked_q_arr = chunks(q_arr, chunk_size)
    return chunked_q_arr
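A small usage note, assuming the generator-style chunks sketch from the top of this page: because chunk_size is rounded up, the number of chunks returned can be smaller than num.

parts = list(get_chunked_arr(list(range(25)), num=10))
print(len(parts), [len(p) for p in parts])  # 9 [3, 3, 3, 3, 3, 3, 3, 3, 1]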