def fix_experiments(self, discovered_experiments: List[str]):
    """
    Fixes all the given experiments.
    """
    logging.info("fixing %d experiments", len(discovered_experiments))
    # enhance metadata where possible
    for experiment in tqdm(discovered_experiments, "correct metadata for experiment"):
        self.fix_metadata(experiment)
    # discover associated blanks by naming pattern and link them as references to the acquisition object
    for experiment in tqdm(discovered_experiments, "correct blanks, where required"):
        self.fix_blanks(experiment)
Example #2
import asyncio

from tqdm.asyncio import tqdm, trange


def count(start=0, step=1):
    """Infinite counter generator; unlike itertools.count it supports .send()."""
    n = start
    while True:
        yield n
        n += step


async def main():
    N = int(1e6)
    async for row in tqdm(trange(N, desc="inner"), desc="outer"):
        if row >= N:
            break
    with tqdm(count(), desc="coroutine", total=N + 2) as pbar:
        async for row in pbar:
            if row == N:
                pbar.send(-10)
            elif row < 0:
                assert row == -9
                break
    # ten concurrent 1-second sleeps finish in ~1s total, well under 10 seconds
    for i in tqdm.as_completed(list(map(asyncio.sleep, [1] * 10)),
                               desc="as_completed"):
        await i
Example #3
    def fix_unknown_experiment(self, args):
        """Loads the 'unknown' experiment, tries to parse all
        of its file names and updates the related metadata
        to get them into the correct format."""
        logging.info("loading all samples for unknown experiment...")
        # 1. load the experiment and all its samples
        samples = self.stasisClient.load_samples_for_experiment("unknown")

        logging.info("found %d samples", len(samples))
        discovered_experiments = []
        # 2. extract the properties from the file names, like mode, experiment, instrument
        for sample in tqdm(samples, "discovering correct experiments"):
            experiment, instrument, ionmode, sample_id = self.extract_properties(sample['id'])
            sample['experiment'] = experiment
            sample['acquisition']['ionisation'] = "positive" if ionmode.lower().startswith('pos') else "negative"
            logging.info(f"updating acquisition data or {sample['id']}")

            self.update_acquisition_data(sample)
            # now drop old version

            if experiment not in discovered_experiments:
                discovered_experiments.append(experiment)

        # 3. fix the experiments, based on the discovered experiment ids
        self.fix_experiments(discovered_experiments)

        return {"updated": len(samples)}
Example #4
async def main(database, out_dir, preprocess, num_workers):
    """
    Event loop
    """
    print(f"Connecting to {database}")
    client = motor.motor_asyncio.AsyncIOMotorClient()
    db = client[database]

    print("Contando...")

    tweets = db.tweets.find()
    pbar = tqdm(total=await db.tweets.estimated_document_count())

    print("Comenzando!")

    queue = asyncio.Queue()

    # Create num_workers worker tasks to process the queue concurrently.
    tasks = []
    for i in range(num_workers):
        task = asyncio.create_task(worker(f'worker-{i}', queue, pbar, out_dir))
        tasks.append(task)

    # Feed every tweet into the queue.
    async for tweet in tweets:
        queue.put_nowait(tweet)

    await queue.join()
    for task in tasks:
        task.cancel()
    # Wait until all worker tasks are cancelled.
    await asyncio.gather(*tasks, return_exceptions=True)
Example #5
    def test_ww_edges_time(self):
        print("loading and parsing data, this might take a few seconds...")
        time = [datetime.now()]
        train = pd.read_csv("../../data/amazon/train.csv")

        X = train['Text'].tolist()
        cv = CountVectorizer(stop_words="english", min_df=5, max_df=0.9).fit(X)
        n_vocab = len(cv.vocabulary_)
        n_documents = len(X)
        X = jl.Parallel(n_jobs=8)(jl.delayed(lambda doc: [
            x.lower() for x in RegexpTokenizer(r"\w+").tokenize(doc)
            if x.lower() in cv.vocabulary_
        ])(doc) for doc in tqdm(X))
        max_sent_len = max(map(len, X))
        X = np.array(jl.Parallel(n_jobs=8)(
            jl.delayed(lambda doc: [cv.vocabulary_[w] for w in doc] + [-1] *
                       (max_sent_len - len(doc)))(doc) for doc in X),
                     dtype=np.int32)

        # sanity check before the actual unit test
        assert X.shape == (n_documents, max_sent_len)
        time.append(datetime.now())
        print(f"loading complete!. Took {time[1] - time[0]}")
        print("starting unit test...")
        result = compute_word_word_edges(X,
                                         n_vocab,
                                         n_documents,
                                         max_sent_len,
                                         n_jobs=8)
        print(f"edge shape is {result[0].shape}")
        print(result)
        time.append(datetime.now())
        print(f"graph building took {time[2] - time[1]}")
Example #6
    def __init__(
        self,
        queue: Queue,
        filename: str,
        max_interval: int = 2,
        max_chunk: int = 100,
        include_headers: bool = False,
        silent: bool = False,
        early_stop: int = None,
    ):
        self.filename = filename
        if not os.path.exists(filename):
            head, tail = os.path.split(self.filename)
            if len(head) > 0:
                os.makedirs(head, exist_ok=True)
        if include_headers:
            self._write_headers()

        self.max_interval = max_interval
        self.max_chunk = max_chunk
        self.data_queue = queue
        self.stop_flag = False
        self.early_stop = early_stop
        self.total = 0
        if not silent:
            self.progress = tqdm(desc="written", unit="emails")
        else:
            self.progress = None
        self.state = None
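
Since no total is known up front, the bar above simply counts written records. A minimal, self-contained sketch of that manual-update pattern (the queue contents here are made up):

from queue import Queue

from tqdm import tqdm

pbar = tqdm(desc="written", unit="emails")  # open-ended bar, no total
queue = Queue()
for item in ("a@example.com", "b@example.org"):
    queue.put(item)
while not queue.empty():
    record = queue.get()  # a real writer would flush the record to disk here
    pbar.update(1)        # tick the counter once per written record
pbar.close()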
Example #7
File: io.py  Project: plantnet/gbif-dl
async def _download_from_asyncgen(
    items: AsyncGenerator,
    params: DownloadParams,
    tcp_connections: int = 64,
    nb_workers: int = 64,
    batch_size: int = 16,
    retries: int = 1,
    logger: logging.Logger = None,
):
    """Asynchronous downloader that takes an interable and downloads it

    Args:
        items (Union[Generator, AsyncGenerator]): (async/sync) generator that yiels a standardized dict of urls
        params (DownloadParams): Download parameter dict
        tcp_connections (int, optional): Maximum number of concurrent TCP connections. Defaults to 128.
        nb_workers (int, optional): Maximum number of workers. Defaults to 64.
        batch_size (int, optional): Maximum queue batch size. Defaults to 16.
        retries (int, optional): Maximum number of attempts. Defaults to 1.
        logger (logging.Logger, optional): Logger object. Defaults to None.
    Raises:
        NotImplementedError: If generator turns out to be invalid.
    """

    queue = asyncio.Queue(nb_workers)
    progressbar = tqdm(
        smoothing=0,
        unit=" Downloads",
        # guard against the default logger=None
        disable=logger is not None and logger.getEffectiveLevel() > logging.INFO)
    stats = {"failed": 0, "skipped": 0, "success": 0}

    retry_options = ExponentialRetry(attempts=retries)

    async with RetryClient(
            connector=aiohttp.TCPConnector(limit=tcp_connections),
            raise_for_status=True,
            retry_options=retry_options,
            trust_env=True,
    ) as session:

        loop = asyncio.get_event_loop()
        workers = [
            loop.create_task(
                _download_queue(queue,
                                session,
                                stats,
                                params=params,
                                progressbar=progressbar,
                                logger=logger)) for _ in range(nb_workers)
        ]

        # get chunks from async generator and add to async queue
        async with aiostream.stream.chunks(items, batch_size).stream() as chnk:
            async for batch in chnk:
                await queue.put(batch)

        await queue.join()

    for w in workers:
        w.cancel()

    return stats
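
A rough usage sketch for the downloader above, assuming `items` yields dicts in whatever shape `DownloadParams` and the (not shown) `_download_queue` expect; the URL and field names here are placeholders:

import asyncio

async def example_items():
    # Hypothetical payload shape; the real keys depend on _download_queue.
    yield {"url": "https://example.com/a.jpg", "basename": "a.jpg"}

stats = asyncio.run(_download_from_asyncgen(example_items(), params={}, nb_workers=4))
print(stats)  # e.g. {'failed': 0, 'skipped': 0, 'success': 1}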
Example #8
async def download(client, path, model_id, url, date=None, id_=None):
    filename = url.split('?', 1)[0].rsplit('/', 1)[-1]
    path_to_file = path / filename

    async with client.stream('GET', url) as r:
        if not r.is_error:
            total = int(r.headers['Content-Length'])
            with tqdm(desc=filename,
                      total=total,
                      unit_scale=True,
                      unit_divisor=1024,
                      unit='B',
                      leave=False) as bar:
                num_bytes_downloaded = r.num_bytes_downloaded
                with open(path_to_file, 'wb') as f:
                    async for chunk in r.aiter_bytes(chunk_size=1024):
                        f.write(chunk)
                        bar.update(r.num_bytes_downloaded -
                                   num_bytes_downloaded)
                        num_bytes_downloaded = r.num_bytes_downloaded

        else:
            r.raise_for_status()

    if path_to_file.is_file():
        if date:
            set_time(path_to_file, convert_date_to_timestamp(date))

        if id_:
            data = (id_, filename)
            operations.write_from_data(data, model_id)
Example #9
def select(func,
           start_date="2016-10-01",
           end_date=None,
           callback=print,
           order_book_id_list=None) -> np.ndarray:
    result = []
    print(getsourcelines(func))
    start_date = get_int_date(start_date)
    if end_date is None:
        end_date = datetime.date.today()
    end_date = get_int_date(end_date)
    data_backend = ExecutionContext.get_data_backend()
    if not order_book_id_list:
        order_book_id_list = data_backend.get_order_book_id_list()
    trading_dates = data_backend.get_trading_dates(start=start_date,
                                                   end=end_date)
    set_start_date(trading_dates[0] - 10000)
    for idx, date in enumerate(reversed(trading_dates)):
        if end_date and date > get_int_date(end_date):
            continue
        if date < get_int_date(start_date):
            # 日期小于开始日期则计算完成
            break
        set_current_date(str(date))
        print(f"[{date}]")

        pbar = tqdm(order_book_id_list)
        for order_book_id in pbar:
            result.append(choose(order_book_id, func, callback))
            pbar.set_description(
                "Processing {}".format(order_book_id))
    print("")
    return _list2Array(result)
Example #10
File: tqdm.py  Project: Sheshuk/snap-base
def meter(**kwargs):
    """
    A monitoring :term:`step`.
    Displays a `tqdm` progress bar whose position is set by the incoming data (must be a float).
    For the full argument list, see the `tqdm documentation <https://github.com/tqdm/tqdm#documentation>`_.

    Keyword Args:
        desc (str)
            Prefix for the progress bar description
        total (int or float, optional)
            Maximal value

    :Input:
        data (float): A value to be displayed
    :Output:
        data unchanged
    """
    kwargs.setdefault('bar_format', '{desc}[{n: 6.4f}]|{bar}|{r_bar}')
    t = tqdm(**kwargs)
    def _f(d):
        t.n = d
        t.update(0)
        t.refresh()
        return d
    return _f
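
A minimal usage sketch for `meter` (the values fed through are made up):

# Build the step once, then pass each incoming float through it.
show = meter(desc="level", total=1.0)
for value in (0.25, 0.5, 0.75, 1.0):
    show(value)  # the bar jumps to the given position and returns the value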
Example #11
async def main():
    async with Bergen(
            host="p-tnagerl-lab1",
            port=8000,
            client_id="DSNwVKbSmvKuIUln36FmpWNVE2KrbS2oRX0ke8PJ",
            client_secret="Gp3VldiWUmHgKkIxZjL2aEjVmNwnSyIGHWbQJo6bWMDoIUlBqvUyoGWUWAe6jI3KRXDOsD13gkYVCZR0po1BLFO9QT4lktKODHDs0GyyJEzmIjkpEOItfdCC4zIa3Qzu",
            # if we want to use only pods on this instance, we would select it here
            name="frankomanko",
    ):
        sleep = await Node.asyncs.get(package="basic", interface="sleep")

        result = None
        with tqdm(total=100) as pbar:
            async with sleep.stream_progress({"interval": 1}) as stream:
                async for item in stream:
                    result = item
                    if isinstance(result, dict):
                        break

                    progress, message = item.split(":")
                    try:
                        pbar.n = int(progress)
                        pbar.refresh()
                    except ValueError:
                        pass
                    pbar.set_postfix_str(textwrap.shorten(message, width=30, placeholder="..."))
            pbar.n = 100
            pbar.refresh()
            pbar.set_postfix_str("Done")
        print(result)
Example #12
async def achoose(order_book_id_list, func, callback):
    results = []
    with tqdm(range(len(order_book_id_list))) as pbar:
        async for i in pbar:
            order_book_id = order_book_id_list[i]
            results.append(choose(order_book_id, func, callback))
            if i % 10 == 0:  # throttle description updates
                pbar.set_description(f"{i}, {order_book_id}")
    return results
Example #13
# (same imports and count() helper as in Example #2)
async def main():
    N = int(1e6)
    async for row in tqdm(trange(N, desc="inner"), desc="outer"):
        if row >= N:
            break
    with tqdm(count(), desc="coroutine", total=N + 2) as pbar:
        async for row in pbar:
            if row == N:
                pbar.send(-10)
            elif row < 0:
                assert row == -9
                break
    # should be ~1sec rather than ~50s due to async scheduling
    for i in tqdm.as_completed(
        [asyncio.sleep(0.01 * i) for i in range(100, 0, -1)],
            desc="as_completed"):
        await i
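
Besides `as_completed`, `tqdm.asyncio` also provides a `gather` wrapper that mirrors `asyncio.gather` while showing progress; a small sketch:

import asyncio

from tqdm.asyncio import tqdm

async def demo():
    # Results come back in submission order, but the bar ticks per completion.
    results = await tqdm.gather(*(asyncio.sleep(0.01 * i, result=i) for i in range(10)),
                                desc="gather")
    print(results)

asyncio.run(demo())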
Example #14
    def reset(self, desc: str = 'Loading data', total_count: Optional[int] = None) -> None:
        """
        Resets the progress counter.
        :param desc: str (optional), default 'Loading data'; label for the progress bar
        :param total_count: (optional) total number of objects to iterate over
        """
        try:
            self.load.close()
        except AttributeError:
            pass
        self.load = tqdm(desc=desc, total=total_count, unit='pcs', ncols=100)
Example #15
    async def count_here(self, ctx):
        msg_amount: int = 0
        await ctx.send("Starting counting...")
        for channel in ctx.guild.text_channels:
            print("Now in " + channel.name)
            await ctx.send("Now in " + channel.name + ". Messages processed to this moment: " + str(msg_amount),
                           delete_after=5.0)
            async for message in tqdm(channel.history(limit=None)):
                self.get_message_stats(ctx.guild.id, message)
                msg_amount = msg_amount + 1
        await ctx.send("I'm done! Messages processed: " + str(msg_amount))
Example #16
async def main(database, out_dir, preprocess, num_workers):
    """
    Event loop
    """
    print(f"Connecting to {database}")
    client = motor.motor_asyncio.AsyncIOMotorClient()
    db = client[database]

    query = {"processed": True}
    print("Contando...")
    total_users = await db.users.count_documents(query)

    print("Buscando usuarios...")
    users_and_tweets = db.users.aggregate([
        {
            "$match": query
        },
        {
            "$lookup": {
                "from": "tweets",
                "localField": "id",
                "foreignField": "user_id",
                "as": "tweets"
            }
        },
        {
            "$project": {
                "id": 1,
                "screen_name": 1,
                "tweets.text": 1
            }
        },
    ])

    pbar = tqdm(total=total_users)

    queue = asyncio.Queue()

    # Create num_workers worker tasks to process the queue concurrently.
    print(f"Creating {num_workers} workers")
    tasks = []
    for i in range(num_workers):
        task = asyncio.create_task(worker(f'worker-{i}', queue, pbar, out_dir))
        tasks.append(task)

    print("Comenzando!")
    async for user in users_and_tweets:
        queue.put_nowait(user)

    await queue.join()
    for task in tasks:
        task.cancel()
    # Wait until all worker tasks are cancelled.
    await asyncio.gather(*tasks, return_exceptions=True)
Example #17
async def download_photos(photos_path: Path, photos: list):
    async with aiohttp.ClientSession() as session:
        futures = []
        for i, photo in enumerate(photos, start=1):
            photo_title = "{}_{}_{}_{}.jpg".format(i, photo.get("likes", ""),
                                                   photo["owner_id"],
                                                   photo["id"])
            photo_path = photos_path.joinpath(photo_title)
            futures.append(download_photo(session, photo["url"], photo_path))

        for future in tqdm(asyncio.as_completed(futures), total=len(futures)):
            await future
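
The `tqdm(asyncio.as_completed(...), total=...)` pattern above works with any set of coroutines; a self-contained sketch:

import asyncio

from tqdm import tqdm

async def demo():
    tasks = [asyncio.sleep(0.05, result=i) for i in range(20)]
    # The bar advances each time any of the coroutines finishes.
    for future in tqdm(asyncio.as_completed(tasks), total=len(tasks)):
        await future

asyncio.run(demo())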
Example #18
async def scan_mem(rest: RestClient,
                   settings: Dict[str, Any],
                   *filters: TxFilter) -> List[Tx]:
    """
    Scan available mempool, for each transaction get the prevout information from the UTXO set
    """
    result: List[Tx] = []
    result_append: Callable = result.append
    pending: set[Task] = set()
    match_policy: Callable = all if settings['filtering']['match_all'] else any
    f_matches: List[Callable] = [f.match for f in filters]
    no_filter: bool = not filters
    print('Requested mempool scan\n')
    try:
        mempool: Dict[str, Any] = await rest.get_mempool(True)
        async for tx_done, pending in tqdm(iterate_mem_txs(rest,
                                                           mempool,
                                                           settings['limits']['concurrency_limit']),
                                           miniters=200,
                                           mininterval=0.5,
                                           total=len(mempool)):
            try:
                tx: Tx = await tx_done
            except ClientResponseError:
                continue
            if no_filter:
                result_append(tx)
            elif match_policy(f(tx) for f in f_matches):
                result_append(tx)
    except CancelledError as err:
        # logger.warning('Tasks canceled', exc_info=True)
        print_error('Task canceled', str(err))
    except ClientConnectionError as err:
        print_error('Connection error', 'Cannot establish connection with Bitcoin full node')
        print(str(err))
    except MemoryError as err:
        # logger.warning('MemoryError', exc_info=True)
        print_error('Memory error', str(err))
    except KeyboardInterrupt:
        print_error('Keyboard Interrupt', 'Stopping execution')
    except Exception as err:
        # logger.error('Something went wrong', exc_info=True)
        print_error('Something went wrong', str(err))
    finally:
        # Clean up
        for task in pending:
            task.cancel()
            try:
                await task
            except CancelledError:
                pass
    return result
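
For context, `iterate_mem_txs` (not shown) apparently yields `(finished_task, pending_set)` pairs; a hypothetical sketch of such a helper built on `asyncio.wait`, with all names assumed rather than taken from the project:

import asyncio

async def iterate_tasks(coros, limit):
    # Hypothetical: run at most `limit` coroutines concurrently and yield
    # each finished Task together with the currently pending set.
    it = iter(coros)
    pending = {asyncio.ensure_future(c) for c, _ in zip(it, range(limit))}
    while pending:
        done, pending = await asyncio.wait(pending, return_when=asyncio.FIRST_COMPLETED)
        for task in done:
            nxt = next(it, None)
            if nxt is not None:
                pending.add(asyncio.ensure_future(nxt))
            yield task, pending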
Example #19
async def scan_blocks(start: int,
                      end: int,
                      rest: RestClient,
                      settings: Dict[str, Any],
                      *filters: TxFilter) -> List[Tx]:
    """
    Scan from `start` block height to `end` block height, included,
    and yield each transaction from those blocks that match the given criteria.

    See `parse_start_and_end` function for valid `start` and `end` values.
    """
    result: List[Tx] = []
    pending: set[Task] = set()
    result_extend: Callable = result.extend
    try:
        chain_info = await rest.get_chain_info()
        start, end = parse_start_and_end(start, end, chain_info, settings['scan']['force'])
        print(f'Requested scan from block {start} to block {end}, included.\n')
        async for block_done, pending in tqdm(iterate_blocks(start, end, rest,
                                                             settings['limits']['memory_limit'],
                                                             settings['limits']['concurrency_limit']),
                                              miniters=1,
                                              mininterval=0.5,
                                              total=end + 1 - start):
            result_extend(iter_filter_block_txs(await block_done,
                                                settings['filtering']['match_all'],
                                                filters))
    except CancelledError as err:
        # logger.warning('Tasks canceled', exc_info=True)
        print_error('Task canceled', str(err))
    except ClientConnectionError:
        # logger.error('Connection error', exc_info=True)
        print_error('Connection error', 'Cannot establish connection with Bitcoin full node')
    except MemoryError as err:
        # logger.warning('MemoryError', exc_info=True)
        print_error('Memory error', str(err))
    except KeyboardInterrupt:
        print_error('Keyboard Interrupt', 'Stopping execution')
    except Exception as err:
        # logger.error('Something went wrong', exc_info=True)
        print_error('Something went wrong', str(err))
    finally:
        # Clean up
        for task in pending:
            task.cancel()
            try:
                await task
            except CancelledError:
                pass
    return result
Example #20
async def _retrieve_and_write_to_disk(*, response: httpx.Response,
                                      outfile: Path, mode: Literal['ab', 'wb'],
                                      desc: str, local_file_size: int,
                                      remote_file_size: int,
                                      remote_file_hash: Optional[str],
                                      verify_hash: bool,
                                      verify_size: bool) -> None:
    hash = hashlib.md5()

    # If we're resuming a download, ensure the already-downloaded
    # parts of the file are fed into the hash function before
    # we continue.
    if verify_hash and local_file_size > 0:
        async with aiofiles.open(outfile, 'rb') as f:
            while True:
                data = await f.read(65536)
                if not data:
                    break
                hash.update(data)

    async with aiofiles.open(outfile, mode=mode) as f:
        with tqdm(desc=desc,
                  initial=local_file_size,
                  total=remote_file_size,
                  unit='B',
                  unit_scale=True,
                  unit_divisor=1024,
                  leave=False) as progress:
            num_bytes_downloaded = response.num_bytes_downloaded

            # TODO Add timeout handling here, too.
            async for chunk in response.aiter_bytes():
                await f.write(chunk)
                progress.update(response.num_bytes_downloaded -
                                num_bytes_downloaded)
                num_bytes_downloaded = response.num_bytes_downloaded
                if verify_hash:
                    hash.update(chunk)

        if verify_hash and remote_file_hash is not None:
            assert hash.hexdigest() == remote_file_hash

        # Check the file was completely downloaded.
        if verify_size:
            await f.flush()
            local_file_size = outfile.stat().st_size
            if local_file_size != remote_file_size:
                raise RuntimeError(
                    f'Server claimed file size would be {remote_file_size} '
                    f'bytes, but downloaded {local_file_size} bytes.')
Example #21
async def map_doc(index_name="documents"):
    CHUNKSIZE = 50000
    document_loader = DataLoader(COLLECTION_PATH,
                                 chunksize=CHUNKSIZE,
                                 names=["pid", "passage"])
    for _, collection in tqdm(enumerate(document_loader.reader),
                              desc="collection"):
        for i, row in collection.iterrows():
            yield {
                '_op_type': 'index',
                '_index': index_name,
                'pid': row.pid,
                'passage': row.passage,
            }
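
Because `map_doc` is an async generator of index actions, it can be fed straight to Elasticsearch's async bulk helper; a sketch under that assumption (the endpoint and client setup are placeholders):

import asyncio

from elasticsearch import AsyncElasticsearch
from elasticsearch.helpers import async_bulk

async def index_collection():
    es = AsyncElasticsearch("http://localhost:9200")  # assumed endpoint
    # async_bulk consumes the async generator of '_op_type: index' actions
    await async_bulk(es, map_doc(index_name="documents"))
    await es.close()

asyncio.run(index_collection())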
Example #22
async def upload_controller(src_folder: Path, dst_url: str):
    if not src_folder.exists():
        raise FileNotFoundError('Folder you specified does not exist')
    image_files = filter(
        lambda path: Path.is_file(path) and filetype.is_image(str(path)),
        src_folder.rglob('*'))
    loop = asyncio.get_event_loop()
    semaphore = asyncio.Semaphore(value=MAX_CONCURRENT_FILES_PROCESSED)
    async with aiohttp.ClientSession() as session:
        tasks = [
            loop.create_task(upload_task(session, path, dst_url, semaphore))
            for path in image_files
        ]
        for task in tqdm(asyncio.as_completed(tasks), total=len(tasks)):
            await task
Example #23
def update_tests(mock_get):
    fetch_languages = {}
    pack_languages = {}
    with tqdm(total=len(test_words)) as progress_bar:
        for tuple_word in test_words:
            word, old_id, language = tuple_word
            parser.set_language(language)
            result = parser.fetch(word, old_id=old_id)
            fetch_languages.setdefault(language, {}).update({word: result})
            pack_languages.setdefault(language, {}).update(
                {word: parser.pack_definitions_and_examples(result)})
            progress_bar.update()
    with open('test_fetch_output.json', 'w') as f:
        f.write(json.dumps(fetch_languages, ensure_ascii=False, indent=4))
    with open('test_pack_output.json', 'w') as f:
        f.write(json.dumps(pack_languages, ensure_ascii=False, indent=4))
Example #24
async def process_urls(headers, username, model_id, urls):
    if urls:
        operations.create_database(model_id)
        media_ids = operations.get_media_ids(model_id)
        separated_urls = separate_by_id(urls, media_ids)

        path = pathlib.Path.cwd() / username
        path.mkdir(exist_ok=True)

        # Added pool limit:
        limits = httpx.Limits(max_connections=8, max_keepalive_connections=5)
        async with httpx.AsyncClient(headers=headers, limits=limits, timeout=None) as c:
            add_cookies(c)

            aws = [asyncio.create_task(
                download(c, path, model_id, *url)) for url in separated_urls]

            photo_count = 0
            video_count = 0
            total_bytes_downloaded = 0
            data = 0

            desc = 'Progress: ({p_count} photos, {v_count} videos || {data})'

            with tqdm(desc=desc.format(p_count=photo_count, v_count=video_count, data=data), total=len(aws), colour='cyan', leave=True) as main_bar:
                for coro in asyncio.as_completed(aws):
                    try:
                        media_type, num_bytes_downloaded = await coro
                    except Exception as e:
                        print(e)
                        main_bar.update()  # still count the failed download
                        continue

                    total_bytes_downloaded += num_bytes_downloaded
                    data = convert_num_bytes(total_bytes_downloaded)

                    if media_type == 'photo':
                        photo_count += 1
                        main_bar.set_description(
                            desc.format(
                                p_count=photo_count, v_count=video_count, data=data), refresh=False)

                    elif media_type == 'video':
                        video_count += 1
                        main_bar.set_description(
                            desc.format(
                                p_count=photo_count, v_count=video_count, data=data), refresh=False)

                    main_bar.update()
Example #25
async def _get_org_names_for_list(ip_list, desc="extracting org names"):
    # asyncio
    task_list = [_get_org_name(ip) for ip in ip_list]

    # # if you use this code, remove the async and invoke this function directly (no progress bar)
    # t1 = time.time()
    # loop = asyncio.get_event_loop()
    # finished_tasks = loop.run_until_complete(asyncio.wait(task_list))
    # results = [t.result() for t in finished_tasks[0]]
    # loop.close()
    # t2 = time.time()
    # print("finished in {:.2} s.".format(t2 - t1))

    results = [
        await f for f in tqdm(
            asyncio.as_completed(task_list), desc=desc, total=len(task_list))
    ]
    return results
Example #26
async def export_tx_csv(outfile):
    with open(outfile, mode='w') as f:
        writer = csv.writer(f,
                            delimiter='\t',
                            quotechar='"',
                            quoting=csv.QUOTE_MINIMAL)
        writer.writerow([
            'type',
            'date',
            'block_height',
            'pool',
            'input_address',
            'input_asset',
            'input_amount',
            'input_usd_price',
            'output_address',
            'output_asset',
            'output_amount',
            'output_usd_price',
            'rune_volume',
            'usd_volume',
            'fee',
            'slip',
        ])
        n = await ThorTx.all_by_date().count()
        print(f'export_tx_csv n = {n}')
        with tqdm(total=n) as pbar:
            async for tx in ThorTx.all():
                if tx.rune_volume is not None:
                    writer.writerow([
                        tx.type, tx.date, tx.block_height, tx.pool1,
                        tx.input_address, tx.input_asset, tx.input_amount,
                        tx.input_usd_price, tx.output_address, tx.output_asset,
                        tx.output_amount, tx.output_usd_price, tx.rune_volume,
                        tx.usd_volume, tx.fee, tx.slip
                    ])
                else:
                    print(f'stop! tx is incomplete {tx}')
                    # break
                pbar.update(1)
Example #27
    def __init__(
        self,
        query: str,
        token: str,
        continue_from: State = None,
        early_stop: int = None,
        output_path: str = "data/github_users.csv",
        silent: bool = False,
        state: State = None,
        org_flag: bool = False,
    ):
        self.data_queue = Queue()
        self.search_queue = Queue()
        self.output_queue = Queue()
        self.org_flag = org_flag
        if not silent:
            self.progress = tqdm(desc="progress", position=0, unit="pages")
        else:
            self.progress = None

        self.worker = StalkerWorker(self.data_queue)
        self.writer = CSVWriter(
            self.output_queue,
            filename=output_path,
            early_stop=early_stop,
            silent=silent,
        )
        self.query = query
        self.state = continue_from
        self.start_time = datetime.datetime.utcnow()
        self.early_stop = early_stop
        self.search = Search(
            query=query,
            token=token,
            continue_from=continue_from,
            state=state,
            silent=silent,
            org_flag=self.org_flag,
        )
Example #28
    def __init__(self, config, targets):
        self.config = config
        self.targets = targets
        if self.config.url:
            total = len(self.config.pathlist)
        else:
            total = len(self.config.pathlist) * len(self.config.url_list)
        self.pbar = tqdm(total=total,
                         ascii=True,
                         position=0,
                         leave=False,
                         dynamic_ncols=True)
        self.logger = get_logger('SCAN',
                                 'INFO',
                                 handler=TqdmLoggingHandler(self.pbar))
        self.scan_logger = get_logger('URL',
                                      'INFO',
                                      log_format="[%(asctime)s] %(message)s",
                                      handler=TqdmLoggingHandler(self.pbar))
        self.sessions = []
        self.loop = asyncio.get_event_loop()
        self.loop.set_exception_handler(self.handle_exception)
        self.setup_sighandler()
        if not self.config.proxy:
            self.conn = TCPConnector(
                limit=self.config.max_connections,
                limit_per_host=self.config.max_connections_per_host,
                ttl_dns_cache=300)
        else:
            if self.config.proxy.startswith('socks5h'):
                proxy_addr = self.config.proxy.replace("socks5h", "socks5")
                self.conn = ProxyConnector.from_url(proxy_addr)
                self.conn._rdns = True
        self.tasks = defaultdict(list)
        self.sem = asyncio.Semaphore(self.config.max_connections)
        self.setup_sessions()
        self.running = asyncio.Event()
        self.running.set()
Example #29
async def process_urls(headers, username, model_id, urls):
    if urls:
        operations.create_database(model_id)
        media_ids = operations.get_media_ids(model_id)
        separated_urls = separate_by_id(urls, media_ids)

        path = pathlib.Path.cwd() / username
        path.mkdir(exist_ok=True)

        # Added pool limit:
        limits = httpx.Limits(max_connections=10, max_keepalive_connections=5)
        async with httpx.AsyncClient(headers=headers, limits=limits) as c:
            aws = [
                asyncio.create_task(download(c, path, model_id, *url))
                for url in separated_urls
            ]

            with tqdm(desc='Files downloaded',
                      total=len(aws),
                      colour='cyan',
                      leave=True) as bar:
                for coro in asyncio.as_completed(aws):
                    await coro
                    bar.update()
Example #30
File: cli.py  Project: AviGopal/userbot
def start(
    query,
    page_size,
    continue_from,
    early_stop,
    sort,
    order,
    output,
    workers,
    token,
    username,
    silent,
    no_auth,
    org,
):
    click.clear()

    if not token:
        click.echo(
            "(You can set the GITHUB_TOKEN environment variable to skip this)")
        token = click.prompt("GitHub Personal Access Token")
    if not token:
        click.echo("Token is invalid")
        raise click.exceptions.Exit(1)

    click.clear()

    state = None

    if os.path.exists(".state"):
        if click.confirm("Continue from last saved state? (Y/n)"):
            with open(".state", "rb") as fp:
                state = pickle.load(fp)
                continue_from = state.continue_from
                query = state.query
    if not silent:
        if not state:
            click.echo(f"current query is {query}")
            if click.confirm("enter new query? (y/N)"):
                query = click.prompt("query")

            click.echo(f"continue from {continue_from}")
            if click.confirm("change? (y/N)", ):
                continue_from = int(click.prompt("page number"))

        click.echo(
            f"stopping after adding {early_stop}? (0 runs until completion)")
        if click.confirm("change? (y/N)", ):
            early_stop = int(click.prompt("number of entries"))

        click.echo(f"output directory is: {output}")
        if click.confirm("change? (y/N)", ):
            output = str(click.prompt("filepath"))
    if org:
        query = "type:org " + query
    if not silent:
        click.clear()
        click.echo(f"started at:  {datetime.now().isoformat()}")
        click.echo(f"user:  {username}")
        click.echo(f"query: {query}\n")
        click.echo(f"starting from: {continue_from}\n")
        click.echo(
            f"ending early: {f'minimum {early_stop} entries' if early_stop else 'no'}\n"
        )
    stalker = Stalker(
        query=query,
        token=token,
        continue_from=continue_from,
        output_path=output,
        silent=silent,
        state=state,
        early_stop=early_stop,
        org_flag=org,
    )
    try:
        stalker.start()
    except click.exceptions.Abort:
        click.echo("exiting...")
        stalker.stop()
    if not silent:
        tqdm.write("saved state to .state")
    else:
        print("saved state to .state")