Example no. 1
    def separate_into_parts(self, input_path: str, output_path: str):
        """Creates a dynamic mix

        :param input_path: Input path
        :param output_path: Output path
        """
        input_path = Path(input_path)
        output_path = Path(output_path)

        model = self.get_model()
        raw_sources = self.apply_model(model, input_path)

        # Export all source MP3s in parallel
        pool = Pool()
        tasks = []

        for source, name in zip(raw_sources,
                                ['drums', 'bass', 'other', 'vocals']):

            source = source.cpu().transpose(0, 1).numpy()
            filename = f'{name}.mp3'

            print(f'Exporting {name} MP3...')
            task = pool.apply_async(self.audio_adapter.save,
                                    (output_path / filename, source,
                                     self.sample_rate, 'mp3', self.bitrate))
            tasks.append(task)

        try:
            pool.close()
            pool.join()
        except SoftTimeLimitExceeded as e:
            pool.terminate()
            raise e
Example no. 2
    def separate_into_parts(self, input_path: str, output_path: str):
        """Creates a dynamic mix

        :param input_path: Input path
        :param output_path: Output path
        """
        input_path = Path(input_path)
        output_path = Path(output_path)
        self.download_and_verify()
        raw_sources = self.apply_model(input_path)

        # Export all source MP3s in parallel
        pool = Pool()
        tasks = []

        for source, name in zip(raw_sources,
                                ['drums', 'bass', 'other', 'vocals']):
            source = (source * 2**15).clamp_(-2**15, 2**15 - 1).short()
            source = source.cpu().transpose(0, 1).numpy()
            filename = f'{name}.mp3'

            print(f'Exporting {name} MP3...')
            task = pool.apply_async(encode_mp3,
                                    (source, str(output_path / filename),
                                     self.bitrate, self.verbose))
            tasks.append(task)

        try:
            pool.close()
            pool.join()
        except SoftTimeLimitExceeded as e:
            pool.terminate()
            raise e
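
Note that both variants above collect the AsyncResult handles in `tasks` but never call `.get()` on them, so an exception raised inside `audio_adapter.save` or `encode_mp3` would pass unnoticed. Below is a minimal standalone sketch of the same apply_async pattern, using the standard-library `multiprocessing.Pool` rather than the billiard/Celery pool implied by `SoftTimeLimitExceeded`, and a toy `_export_stub` worker (both hypothetical stand-ins, not part of the examples above), that surfaces worker errors after `join()`:

import multiprocessing

def _export_stub(path, payload):
    # Toy stand-in for an audio export call; any exception raised here is
    # stored inside the AsyncResult until .get() is called.
    return f'{path}:{len(payload)}'

if __name__ == '__main__':
    pool = multiprocessing.Pool()
    tasks = [pool.apply_async(_export_stub, (f'part{i}.mp3', b'\x00' * 16))
             for i in range(4)]
    pool.close()
    pool.join()
    # .get() re-raises any worker exception here instead of silently dropping it.
    results = [t.get() for t in tasks]
    print(results)
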
Example no. 3
 def extractor_pool(self, func, iterable):
     '''
     Extract items (billiard multiprocessing use)
     :param func: function
     :param iterable: list
     '''
     _finalizers = list()
     p = Pool(processes=cpu_count())
     _finalizers.append(Finalize(p, p.terminate))
     try:
         p.map_async(func, iterable)
         p.close()
         p.join()
     finally:
         p.terminate()
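
The example registers `Finalize(p, p.terminate)` so the pool is torn down even if the caller never reaches `join()`. Below is a minimal self-contained sketch of the same guard, here using the standard-library `multiprocessing.Pool` and `multiprocessing.util.Finalize` instead of billiard, with a toy `_work` function (assumed names, not from the example):

from multiprocessing import Pool, cpu_count
from multiprocessing.util import Finalize

def _work(item):
    return item * item

if __name__ == '__main__':
    pool = Pool(processes=cpu_count())
    # Terminate the pool at interpreter exit even if join() is never reached.
    Finalize(pool, pool.terminate, exitpriority=1)
    try:
        result = pool.map_async(_work, range(8))
        pool.close()
        pool.join()
        print(result.get())
    finally:
        pool.terminate()
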
Example no. 4
 def _async_worker(self, sites_list) -> tuple:
     """
     Run the parser in asynchronous mode.
     :param sites_list: list of sites to analyse
     :return: list of tuples with the parsing results
     """
     self.process_pool = Pool(processes=settings.PROCESS_AMOUNT)
     results = [
         self.process_pool.apply_async(
             settings.AVAILABLE_RENDERS[site.news_portal],
             args=(site.target_url, )) for site in sites_list
     ]
     clean_data = [i.get() for i in results]
     self.process_pool.close()
     self.process_pool.join()
     return clean_data
Example no. 5
def classificar_baixados(idfiltro):
    logger.info("Classificando filtro %s" % idfiltro)
    m_filtro = Filtro.objects.get(pk=idfiltro)

    if m_filtro.situacao in SITUACOES_EXECUTORES:
        return

    m_filtro.situacao = "4"
    m_filtro.percentual_atual = 0
    m_filtro.save()

    # build the classifier structure
    estrutura = montar_estrutura_filtro(m_filtro)

    documentos = m_filtro.documento_set.all()
    iterador = documentos.iterator()
    logger.info("Contando a quantidade de documento")
    qtd_documentos = documentos.count()
    pool = Pool(
        cpu_count(),
        initializer=classificador_inicializador,
        initargs=(estrutura, ),
    )

    contador = 0
    logger.info(
        "Aplicando classificadores em paralelo: %s chunks em %s nucleos" %
        (settings.CLASSIFICADOR_CHUNKSIZE, cpu_count()))
    for documento in pool.imap(
            classificar_paralelo,
            iterador,
            chunksize=settings.CLASSIFICADOR_CHUNKSIZE,
    ):
        contador += 1
        m_filtro.percentual_atual = contador / qtd_documentos * 100
        if contador % 500 == 0:
            logger.info("Percentual %s" % m_filtro.percentual_atual)
        m_filtro.save()
        documento.save()

    logger.info("Terminei classificação regex, começando LDA")

    # apply the LDA model
    aplicar_lda(m_filtro)

    m_filtro.situacao = "5"
    m_filtro.save()
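
The key idea in `classificar_baixados` is handing the (potentially large) classifier structure to each worker once via `initializer`/`initargs` instead of pickling it with every document, then streaming documents through `pool.imap` with a chunk size. A minimal standalone sketch of that pattern with toy names (`_init_worker`, `_classify`, and the placeholder `estrutura` are all hypothetical):

from multiprocessing import Pool, cpu_count

_ESTRUTURA = None

def _init_worker(estrutura):
    # Runs once per worker process; keeps the heavy object in a module global.
    global _ESTRUTURA
    _ESTRUTURA = estrutura

def _classify(item):
    return item in _ESTRUTURA

if __name__ == '__main__':
    estrutura = {2, 3, 5, 7}  # stand-in for montar_estrutura_filtro(...)
    pool = Pool(cpu_count(), initializer=_init_worker, initargs=(estrutura,))
    try:
        # imap streams results in order; chunksize batches the pickling overhead.
        for flag in pool.imap(_classify, range(10), chunksize=4):
            print(flag)
    finally:
        pool.close()
        pool.join()
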
Example no. 6
    def separate_into_parts(self, input_path: str, output_path: Path):
        # Check if we downloaded a webpage instead of the actual model file
        file_exists = self.model_file_path.is_file()
        mime = None
        if file_exists:
            mime = magic.from_file(str(self.model_file_path), mime=True)

        download_and_verify(MODEL_URL,
                            self.model_dir,
                            self.model_file_path,
                            force=(file_exists and mime == 'text/html'))

        parts = {
            'vocals': True,
            'drums': True,
            'bass': True,
            'other': True
        }

        estimates = self.get_estimates(input_path, parts)

        # Export all source MP3s in parallel
        pool = Pool()
        tasks = []
        output_path = Path(output_path)

        for name, estimate in estimates.items():
            filename = f'{name}.mp3'
            print(f'Exporting {name} MP3...')
            task = pool.apply_async(self.audio_adapter.save,
                                    (output_path / filename, estimate,
                                     self.sample_rate, 'mp3', self.bitrate))
            tasks.append(task)

        pool.close()
        pool.join()
Example no. 7
 async def waiter():
     def _call(child):
         return child()
     pool = Pool(forking_factor)
     results = pool.map(_call, children)
     pool.close()
     pool.join()
     return results
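
`waiter()` above calls the blocking `pool.map` directly inside a coroutine, which stalls the event loop until every child finishes (`forking_factor` and `children` are assumed to come from the enclosing scope). If that matters, one alternative is to push the blocking call into an executor; a sketch only, with a toy `_call` and `values`:

import asyncio
from multiprocessing import Pool

def _call(x):
    return x * 2

async def waiter(values):
    loop = asyncio.get_running_loop()
    with Pool(2) as pool:
        # Run the blocking pool.map in the default thread executor so the
        # event loop stays responsive while the child processes work.
        return await loop.run_in_executor(None, pool.map, _call, values)

if __name__ == '__main__':
    print(asyncio.run(waiter([1, 2, 3])))
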
Example no. 8
def get_resource_info_from_k8s_task():
    start = time.perf_counter()

    with Pool(maxtasksperchild=8) as executor:
        db_records = list(Account.objects.all().values())
        executor.map(get_node_info_from_k8s, db_records)

    finish = time.perf_counter()
    print(f'Finish  in {round(finish - start, 2)} second(s)')
    print(len(list(Account.objects.all().values())))
Example no. 9
def send_notification():
    start = time.perf_counter()

    # with concurrent.futures.ThreadPoolExecutor(max_workers=20) as executor:
    with Pool() as executor:
        db_records = list(Resource.objects.all().values())
        executor.map(notification, db_records)

    finish = time.perf_counter()
    print(f'Finish  in {round(finish - start, 2)} second(s)')
    print(len(list(Account.objects.all().values())))
Example no. 10
def upload_to_db():
    start = time.perf_counter()
    data = GetConfigFile()
    data.get_all_data_from_configfile()
    records = data.get_list_data()
    with Pool() as executor:
        db_records = list(Account.objects.all().values())
        results = executor.map(save_to_db, records)

    finish = time.perf_counter()
    print(f'Finish  in {round(finish - start, 2)} second(s)')
    print(len(list(Account.objects.all().values())))
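
The three `with Pool() as executor:` snippets above lean on one detail of the context manager: `Pool.__exit__` calls `terminate()`, not `close()`/`join()`, so results have to be fully materialized inside the `with` block, which the blocking `executor.map` does. A minimal sketch of that shape with a hypothetical `save_stub` worker in place of `save_to_db`/`notification`:

import time
from multiprocessing import Pool

def save_stub(record):
    # Toy stand-in for save_to_db / notification; just echoes the record id.
    return record['id']

if __name__ == '__main__':
    records = [{'id': i} for i in range(20)]
    start = time.perf_counter()
    with Pool() as executor:
        # map() blocks until every record is processed, so the results are
        # complete before __exit__ terminates the worker processes.
        results = executor.map(save_stub, records)
    print(results)
    print(f'Finish in {round(time.perf_counter() - start, 2)} second(s)')
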
Example no. 11
def query_tweets(query,
                 limit=None,
                 begindate=dt.date(2006, 3, 21),
                 enddate=dt.date.today(),
                 poolsize=20,
                 lang='',
                 use_proxies=False):
    no_days = (enddate - begindate).days

    if (no_days < 0):
        sys.exit('Begin date must occur before end date.')

    if poolsize > no_days:
        # Since we are assigning each pool a range of dates to query,
        # the number of pools should not exceed the number of dates.
        poolsize = no_days
    dateranges = [
        begindate + dt.timedelta(days=elem)
        for elem in linspace(0, no_days, poolsize + 1)
    ]

    if limit and poolsize:
        limit_per_pool = (limit // poolsize) + 1
    else:
        limit_per_pool = None

    # If we are setting pool size to 1, add a pause between requests to avoid IP ban by Twitter.
    throttled = poolsize == 1 and not use_proxies
    queries = [
        '{} since:{} until:{}'.format(query, since, until)
        for since, until in zip(dateranges[:-1], dateranges[1:])
    ]

    all_tweets = []
    try:
        pool = Pool(poolsize)
        logger.info('queries: {}'.format(queries))
        try:
            for new_tweets in pool.imap_unordered(
                    partial(query_tweets_once,
                            throttled=throttled,
                            limit=limit_per_pool,
                            lang=lang,
                            use_proxies=use_proxies), queries):
                all_tweets.extend(new_tweets)
                logger.info('Got {} tweets ({} new).'.format(
                    len(all_tweets), len(new_tweets)))
        except KeyboardInterrupt:
            logger.info('Program interrupted by user. Returning all tweets '
                        'gathered so far.')
    finally:
        pool.close()
        pool.join()

    return all_tweets
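
The core of `query_tweets` is mapping a list of date-bounded query strings over `pool.imap_unordered`, with the shared arguments frozen into a `functools.partial`. A standalone sketch of just that pattern, with a fake `fetch_once` worker in place of `query_tweets_once` (names and outputs are illustrative only):

from functools import partial
from multiprocessing import Pool

def fetch_once(query, limit=None, lang=''):
    # Toy stand-in for query_tweets_once; returns fake "tweets".
    return [f'{lang}:{query}:{i}' for i in range(limit or 2)]

if __name__ == '__main__':
    queries = ['python since:2021-01-01 until:2021-01-02',
               'python since:2021-01-02 until:2021-01-03']
    all_tweets = []
    pool = Pool(2)
    try:
        # imap_unordered yields each query's result as soon as any worker finishes.
        for new_tweets in pool.imap_unordered(
                partial(fetch_once, limit=3, lang='en'), queries):
            all_tweets.extend(new_tweets)
    finally:
        pool.close()
        pool.join()
    print(len(all_tweets))
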
Example no. 12
class MultiprocessingDistributor(DistributorBaseClass):
    """
    Distributor using a multiprocessing Pool to calculate the jobs in parallel on the local machine.
    """
    def __init__(self,
                 n_workers,
                 disable_progressbar=False,
                 progressbar_title="Feature Extraction",
                 show_warnings=True):
        """
        Creates a new MultiprocessingDistributor instance

        :param n_workers: How many workers should the multiprocessing pool have?
        :type n_workers: int
        :param disable_progressbar: whether to show a progressbar or not.
        :type disable_progressbar: bool
        :param progressbar_title: the title of the progressbar
        :type progressbar_title: basestring
        :param show_warnings: whether to show warnings or not.
        :type show_warnings: bool
        """
        self.pool = Pool(processes=n_workers,
                         initializer=initialize_warnings_in_workers,
                         initargs=(show_warnings, ))
        self.n_workers = n_workers
        self.disable_progressbar = disable_progressbar
        self.progressbar_title = progressbar_title

    def distribute(self, func, partitioned_chunks, kwargs):
        """
        Calculates the features in a parallel fashion by distributing the map command to a process pool

        :param func: the function to send to each worker.
        :type func: callable
        :param partitioned_chunks: The list of data chunks - each element is again
            a list of chunks - and should be processed by one worker.
        :type partitioned_chunks: iterable
        :param kwargs: parameters for the map function
        :type kwargs: dict of string to parameter

        :return: The result of the calculation as a list - each item should be the result of the application of func
            to a single element.
        """
        return self.pool.imap_unordered(partial(func, **kwargs),
                                        partitioned_chunks)

    def close(self):
        """
        Collects the result from the workers and closes the process pool.
        """
        self.pool.close()
        self.pool.terminate()
        self.pool.join()
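
One subtlety with this distributor: `distribute()` returns the lazy iterator from `imap_unordered`, so the caller must drain it before invoking `close()`, whose `terminate()` would otherwise discard chunks that have not run yet. A small sketch of that ordering with a toy `square` worker (not the tsfresh API, just the underlying pool behaviour):

from functools import partial
from multiprocessing import Pool

def square(x, offset=0):
    return x * x + offset

if __name__ == '__main__':
    pool = Pool(processes=2)
    # imap_unordered is lazy: drain it before tearing the pool down,
    # otherwise terminate() throws away chunks that have not run yet.
    lazy_results = pool.imap_unordered(partial(square, offset=1), range(6))
    collected = list(lazy_results)
    pool.close()
    pool.terminate()   # same teardown order as the close() method above
    pool.join()
    print(sorted(collected))
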
Example no. 13
    def __init__(self,
                 n_workers,
                 disable_progressbar=False,
                 progressbar_title="Feature Extraction",
                 show_warnings=True):
        """
        Creates a new MultiprocessingDistributor instance

        :param n_workers: How many workers should the multiprocessing pool have?
        :type n_workers: int
        :param disable_progressbar: whether to show a progressbar or not.
        :type disable_progressbar: bool
        :param progressbar_title: the title of the progressbar
        :type progressbar_title: basestring
        :param show_warnings: whether to show warnings or not.
        :type show_warnings: bool
        """
        self.pool = Pool(processes=n_workers,
                         initializer=initialize_warnings_in_workers,
                         initargs=(show_warnings, ))
        self.n_workers = n_workers
        self.disable_progressbar = disable_progressbar
        self.progressbar_title = progressbar_title
Example no. 14
def query_tweets(query,
                 limit=None,
                 begindate=dt.datetime(2006, 3, 21),
                 enddate=dt.datetime.now(),
                 poolsize=20,
                 lang=''):
    no_secs = int((enddate - begindate).total_seconds())

    if (no_secs < 0):
        sys.exit('Begin date must occur before end date.')

    if poolsize > no_secs:
        # Since we are assigning each pool a range of dates to query,
        # the number of pools should not exceed the number of dates.
        poolsize = no_secs
    dateranges = [
        begindate + dt.timedelta(seconds=elem)
        for elem in linspace(0, no_secs, poolsize + 1)
    ]

    if limit and poolsize:
        limit_per_pool = (limit // poolsize) + 1
    else:
        limit_per_pool = None

    queries = [
        '{} since_time:{} until_time:{}'.format(
            query, int(time.mktime(since.timetuple())),
            int(time.mktime(until.timetuple())))
        for since, until in zip(dateranges[:-1], dateranges[1:])
    ]

    all_tweets = []
    try:
        pool = Pool(poolsize)
        logger.info('queries: {}'.format(queries))
        try:
            for new_tweets in pool.imap_unordered(
                    partial(query_tweets_once, limit=limit_per_pool,
                            lang=lang), queries):
                all_tweets.extend(new_tweets)
                logger.info('Got {} tweets ({} new).'.format(
                    len(all_tweets), len(new_tweets)))
        except KeyboardInterrupt:
            logger.info('Program interrupted by user. Returning all tweets '
                        'gathered so far.')
    finally:
        pool.close()
        pool.join()

    return all_tweets
Example no. 15
def query_tweets_parallel(query,
                          limit=None,
                          begindate=dt.date(2006, 3, 21),
                          enddate=dt.date.today(),
                          poolsize=20,
                          lang='',
                          use_proxy=False):
    number_days = (enddate - begindate).days

    if poolsize > number_days:
        # Since we are assigning each pool a range of dates to query,
        # the number of pools should not exceed the number of dates.
        poolsize = number_days

    dateranges = [
        begindate + dt.timedelta(days=elem)
        for elem in linspace(0, number_days, poolsize + 1)
    ]

    if limit and poolsize:
        limit_per_pool = (limit // poolsize) + 1
    else:
        limit_per_pool = None

    queries = [
        '{} since:{} until:{}'.format(query, since, until)
        for since, until in zip(dateranges[:-1], dateranges[1:])
    ]

    all_tweets = []
    try:
        pool = Pool(poolsize)
        logger.info('queries: {}'.format(queries))
        try:
            for new_tweets in pool.imap_unordered(
                    partial(query_tweets_once,
                            limit=limit_per_pool,
                            lang=lang,
                            use_proxy=use_proxy), queries):
                all_tweets.extend(new_tweets)
                logger.info('Got {} tweets ({} new).'.format(
                    len(all_tweets), len(new_tweets)))
        except KeyboardInterrupt:
            logger.info('Program interrupted by user. Returning all tweets '
                        'gathered so far.')
    finally:
        pool.close()
        pool.join()

    return all_tweets
Example no. 16
    def separate_into_parts(self, input_path: str, output_path: Path):
        self.download_and_verify()

        ctx = get_extension_context(self.context)
        nn.set_default_context(ctx)
        nn.set_auto_forward(True)

        audio, _ = self.audio_adapter.load(input_path,
                                           sample_rate=self.sample_rate)

        if audio.shape[1] > 2:
            warnings.warn('Channel count > 2! '
                          'Only the first two channels will be processed!')
            audio = audio[:, :2]

        if audio.shape[1] == 1:
            print('received mono file, so duplicate channels')
            audio = np.repeat(audio, 2, axis=1)

        print('Separating...')
        estimates = separate(audio,
                             model_path=str(self.model_file_path),
                             niter=self.iterations,
                             alpha=self.alpha,
                             softmask=self.softmask,
                             residual_model=self.residual_model)

        output_path = Path(output_path)

        # Export all source MP3s in parallel
        pool = Pool()
        tasks = []

        for name, estimate in estimates.items():
            filename = f'{name}.mp3'
            print(f'Exporting {name} MP3...')
            task = pool.apply_async(self.audio_adapter.save, (os.path.join(
                output_path,
                filename), estimate, self.sample_rate, 'mp3', self.bitrate))
            tasks.append(task)

        pool.close()
        pool.join()
Example no. 17
    def separate_into_parts(self, input_path: str, output_path: Path):
        self.download_and_verify()
        estimates = self.get_estimates(input_path)

        # Export all source MP3s in parallel
        pool = Pool()
        tasks = []
        output_path = Path(output_path)

        for name, estimate in estimates.items():
            filename = f'{name}.mp3'
            print(f'Exporting {name} MP3...')
            task = pool.apply_async(self.audio_adapter.save, (os.path.join(
                output_path,
                filename), estimate, self.sample_rate, 'mp3', self.bitrate))
            tasks.append(task)

        pool.close()
        pool.join()
Example no. 18
def query_tweets(query,
                 limit=None,
                 begindate=dt.date(2006, 3, 21),
                 enddate=dt.date.today(),
                 poolsize=20,
                 lang=''):
    no_days = (enddate - begindate).days

    if (no_days < 0):
        sys.exit('Begin date must occur before end date.')

    if poolsize > no_days:
        poolsize = no_days
    dateranges = [
        begindate + dt.timedelta(days=elem)
        for elem in linspace(0, no_days, poolsize + 1)
    ]

    if limit and poolsize:
        limit_per_pool = (limit // poolsize) + 1
    else:
        limit_per_pool = None

    queries = [
        '{} since:{} until:{}'.format(query, since, until)
        for since, until in zip(dateranges[:-1], dateranges[1:])
    ]

    all_tweets = []
    try:
        pool = Pool(poolsize)
        for new_tweets in pool.imap_unordered(
                partial(query_tweets_once, limit=limit_per_pool, lang=lang),
                queries):
            all_tweets.extend(new_tweets)
    finally:
        pool.close()
        pool.join()

    return all_tweets
Example no. 19
    def scrape(self, keywords):
        
        all_tweets = []
        pool_size = 20

        start_date = dt.date.today() - dt.timedelta(14)
        query = " ".join(keywords)        

        no_of_days = (dt.date.today() - start_date).days
        if no_of_days < pool_size:
            pool_size = no_of_days

        date_ranges = [
            start_date + dt.timedelta(days=elem)
            for elem in np.linspace(0, no_of_days, pool_size + 1)
        ]

        if self.limit and pool_size:
            self.limit = (self.limit // pool_size) + 1

        queries = [
            "{} since:{} until:{}".format(query, since, until)
            for since, until in zip(date_ranges[:-1], date_ranges[1:])
        ]

        pool = Pool(pool_size)
        logging.info("queries: {}".format(queries))

        try:
            for new_tweets in pool.imap_unordered(self.get_tweets, queries):
                all_tweets.extend(new_tweets)
        except KeyboardInterrupt:
            logging.info(
                "Program interrupted by user. Returning all tweets " "gathered so far."
            )
        finally:
            pool.close()
            pool.join()

        return all_tweets
Example no. 20
def crawl_keywords(what):
    print(what)
    p = Pool(3)
    p.apply_async(crawl)
    p.close()
    p.join()
Example no. 21
def get_pool(pool=None):
    if not pool:
        pool = Pool(cpu_count())
    return pool
Example no. 22
class RenderAndSave():
    def __init__(self, async_mode=True):
        """
        :param async_mode: False is used for testing
        """
        self.async_mode = async_mode

    @staticmethod
    def get_available_sites(news_portal=None, news_dep=None) -> list:
        """
        Get the list of available sites
        :param news_portal: news portal whose active topic sections will be searched; if
        not set, the search covers all active sites
        :param news_dep: news section of the portal
        :return: query
        """
        if news_portal:
            return [
                Site.objects.filter(is_active=True,
                                    news_portal=news_portal,
                                    news_department=news_dep)
            ]
        return [site for site in Site.objects.filter(is_active=True)]

    @staticmethod
    def _save_postgr(results):
        """
        Save the parsing results to PostgreSQL
        :param results: list of tuples, each containing a dict describing the parsing result of a single article
        :return:
        """
        for part in results:
            for result in part:
                Article.objects.get_or_create(
                    link=result['news_link'],
                    has_prices=get_has_prices(result['main_text']),
                    has_percents=get_has_percents(result['main_text']),
                    frequent_words=get_frequent_words(result['main_text']),
                    content=result)

    def _async_worker(self, sites_list) -> tuple:
        """
        Run the parser in asynchronous mode.
        :param sites_list: list of sites to analyse
        :return: list of tuples with the parsing results
        """
        self.process_pool = Pool(processes=settings.PROCESS_AMOUNT)
        results = [
            self.process_pool.apply_async(
                settings.AVAILABLE_RENDERS[site.news_portal],
                args=(site.target_url, )) for site in sites_list
        ]
        clean_data = [i.get() for i in results]
        self.process_pool.close()
        self.process_pool.join()
        return clean_data

    def _sync_worker(self, site) -> list:
        """
        Run the parser in synchronous mode.
        :param site: site to analyse
        :return: list of tuples with the parsing results
        """
        try:
            site = site[0]
            return [
                settings.AVAILABLE_RENDERS[site.news_portal].__call__(
                    site.target_url)
            ]
        except Exception as err:
            logger.error(err)

    def run_parser(self) -> list:
        """
        Run the parser.
        :return:
        """
        sites_list = self.get_available_sites()
        if self.async_mode:
            data_from_site = self._async_worker(sites_list)
        else:
            data_from_site = self._sync_worker(sites_list)
        if not data_from_site:
            logger.info("Ошибка рабзора сайта")
            raise Exception("Ошибка рабзора сайта")
        self._save_postgr(data_from_site)
        return [i.target_url for i in sites_list]