def save_imgs():
    pool = workerpool.WorkerPool(size=10)
    for map_name in maps:
        i_range = maps[map_name][0]
        j_range = maps[map_name][1]
        make_dir(map_name)
        for feature in features:
            make_dir(map_name, feature)
            for i in range(i_range):
                make_dir(map_name, feature, i)
                for j in range(j_range):
                    loc_url = url_maker(map_name, feature, i, j)
                    save_to = store_location_maker(map_name, feature, i, j)
                    job = DownloadJob(loc_url, save_to)
                    pool.put(job)
    pool.shutdown()
    pool.wait()
    # proofread: re-queue any tile whose file was never written (worker exceptions can drop jobs)
    pool = workerpool.WorkerPool(size=10)
    for map_name in maps:
        i_range = maps[map_name][0]
        j_range = maps[map_name][1]
        make_dir(map_name)
        for feature in features:
            make_dir(map_name, feature)
            for i in range(i_range):
                make_dir(map_name, feature, i)
                for j in range(j_range):
                    loc_url = url_maker(map_name, feature, i, j)
                    save_to = store_location_maker(map_name, feature, i, j)
                    if not os.path.exists(save_to):
                        job = DownloadJob(loc_url, save_to)
                        pool.put(job)
    pool.shutdown()
    pool.wait()
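DownloadJob, url_maker, and store_location_maker are not shown in this snippet; below is a minimal sketch of what such a workerpool.Job subclass could look like, assuming Python 2 and plain urllib retrieval (hypothetical, not the author's actual class).

# Hypothetical sketch -- the real DownloadJob used above is not included in this example.
import urllib
import workerpool

class DownloadJob(workerpool.Job):
    def __init__(self, url, save_to):
        self.url = url          # tile URL built by url_maker()
        self.save_to = save_to  # file path built by store_location_maker()

    def run(self):
        # Fetch the resource and write it to disk. Any tile that fails to download
        # is picked up by the "proofread" pass above, which re-queues every path
        # that does not exist on disk.
        urllib.urlretrieve(self.url, self.save_to)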
def get_layers():
    pool = workerpool.WorkerPool(size=10)
    for map_name in maps:
        i_range = maps[map_name][0]
        j_range = maps[map_name][1]
        # (0, 24, 28, 37, 45)
        for i in range(i_range):
            for j in range(j_range):
                job = GetterJob(map_name, i, j)
                pool.put(job)
    pool.shutdown()
    pool.wait()
    # proofread: exceptions in the concurrent workers can leave tiles missing, so retry those
    pool = workerpool.WorkerPool(size=10)
    for map_name in maps:
        i_range = maps[map_name][0]
        j_range = maps[map_name][1]
        for i in range(i_range):
            for j in range(j_range):
                store_loc = store_location_maker(map_name, "merged", i, j)
                if not os.path.exists(store_loc):
                    job = GetterJob(map_name, i, j)
                    pool.put(job)
    pool.shutdown()
    pool.wait()
def getChannels(url):
    if __settings__.getSetting('paid_account') == "true":
        while not login():
            xbmc.executebuiltin(
                "XBMC.Notification('GLArab','INVALID username and/or password.',30000,"
                + icon + ")")
            __settings__.openSettings()
        url += '&type=reg'
    else:
        url += '&type=free'

    resp = opener.open(url)
    inner_data = resp.read()
    inner_soup = BeautifulSoup(inner_data)
    container = inner_soup.find('div', id='listContainerScroll')

    thumbnail = "DefaultVideo.png"
    pattern = re.compile(r"makeHttpRequest\('(.*?)&',")

    NUM_SOCKETS = 5
    NUM_WORKERS = 8

    http = urllib3.PoolManager(maxsize=NUM_SOCKETS)
    workers = workerpool.WorkerPool(size=NUM_WORKERS)

    for span in container:
        workers.put(FetchJob(span, pattern, http))

    workers.shutdown()
    workers.wait()
Example #4
    def downloadAndExtract(self):
        """Downloads and extracts Medline data."""

        # Extract topic and article URLs
        mLE = MedlineLinkExtractor()
        mLE.readXml(self.__topicsXML_path)
        topics = mLE.getTopics()
        articles = mLE.getArticles()
        topicCount = 0
        articleCount = 0

        # Downloader
        pool = workerpool.WorkerPool(size=20)
        for url in topics:
            topicCount = topicCount + 1
            job = MedlineDataExtractor(url, self.__topicPath, isArticle=False)
            pool.put(job)

        for url in articles:
            articleCount = articleCount + 1
            job = MedlineDataExtractor(url, self.__articlePath, isArticle=True)
            pool.put(job)

        pool.shutdown()
        pool.wait()
        print str(topicCount) + ' Topics downloaded'
        print str(articleCount) + ' Articles downloaded'
        print 'Run the medlineIndexer.py file to build an index of topics/articles'
Example #5
def Do_MultiThread(dirlist, jobid):
    global thread_num
    # Initialize a pool with thread_num worker threads
    pool = workerpool.WorkerPool(size=thread_num, maxjobs=thread_num)
    cnt = 0
    runcnt = 0
    total = len(dirlist)
    print 'Enter Do_MultiThread'
    for filename in dirlist:
#         thread.start_new_thread(Get_OneSpurtData,(filename,))
#         print 'thread start '
        try:
            try:
                job = DoOneJob(filename, jobid, runcnt)
                print 'Job', runcnt, '/', total
                pool.put(job)    
                runcnt += 1
                time.sleep(0.5)
                if debug_maxcnt > 0 and runcnt >= debug_maxcnt:
                    break
            except:
                print 'error queueing job'
                break
        except:
            break
    # Send shutdown jobs to all threads, and wait until all the jobs have been completed
#     time.sleep(10)
    pool.shutdown()
    pool.wait()
Example #6
    def test_map(self):
        "Map a list to a method to a pool of two workers."
        pool = workerpool.WorkerPool(2)

        r = pool.map(self.double, [1, 2, 3, 4, 5])
        self.assertEquals(set(r), {2, 4, 6, 8, 10})
        pool.shutdown()
    def download_songs(self):
        """
        Downloads the songs passed in at object creation into a folder.
        The downloads are mp3 files, named according to their artist and title.

        :return: A two-element list: the number of songs that already existed locally, followed by a list of
                 dictionaries for the songs that failed to download (each with the same structure as the
                 songs passed in).
        """
        # make subfolder for this set of downloads
        try:
            os.mkdir(self.download_path)
            print("Creating download directory for " + self.folder_name + "...")
        except FileExistsError:
            print("Download folder already exists for " + self.folder_name + "...")

        os.chdir(self.download_path)
        self._remove_existing_songs_from_list()

        # no real benefit after ~10 threads since limited by download speeds
        pool = workerpool.WorkerPool(size=10)

        for song in self.requested_songs:
            job = DownloadJob(self, song)
            pool.put(job)

        pool.shutdown()
        pool.wait()

        return [self.num_existing_songs, self.failed_downloaded_songs]
Example #8
def do_multithread(cmdlines):
    global thread_num
    # print thread_num
    # Initialize a pool with thread_num worker threads
    pool = workerpool.WorkerPool(size=thread_num, maxjobs=4)
    cnt = 0
    runcnt = 0
    for line in cmdlines:
        try:
            cnt += 1
            print 'Job', cnt
            try:
                job = DoOneJob(line, cnt)
                pool.put(job)
                runcnt += 1
                # print runcnt
                time.sleep(0.1)
            except:
                print 'error queueing job'
                break
        except:
            break

    # Send shutdown jobs to all threads, and wait until all the jobs have been completed
    pool.shutdown()
    pool.wait()
Example #9
    def test_equipped(self):
        """
        Create an equipped worker that uses an internal Counter resource to
        keep track of the job count.
        """
        results = Queue()

        def toolbox_factory():
            return Counter()

        def worker_factory(job_queue):
            return workerpool.EquippedWorker(job_queue, toolbox_factory)

        pool = workerpool.WorkerPool(1, worker_factory=worker_factory)

        # Run 10 jobs
        for i in xrange(10):
            j = CountJob(results)
            pool.put(j)

        # Get 10 results
        for i in xrange(10):
            r = results.get()
            # Each result should be an incremented value
            self.assertEquals(r, i)

        pool.shutdown()
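The Counter and CountJob helpers used by this test are not shown; a plausible sketch follows, under the assumption that an EquippedWorker passes the toolbox built by toolbox_factory() into job.run().

# Hypothetical sketch of the helpers above (assumption: EquippedWorker calls
# job.run(toolbox) with the worker's own toolbox instance).
import workerpool

class Counter(object):
    def __init__(self):
        self.count = 0

class CountJob(workerpool.Job):
    def __init__(self, result_queue):
        self.result_queue = result_queue

    def run(self, toolbox):
        # Report the worker-local count, then increment it, so a single worker
        # produces 0, 1, 2, ... as the test expects.
        self.result_queue.put(toolbox.count)
        toolbox.count += 1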
Example #10
def Do_MultiThread(cmds):
    global thread_num
    pool = workerpool.WorkerPool(size=thread_num, maxjobs=4)
    cnt = 0
    runcnt = 0
    for line in cmds:
        try:
            cnt += 1
            print 'Job', cnt,
            try:
                job = DoOneJob(line, cnt)
                pool.put(job)
                runcnt += 1
                # print runcnt
                # time.sleep(0.1)
                # break
            except:
                print 'error queueing job'
                break
        except:
            break

    # Send shutdown jobs to all threads, and wait until all the jobs have been completed
    pool.shutdown()
    pool.wait()
Example #11
    def run(self):
        self._print_header()

        signaler = Signaler()
        bitbucket = Bitbucket(self._bb_username, self._bb_password)
        git = Git()

        def toolbox_factory():
            s3 = S3(self._s3_key, self._s3_secret, self._s3_bucket,
                    self._s3_base_path, self._s3_endpoint)
            return BitbackupWorkerToolbox(bitbucket, git, s3)

        def worker_factory(job_queue):
            worker = workerpool.EquippedWorker(job_queue, toolbox_factory)
            worker.setName(worker.getName().replace("Thread", "Worker"))
            return worker

        info('Loading repository list...')
        repos = bitbucket.get_all_repositories()

        info('Starting {} workers...'.format(self._worker_count))
        pool = workerpool.WorkerPool(size=self._worker_count,
                                     worker_factory=worker_factory,
                                     maxjobs=1)

        for repo in repos:
            if signaler.should_term():
                break
            pool.put(BitbackupJob(repo))

        pool.shutdown()
        pool.wait()
        self._print_footer()
Example #12
def mass_download(urls, nthread):
    print('Downloading...')
    pool = workerpool.WorkerPool(size=nthread)
    saveto = [os.path.basename(url) for url in urls]
    pool.map(download, urls, saveto)
    pool.shutdown()
    pool.wait()
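The download callable mapped above is not shown; a minimal sketch, assuming it simply retrieves each URL to the matching basename (Python 2):

# Hypothetical sketch of the mapped callable. pool.map() pairs each element of
# urls with the matching element of saveto, like the built-in map().
import urllib

def download(url, saveto):
    urllib.urlretrieve(url, saveto)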
Example #13
    def test_wait(self):
        "Make sure each task gets marked as done so pool.wait() works."
        pool = workerpool.WorkerPool(5)
        q = Queue()
        for i in xrange(100):
            pool.put(workerpool.SimpleJob(q, sum, [range(5)]))
        pool.wait()
        pool.shutdown()
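SimpleJob wraps a callable plus its argument list; as used above, each job appears to put the callable's return value on the supplied queue. A standalone sketch of that usage, under that assumption (Python 2):

# Sketch only (assumption: workerpool.SimpleJob(q, method, args) runs method(*args)
# and puts the return value on q).
from Queue import Queue

import workerpool

pool = workerpool.WorkerPool(5)
q = Queue()
pool.put(workerpool.SimpleJob(q, sum, [range(5)]))
pool.wait()
print q.get()   # expected: 10, i.e. sum(range(5))
pool.shutdown()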
Example #14
    def install_multi_sw(self):
        pool = workerpool.WorkerPool(size=self.workerpool_size)
        for num in range(self.num_vm):
            self.hostname = self.base_role_name + str(num) + ".cloudapp.net"
            job = InstallSW(self.hostname, self.username, self.password)
            pool.put(job)
        pool.shutdown()
        pool.wait()
Example #15
def cachedprime(urls, headers={}, resize=False, plex_resize=False):
    '''
    {'hash': '1dad1d1', 'fp': 'filepath', 'url': 'imgurl', 'resize': [[w, h, url, dest]]}
    '''

    logger.debug('Got %s images' % len(urls))
    urls = remove_dict_dupe_from_list(urls, 'hash')
    logger.debug('Removed all duplicate images, %s left' % len(urls))

    imgdir = os.path.join(htpc.DATADIR, 'images/')
    made_dir = False
    if not os.path.exists(imgdir):
        logger.debug('Creating image directory at %s' % imgdir)
        os.makedirs(imgdir)
        made_dir = True

    resize_list = []

    logger.debug('This can take a while..')

    # If there is no local copy of the original
    if made_dir is True:
        logger.debug('There was no image directory, so everything is missing')
        resize_list = urls

    else:
        logger.debug('Checking for missing images')
        # not handling resizes for Plex here
        for item in urls:
            if not os.path.isfile(item['fp']):
                logger.debug('%s was missing, download it %s' %
                             (item['fp'], item['url']))
                resize_list.append(item)

    if made_dir is False and not resize_list:
        logger.debug('No missing images :)')
        return

    pool = workerpool.WorkerPool(size=20)
    for i in resize_list:
        j = CacheImgDownload(i, headers)
        pool.put(j)
    pool.shutdown()
    pool.wait()

    # use pil to resize images
    if resize_list and plex_resize is False and resize is True:
        from multiprocessing import Pool, cpu_count
        ppool = Pool(cpu_count())
        try:
            ppool.map_async(cache_resize_image, (b for b in resize_list), 5)
            ppool.close()
            ppool.join()
        except Exception as e:
            logger.debug('Failed to resize image %s' % e)
    else:
        # Already downloaded transcoded images
        return
Example #16
    def download_batch(self, urlObjects):
        pool = workerpool.WorkerPool(min(self.max_pool_size, len(urlObjects)))
        errors = pool.map(self.download_image, urlObjects)
        pool.shutdown()
        pool.wait()
        errors = [e for e in errors if e]
        print("Number of images sent for download " + str(len(urlObjects)))
        print("Number of images that failed " + str(len(errors)))
        return errors
Example #17
    def delete_multi_vm(self):
        pool = workerpool.WorkerPool(size=self.workerpool_size)
        for num in range(self.num_vm):
            service_name = self.base_role_name + str(num)
            deployment_name = self.base_deployment_name + str(num)
            role_name = self.base_role_name + str(num)
            job = DeleteVMJob(service_name, deployment_name, role_name)
            pool.put(job)
        pool.shutdown()
        pool.wait()
Example #18
def main(nombres, language, dest_dir, namespaces_path, test_limit=None, pool_size=20):
    # fix namespaces in to3dirs module so we can use it in this stage
    to3dirs.namespaces = to3dirs.Namespaces(namespaces_path)

    test_limit = int(test_limit) if test_limit else None
    pool = workerpool.WorkerPool(size=int(pool_size))
    data_urls = URLAlizer(nombres, dest_dir, language, test_limit)
    board = StatusBoard(language)
    yield pool.start(board.process, data_urls)
    print   # final new line for console aesthetic
Example #19
    def install_multi_sw(self):
        pool = workerpool.WorkerPool(size=self.workerpool_size)
        for num in range(self.num_vm):
            self.hostname = self.service_name + ".cloudapp.net"
            self.ssh_endpoint = util.ssh_endpoint(num)
            job = InstallSW(self.hostname, self.ssh_endpoint, self.username,
                            self.password)
            pool.put(job)
        pool.shutdown()
        pool.wait()
Example #20
    def __init__(self, threadNum, crawlerData, window):
        threading.Thread.__init__(self)
        self.threadNum = threadNum
        self.crawlerData = crawlerData
        self.window = window
        self.timeToQuit = threading.Event()
        self.timeToQuit.clear()
        # threadNum: 1
        self.pool = workerpool.WorkerPool(1)
        self.count = 1
Example #21
    def create_multi_vm(self):
        pool = workerpool.WorkerPool(size=self.workerpool_size)
        for num in range(self.num_vm):
            service_name = self.service_name
            deployment_name = self.deployment_name
            role_name = self.base_role_name + str(num)
            job = CreateVMJob(service_name, deployment_name, role_name, num)
            job.prepare()
            pool.put(job)
        pool.shutdown()
        pool.wait()
Example #22
    def _get_pool(self):
        "Get a worker pool (cached)"
        if self.pool: return self.pool

        def toolbox_factory():
            return S3ToolBox(self.aws_key, self.aws_secret_key, self.secure)
        def worker_factory(job_queue):
            return workerpool.EquippedWorker(job_queue, toolbox_factory)

        log.info("Starting pool with %d threads." % self.numthreads)
        self.pool = workerpool.WorkerPool(self.numthreads, maxjobs=self.maxjobs, worker_factory=worker_factory)
        return self.pool
Example #23
def pool(job, params):
    print 'Initializing...'
    # Initialize a pool, 32 threads in this case
    pool = workerpool.WorkerPool(size=32)
    # The ``job`` callable will be invoked once for each element of ``params``.
    pool.map(job, params)
    # Send shutdown jobs to all threads, and wait until all the jobs have been completed
    pool.shutdown()
    pool.wait()
    print 'job well done!'
Example #24
def GetAllComment(aid, order=None):
    """
获取一个视频全部评论,有可能需要多次爬取,所以会有较大耗时
输入:
	aid:AV号
	order:排序方式 默认按发布时间倒序 可选:good 按点赞人数排序 hot 按热门回复排序
返回:
	评论列表
	"""
    MaxPageSize = 300
    commentLists = [
        GetComment(aid=aid, page=1, pagesize=MaxPageSize, order=order)
    ]
    totalPage = commentLists[0].page
    directory = 'av' + str(aid) + 'Comments'
    if not os.path.exists(directory):
        os.makedirs(directory)
    if totalPage > 1:
        #urls = ['http://api.bilibili.cn/feedback?aid=' + str(aid) + '&page=' + str(p) + '&pagesize=' + str(MaxPageSize) for p in range(2, commentList.page + 1)]
        # Make a pool
        #pool = workerpool.WorkerPool(size = 10)
        pool = workerpool.WorkerPool(size=totalPage - 1)
        # Build our `map` parameters
        #saveto = [directory + '/' + str(x) for x in range(2, commentList.page + 1)]
        # Perform the mapping
        #pool.map(urllib.urlretrieve, urls, saveto)
        commentLists2 = pool.map(GetComment, [aid] * (totalPage - 1),
                                 range(2, totalPage + 1),
                                 [MaxPageSize] * (totalPage - 1))
        # Send shutdown jobs to all threads, and wait until all the jobs have been completed
        pool.shutdown()
        pool.wait()
        for cl in commentLists2:
            commentLists.append(cl)
        '''
		commentList.comments += [Comment()] * (commentList.page - 1)
		for p in range(2, commentList.page + 1):
			commentPath = directory + '/' + str(p)
			commentList.comments[(p - 1) * MaxPageSize :] = GetCommentLocal(commentPath, MaxPageSize).comments
			os.remove(commentPath)
		os.rmdir(directory)
	#Testing shows that if a video's comments grow too quickly (av2816940),
	#the top-level totalResult and pages fields in the JSON may be inaccurate,
	#and re-reading totalResult on every page fetch does not help either.
	while commentList.comments[len(commentList.comments) - 1].lv == None:
		commentList.comments.pop()
	commentList.commentLen = len(commentList.comments)'''
    for cl in commentLists:
        while cl.comments[-1].lv is None:
            cl.comments.pop()
    commentLists = sorted(commentLists, key=GetCommentListKey, reverse=True)
    return commentLists
Example #25
def get_AllQMdata():
    reader = csv.reader(file(listfile_sh, 'rb'))
    i = 0
    pool = workerpool.WorkerPool(size=thread_num, maxjobs=thread_num)
    for row in reader:
        print row
        Get_OneQMData(pool, data_path, data_ext, row[0].upper(), row[1])

    reader = csv.reader(file(listfile_sz, 'rb'))
    for row in reader:
        print row
        Get_OneQMData(pool, data_path, data_ext, row[0].upper(), row[1])

    pool.shutdown()
    pool.wait()
Example #26
def main():
    urls = createUrlList(105)

    pool = workerpool.WorkerPool(size=50)

    for url in urls:
        job = DownloadJob(url.strip())
        pool.put(job)

    pool.shutdown()
    pool.wait()

    allnames = getNames(allcontents)

    counted = countSameNames(allnames)

    result = sortAndOutput(counted)
Example #27
def fetch_all(charset, character_count=None, thread_count=5):
    """ Fetch all images of characters in character set GB2312 or GBK from http://www.chineseetymology.org/

    Keyword arguments:
    charset         --  the character set to use; should be 'GB2312' or 'GBK' (case insensitive)
    character_count --  number of characters to fetch
    thread_count    --  number of threads to use for downloading
    """

    if character_count is None or character_count > 0:
        charset = charset.lower()
        if charset == "gb2312":
            characters = _get_gb2312_characters()
        elif charset == "gbk":
            characters = _get_gbk_characters()
        # elif charset == "gb18030":
        #     characters = get_gb18030_2005_characters()
        else:
            print("Only \"GB2312\" and \"GBK\" are accepted")
            return

        if character_count is not None:
            characters = itertools.islice(characters, character_count)

        save_to_folder = charset
        if not os.path.exists(save_to_folder):
            os.mkdir(save_to_folder)
        not_analyzed_file_name = os.path.join(save_to_folder,
                                              "not_analyzed.txt")
        not_found_file_name = os.path.join(save_to_folder, "not_found.txt")
        if os.path.exists(not_analyzed_file_name):
            os.remove(not_analyzed_file_name)
        if os.path.exists(not_found_file_name):
            os.remove(not_found_file_name)

        not_found = dict()

        pool = workerpool.WorkerPool(size=thread_count)
        pool.map(_fetch_img_of_character, characters,
                 itertools.repeat(save_to_folder), itertools.repeat(not_found))
        pool.shutdown()
        pool.wait()

        _remove_empty_characters(save_to_folder, not_analyzed_file_name)
        _write_not_found(not_found_file_name, not_found)
Example #28
def cli(ctx, opt_fp_in, opt_excludes, opt_dir_out, opt_s3_url, opt_nthreads):
    """Download only the class hierarcy from VCAT API"""

    log = logger_utils.Logger.getLogger()
    log.debug('download images')
    if not opt_s3_url:
        log.error('S3 URL required. Try sourcing the env variables')
        return

    # get the ordered hierarchy
    vcat_data = vcat_utils.load_annotations(opt_fp_in, opt_excludes)
    hierarchy_tree = vcat_utils.hierarchy_tree(vcat_data['hierarchy'].copy())

    # build image ID lookup table. the regions refer to these
    image_lookup = {}
    for vcat_class_id, object_class in vcat_data['object_classes'].items():
        for image in object_class['images']:
            image_lookup[image['id']] = image

    url_maps = []
    for vcat_class_id, object_class in vcat_data['object_classes'].items():
        for region in object_class['regions']:
            im_meta = image_lookup[region['image']]
            url = vcat_utils.format_im_url(opt_s3_url, im_meta)
            # log.info(url)
            fp_out = vcat_utils.format_im_fn(im_meta)
            fp_out = join(opt_dir_out, fp_out)
            url_maps.append({'url': url, 'fp_out': fp_out})

    if not Path(opt_dir_out).exists():
        file_utils.mkdirs(opt_dir_out)

    # download pool
    global pbar
    pbar = tqdm(total=len(url_maps))
    pool = workerpool.WorkerPool(size=opt_nthreads)
    pool.map(downloader, url_maps)
    # Send shutdown jobs to all threads
    #   and wait until all the jobs have been completed
    pool.shutdown()
    pool.wait()
    pbar.close()
Example #29
def execute_new_lang(logger):
    """
    Execute new_lang.exe [generate NLP result files]
    :param      logger:             Logger
    """
    global DELETE_FILE_LIST
    logger.info("4. execute new lang")
    start = 0
    end = 0
    cmd_list = list()
    os.chdir(TA_CONFIG['ta_bin_path'])
    target_list = glob.glob("{0}/txt/*".format(TA_TEMP_DIR_PATH))
    thread = len(target_list) if len(target_list) < int(TA_CONFIG['nl_thread']) else int(TA_CONFIG['nl_thread'])
    output_dir_list = ['JSON', 'JSON2', 'HMD', 'MCNT', 'NCNT', 'IDX', 'IDXVP', 'W2V']
    for dir_name in output_dir_list:
        output_dir_path = "{0}/{1}".format(TA_TEMP_DIR_PATH, dir_name)
        if not os.path.exists(output_dir_path):
            os.makedirs(output_dir_path)
    temp_new_lang_dir_path = '{0}/{1}'.format(TA_CONFIG['ta_bin_path'], OUTPUT_DIR_NAME)
    DELETE_FILE_LIST.append(temp_new_lang_dir_path)
    if not os.path.exists(temp_new_lang_dir_path):
        os.makedirs(temp_new_lang_dir_path)
    # Make list file
    for cnt in range(thread):
        end += len(target_list) / thread
        if (len(target_list) % thread) > cnt:
            end += 1
        list_file_path = "{0}/{1}_{2}.list".format(temp_new_lang_dir_path, OUTPUT_DIR_NAME, cnt)
        list_file = open(list_file_path, 'w')
        for idx in range(start, end):
            print >> list_file, target_list[idx]
        list_file.close()
        start = end
        cmd = "./new_lang.exe -DJ {0} txt {1}".format(list_file_path, DT[:8])
        logger.debug("new_lang.exe cmd => {0}".format(cmd))
        cmd_list.append(cmd)
    pool = workerpool.WorkerPool(thread)
    pool.map(pool_sub_process, cmd_list)
    pool.shutdown()
    pool.wait()
Example #30
def execute_hmd(logger, matrix_file_path):
    """
    Execute HMD
    :param      logger:                             Logger
    :param      matrix_file_path:                   Matrix file path
    :return:                                        HMD output directory path
    """
    global DELETE_FILE_LIST
    logger.info("5. Execute HMD")
    os.chdir(IE_TA_CONFIG['hmd_script_path'])
    hmd_file_list = glob.glob("{0}/*".format(NLP_DIR_PATH))
    hmd_output_dir_path = "{0}/HMD_result".format(TA_TEMP_DIR_PATH)
    if not os.path.exists(hmd_output_dir_path):
        os.makedirs(hmd_output_dir_path)
    start = 0
    end = 0
    cmd_list = list()
    hmd_thread = int(IE_TA_CONFIG['hmd_thread'])
    thread = len(hmd_file_list) if len(hmd_file_list) < hmd_thread else hmd_thread
    # Make list file
    for cnt in range(thread):
        end += len(hmd_file_list) / thread
        if (len(hmd_file_list) % thread) > cnt:
            end += 1
        list_file_path = "{0}/{1}_{2}.list".format(IE_TA_CONFIG['hmd_script_path'], TA_TEMP_DIR_NAME, cnt)
        DELETE_FILE_LIST.append(list_file_path)
        list_file = open(list_file_path, 'w')
        for idx in range(start, end):
            print >> list_file, hmd_file_list[idx]
        list_file.close()
        start = end
        cmd = "python {0}/hmd.py {1} {2} {3} {4}".format(
            IE_TA_CONFIG['hmd_script_path'], TA_TEMP_DIR_NAME, list_file_path, matrix_file_path, hmd_output_dir_path)
        cmd_list.append(cmd)
    pool = workerpool.WorkerPool(thread)
    pool.map(pool_sub_process, cmd_list)
    pool.shutdown()
    pool.wait()
    return hmd_output_dir_path
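pool_sub_process is not defined in these snippets; a minimal sketch, assuming each mapped command string is executed as a blocking shell call:

# Hypothetical sketch of the worker function mapped by execute_new_lang() and
# execute_hmd(): run each command string as a blocking shell call.
import subprocess

def pool_sub_process(cmd):
    subprocess.call(cmd, shell=True)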