Example #1
def _array_parallel(fn, cls, genelist, chunksize=250, processes=1, **kwargs):
    """
    Returns an array of genes in `genelist`, using `bins` bins.

    `genelist` is a list of pybedtools.Interval objects

    Splits `genelist` into pieces of size `chunksize`, creating an array
    for each chunk and merging the results.

    A chunksize of 25-100 seems to work well on 8 cores.
    """
    pool = multiprocessing.Pool(processes)
    chunks = list(chunker(genelist, chunksize))
    # pool.map can only pass a single argument to the mapped function, so you
    # need this trick for passing multiple arguments; idea from
    # http://stackoverflow.com/questions/5442910/
    #               python-multiprocessing-pool-map-for-multiple-arguments
    #
    results = pool.map(
        func=_array_star,
        iterable=itertools.izip(
            itertools.repeat(fn),
            itertools.repeat(cls),
            chunks,
            itertools.repeat(kwargs)))
    pool.close()
    pool.join()
    return results
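
The `_array_star` helper used above is not shown in this listing. Given the comment about `pool.map` only passing a single argument, it is presumably a small unpacking wrapper; a hypothetical sketch (the per-chunk worker name `_array` is an assumption, not taken from this page):

def _array_star(args):
    # Hypothetical sketch: pool.map hands the worker exactly one item from the
    # iterable, so the (fn, cls, chunk, kwargs) tuple built with izip/repeat
    # is unpacked here and forwarded to the real per-chunk worker.
    fn, cls, chunk, kwargs = args
    return _array(fn, cls, chunk, **kwargs)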
Example #2
def _array_parallel(fn, cls, genelist, chunksize=250, processes=1, **kwargs):
    """
    Returns an array of genes in `genelist`, using `bins` bins.

    `genelist` is a list of pybedtools.Interval objects

    Splits `genelist` into pieces of size `chunksize`, creating an array
    for each chunk and merging the results.

    A chunksize of 25-100 seems to work well on 8 cores.
    """
    pool = multiprocessing.Pool(processes)
    chunks = list(chunker(genelist, chunksize))
    # pool.map can only pass a single argument to the mapped function, so you
    # need this trick for passing multiple arguments; idea from
    # http://stackoverflow.com/questions/5442910/
    #               python-multiprocessing-pool-map-for-multiple-arguments
    #
    results = pool.map(func=_array_star,
                       iterable=itertools.izip(itertools.repeat(fn),
                                               itertools.repeat(cls), chunks,
                                               itertools.repeat(kwargs)))
    pool.close()
    pool.join()
    return results
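
Both versions above are Python 2 code: `itertools.izip` no longer exists in Python 3, where the built-in `zip` is already lazy. A minimal Python 3 sketch of the same call, assuming the same `chunker` and `_array_star` helpers:

import itertools
import multiprocessing

def _array_parallel_py3(fn, cls, genelist, chunksize=250, processes=1, **kwargs):
    # Same single-argument trick, but with the built-in zip; the with-block
    # tears the pool down once map() has returned.
    chunks = list(chunker(genelist, chunksize))
    with multiprocessing.Pool(processes) as pool:
        results = pool.map(
            _array_star,
            zip(itertools.repeat(fn),
                itertools.repeat(cls),
                chunks,
                itertools.repeat(kwargs)))
    return results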
Example #3
def _count_array_parallel(fn, cls, genelist, chunksize=250, processes=1, **kwargs):
    pool = multiprocessing.Pool(processes)
    chunks = list(chunker(genelist, chunksize))
    results = pool.map(
        func=_count_array_star,
        iterable=itertools.izip(
            itertools.repeat(fn),
            itertools.repeat(cls),
            chunks,
            itertools.repeat(kwargs)))
    pool.close()
    pool.join()
    return results
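
The `chunker()` helper that every example on this page relies on is not itself shown. The call sites imply it yields successive slices of at most `size` items (the chunks are indexed, measured with len(), and joined, so they are ordinary sequences); a common minimal implementation looks like this, though the real one may differ:

def chunker(seq, size):
    # Hypothetical sketch: yield consecutive slices of `seq`, each at most
    # `size` items long; the last chunk may be shorter.
    for start in range(0, len(seq), size):
        yield seq[start:start + size]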
Example #4
    def retrieve_pages(self, pagetitles, data, chunk_size=50, delay=0.5):
        for i, chunk in enumerate(chunker(pagetitles, chunk_size)):
            show_progress(
                i * chunk_size + len(chunk), len(pagetitles),
                "Retrieving chunk '{}'-'{}'".format(chunk[0], chunk[-1]))
            data["titles"] = "|".join(chunk)

            response = self.post(self.api_location, data=data)

            yield response
            sleep(delay)

        show_progress(len(pagetitles), len(pagetitles), "Retrieved chunks.",
                      True)
Example #5
    def retrieve_pages(self, pagetitles, data, chunk_size=50, delay=0.5):
        for i, chunk in enumerate(chunker(pagetitles, chunk_size)):
            show_progress(
                i * chunk_size + len(chunk), len(pagetitles),
                "Retrieving chunk '{}'-'{}'".format(chunk[0], chunk[-1])
            )
            data["titles"] = "|".join(chunk)

            response = self.post(self.api_location, data=data)

            yield response
            sleep(delay)

        show_progress(len(pagetitles), len(pagetitles),
                      "Retrieved chunks.", True)
Example #6
    def getWikidataItems(self, allItems):
        self.fetchWikipediaLanguages()
        self.wdSite = wikipedia('www', 'wikidata')

        filename = config['filename-api']

        if os.path.isfile(filename):
            data = eval(open(filename, 'r', encoding='utf-8').read())
            self.oneBatch(data, False)
        else:
            for group in chunker(allItems, 50):
                self.oneBatch(group)

            saveToFile(filename,
                       json.dumps(self.itemData_FULL, ensure_ascii=False))
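
The cache file read with `eval()` above is written by the `json.dumps` call at the end of the method. Assuming that call is the only writer, `json.load` would read it back without executing arbitrary Python; a minimal sketch:

import json

# Sketch under the assumption that the file only ever holds the json.dumps
# output saved above.
with open(filename, 'r', encoding='utf-8') as handle:
    data = json.load(handle)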
Example #7
	def get_item_data(self, wd_items, raw=False, attributes=['sitelinks', 'claims'], claim_props=[]):

		retMap = {}

		for batch in chunker(wd_items, 49):
			res = self.site('wbgetentities', ids=batch, props='|'.join(attributes))
			for entity in res.get('entities'):
				data = res.get('entities').get(entity)
				tmp_data = {}
				for attr in attributes:
					if attr == 'sitelinks':
						sitelinks = {f:data.get(attr).get(f).get('title') for f in data.get(attr)}
						data.update({'sitelinks': sitelinks})
					if attr == 'claims':
						claims = clean_api(data.get(attr))
						data.update({'claims': claims})
				#print(data.get('type'))
				#parsed_data = clean_api(data) if raw and 'claims' else data

				retMap.update({entity: data})

		return retMap
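
`get_item_data` walks `wd_items` through `wbgetentities` in batches of 49, presumably to stay under the API's usual 50-id limit, and returns a single dict keyed by entity id. A hypothetical usage sketch (`bot` and the Q-ids are assumptions, not from this page):

qids = ['Q42', 'Q64', 'Q90']
items = bot.get_item_data(qids, attributes=['sitelinks', 'claims'])
for qid, data in items.items():
    # Each value carries the flattened 'sitelinks' and the cleaned 'claims'
    # produced in the loop above.
    print(qid, data.get('sitelinks', {}).get('enwiki'))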