Example #1
def csv_to_solr(fl, endpoint='http://dev-search:8983/solr/main', num_topics=999, reset_callback=None):

    if reset_callback is not None:
        print "Resetting (no way back now!)"
        reset_callback()

    print 'generating updates'
    initialize_doc = dict([('topic_%d_tf' % i, {'set': 0}) for i in range(1, num_topics)])
    p = Pool(processes=8)
    line_groupings = [[]]
    grouping_counter = 0
    total_lines = 0
    for line in fl:
        line_groupings[grouping_counter].append(line)
        if len(line_groupings[grouping_counter]) >= 10000:
            if grouping_counter == 3:
                curr_lines = sum(map(len, line_groupings))
                total_lines += curr_lines
                print 'processing line groups for', curr_lines, 'lines', total_lines, 'total'
                groupings = [(endpoint, initialize_doc, line_groupings[i]) for i in range(0, len(line_groupings))]
                print p.map_async(process_linegroup, groupings).get()
                grouping_counter = 0
                line_groupings = [[]]
            else:
                grouping_counter += 1
                line_groupings.append([])

    groupings = [(endpoint, initialize_doc, line_groupings[i]) for i in range(0, len(line_groupings))]
    print p.map_async(process_linegroup, groupings).get()

    print "Committing..."
    requests.post('%s/update?commit=true' % endpoint, headers={'Content-type': 'application/json'})

    return True
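A side note on the pattern above: map_async returns an AsyncResult, and calling .get() on it blocks until every task in the batch has finished, so the function effectively processes four groups of 10,000 lines at a time. Below is a minimal sketch of the same batching idea (Python 3), with a hypothetical process_chunk standing in for process_linegroup:

from multiprocessing import Pool

def process_chunk(lines):
    # placeholder for the real per-chunk work (e.g. posting updates to Solr)
    return len(lines)

def in_batches(iterable, chunk_size=10000, chunks_per_batch=4):
    batch = []
    for item in iterable:
        batch.append(item)
        if len(batch) == chunk_size * chunks_per_batch:
            yield [batch[i:i + chunk_size] for i in range(0, len(batch), chunk_size)]
            batch = []
    if batch:
        yield [batch[i:i + chunk_size] for i in range(0, len(batch), chunk_size)]

if __name__ == '__main__':
    with Pool(processes=8) as pool:
        for chunks in in_batches(range(100000)):
            # .get() blocks until all the chunks in this batch are done
            print(pool.map_async(process_chunk, chunks).get())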
Example #2
def convert_document(pdf_filename,
                     resolution=200,
                     delete_files=True,
                     page_delineation='\n--------\n',
                     verbose=False,
                     temp_dir=str(uuid.uuid4()),
                     password='',
                     thumb_prefix='thumb_page_',
                     pool_count=2):
    just_pdf_filename = os.path.basename(pdf_filename)
    temp_pdf_filename = '{0}/{1}'.format(temp_dir, just_pdf_filename)
    shutil.copyfile(pdf_filename, temp_pdf_filename)
    filename = decrypt_pdf(temp_pdf_filename, temp_dir, password)
    filenames = split_pdf(filename, temp_dir)
    for filename in filenames:
        __pdf_filenames.put(filename)
    pool = Pool()
    pool.map_async(
        _yapot_worker,
        [(tid, just_pdf_filename, temp_dir, resolution) for
            tid in range(0, pool_count)],
    )
    while __text_filenames.qsize() != len(filenames):
        time.sleep(1)
    text_filenames = []
    try:
        while(1):
            text_filenames.append(__text_filenames.get_nowait())
    except:
        pass
    text = build_output_text(text_filenames, page_delineation)
    if delete_files:
        cleanup_yapot(temp_dir)
    return text
Example #3
def filter_wids(wids, refresh=False):
    p = Pool(processes=8)
    wids = [x[0] for x in p.map_async(exists, wids).get() if x[1]]
    if not refresh:
        wids = [x[0] for x in p.map_async(not_processed, wids).get() if x[1]]

    return wids
Example #4
def main():
    args = parse_options()
    viewpoint = args.observation_point
    viewpoint = Vector(*viewpoint)
    vertex_count, face_count, lines_count = importutils.analyze_file(args.input_file)
    print 'Vertices: %d, Primitives: %d' % (vertex_count, face_count)
    faces = importutils.get_faces(args.input_file)
    print 'File imported.'

    pool = Pool(args.jobs)
    try:
        result = pool.map_async(geometryutils.build_triangles, faces, 10000)
    except KeyboardInterrupt:
        pool.terminate()
        print 'Program stopped.'
        return
    triangles = result.get()

    triangles = chain.from_iterable(triangles)

    print 'Triangles generated.'

    try:
        process_data = ((t, viewpoint, args.wavelength) for t in triangles)
        result = pool.map_async(processor.try_process_triangle, process_data)
    except KeyboardInterrupt:
        pool.terminate()
        print 'Program stopped.'
        return
    data = result.get()
    data = filter(lambda x: x, data)
    print 'Model processed.'
    processor.write_triangles_data(data, args.output_file)
    print 'Data written into %s' % args.output_file
Example #5
def rc(rf, alphabet, numOfThreads):
	tryn=0
	counterTmp = 0
	printCounter = 1000
	listBasic = []
	if rf.endswith('.rar'):
		funcChosen = unrar
	elif rf.endswith('.zip') or rf.endswith('.7z') :
		funcChosen = zipFileUnzip
	for a in range(1,len(alphabet)+1):
		for b in itertools.product(alphabet,repeat=a):
			k="".join(b)
			k=re.escape(k)
			listBasic.append(k)
			tryn+=1
			if len(listBasic) == numOfThreads:
				pool = Pool(numOfThreads)
				pool.map_async(funcChosen, listBasic, callback = exitPass)
				pool.close()
				if resultPass:
					timeWasted = time.time()-start
					print 'Found! Password is '+resultPass
					print "It took " +str(round(time.time()-start,3))+" seconds"
					print "Speed: "+str(round(tryn/float(timeWasted),2))+" passwords/sec"
					print "Tried "+str(tryn)+" passwords"
					exit()
				listBasic = []
			counterTmp+=1
			if counterTmp >= printCounter:
				print 'Trying combination number '+str(tryn)+':'+str(k)
				timeWasted = round(time.time()-start,2)
				if timeWasted > 0:
					print "It took already " +str(timeWasted) +" seconds. Speed: "+str(round(tryn/float(timeWasted),2))+" passwords/sec"
				counterTmp=0
Example #6
def _get_images_from_pdf(pdf_filename, resolution, verbose, delete_files,
        temp_dir, make_thumbs, thumb_size, thumb_dir, thumb_prefix, pool_count=1):

    success = False
    try:

        if verbose == True:
            print "Splitting PDF into multiple PDFs for processing ..."

        # make sure there is a place to put our temporary pdfs
        if not os.path.exists(temp_dir):
            os.makedirs(temp_dir)

        # make sure if we are going to make thumbs, the folder exists
        if make_thumbs == True:
            if not os.path.exists(thumb_dir):
                os.makedirs(thumb_dir)

        # read input pdf
        inputpdf = PdfFileReader(open(pdf_filename, "rb"))
        if inputpdf.getIsEncrypted():
            inputpdf.decrypt('')

        if verbose == True:
            print "Writing out %i pages ..." % inputpdf.numPages

        # create all of the temporary pdfs
        for i in xrange(inputpdf.numPages):
            output = PdfFileWriter()
            output.addPage(inputpdf.getPage(i))
            #print output.resolvedObjects
            filename = "{0}/document-page-{1}.pdf".format(temp_dir,i)
            with open(filename, "wb") as outputStream:
                output.write(outputStream)
            __pdf_queue.put(i)

        if verbose == True:
            print "Dispatching pdf workers ..."

        # spin up our workers to convert the pdfs to images
        #pool_count = 4
        pool = Pool()
        pool.map_async(
            _pdf_converter_worker,
            [(x, resolution, verbose, delete_files,
                temp_dir, make_thumbs, thumb_size,
                thumb_dir, thumb_prefix) for \
                x in range(pool_count)]
        )

        while __pdf_texts.qsize() != inputpdf.numPages:
            time.sleep(.25)

        if verbose == True:
            print "Done converting PDF."

        success = True

    except Exception, e:
        print str(e)
Example #7
def download(urls, targets, num_proc=1, username=None, password=None,
             recursive=False, filetypes=None):
    """
    Download the urls and store them at the target filenames.

    Parameters
    ----------
    urls: iterable
        iterable over url strings
    targets: iterable
        paths where to store the files
    num_proc: int, optional
        Number of parallel downloads to start
    username: string, optional
        Username to use for login
    password: string, optional
        Password to use for login
    recursive: boolean, optional
        If set then no exact filenames can be given.
        The data will then be downloaded recursively and stored in the target folder.
    filetypes: list, optional
        list of file extensions to download; any others will not be downloaded
    """
    p = Pool(num_proc)
    # partial function for Pool.map
    cookie_file = tempfile.NamedTemporaryFile()
    dlfunc = partial(wget.map_download,
                     username=username,
                     password=password,
                     cookie_file=cookie_file.name,
                     recursive=recursive,
                     filetypes=filetypes)

    p.map_async(dlfunc, zip(urls, targets)).get(9999999)
    cookie_file.close()
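The .get(9999999) used above (and in several later examples) is a common workaround rather than an API requirement: in Python 2, an AsyncResult.get() with no timeout waits in a way that never lets KeyboardInterrupt reach the main process, so passing a very large timeout keeps Ctrl-C usable. A minimal sketch of the idea, with a hypothetical work function:

from multiprocessing import Pool

def work(x):
    return x * 2

if __name__ == '__main__':
    pool = Pool(processes=4)
    try:
        # the huge timeout keeps the blocking wait interruptible with Ctrl-C
        results = pool.map_async(work, range(10)).get(9999999)
        print(results)
    except KeyboardInterrupt:
        pool.terminate()
    else:
        pool.close()
    pool.join()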
Example #8
 def correction_terms_threaded(self):
     '''Finds the correction terms associated to the quadratic form;
     for each of the equivalence classes it finds the maximum by
     iterating through the relation vectors of the group.
     
     Uses multiprocessing.'''
     print 'Using multiprocessing'
     pool = Pool() # default: processes=None => uses cpu_count()
     manager = Manager()
     start_time = time.time()
     coef_lists = lrange(self.group.structure)
     # representatives = elements of C_1(V) (np.matrix)
     representatives = map(lambda l: self.find_rep(l), coef_lists)
     # list of maxes        
     lst = manager.list([None for i in xrange(len(representatives))]) 
     alphalist = list(self.get_alpha()) # cannot pickle generators
     pool.map_async(functools.partial(process_alpha_outside, self, 
                                      representatives, lst), alphalist)
     pool.close()
     pool.join() # wait for pool to finish
     # get corrterms via (|alpha|^2+b)/4
     print 'Computed from quadratic form in %g seconds' \
           % (time.time() - start_time)
     return [Fraction(Fraction(alpha, self.int_inverse[1]) + self.b, 4) \
                     for alpha in lst]            
Example #9
def _mp_compile(self, sources, output_dir=None, macros=None,
                include_dirs=None, debug=0, extra_preargs=None,
                extra_postargs=None, depends=None):
    """Compile one or more source files.

    see distutils.ccompiler.CCompiler.compile for comments.
    """
    # A concrete compiler class can either override this method
    # entirely or implement _compile().

    macros, objects, extra_postargs, pp_opts, build = self._setup_compile(
        output_dir, macros, include_dirs, sources, depends, extra_postargs)
    cc_args = self._get_cc_args(pp_opts, debug, extra_preargs)

    pool = Pool(MAX_PROCS)
    try:
        print("Building using %d processes" % pool._processes)
    except:
        pass
    arr = [(self, obj, build, cc_args, extra_postargs, pp_opts)
           for obj in objects]
    pool.map_async(_mp_compile_one, arr)
    pool.close()
    pool.join()
    # Return *all* object filenames, not just the ones we just built.
    return objects
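Note that the example above calls map_async purely for its side effects and never reads the AsyncResult, so any exception raised inside _mp_compile_one is silently discarded. A hedged sketch of one way to keep the fire-and-forget shape while still surfacing worker errors (the compile_one name below is made up for illustration):

from multiprocessing import Pool

def compile_one(task):
    # stand-in for the real per-object compile step
    if task < 0:
        raise ValueError("bad task %r" % (task,))
    return task

if __name__ == '__main__':
    pool = Pool(4)
    async_result = pool.map_async(compile_one, range(8))
    pool.close()
    pool.join()
    # .get() after join() returns immediately and re-raises any worker exception
    async_result.get()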
Example #10
def bruteUser(userlist, psize, hosti, pathi, porti, securei, userfound):
    global host
    host = hosti
    global port
    port = porti
    global secure
    secure = securei
    global userout
    userout = userfound
    global path
    path = pathi
    f = open(userout, "w").close()
    usersize = len(userlist)
    # manage pool
    if usersize <= psize:
        chunksize = 1
    else:
        chunksize = (usersize / psize) + (usersize % psize)
    print("Userlist size: %d\tChunk size: %d\tPool size: %d" % (usersize, chunksize, psize))
    print("Bruteforcing usernames")
    pool = Pool(processes=psize)
    for chunk in itertools.izip(grouper(userlist, chunksize)):
        pool.map_async(worker, chunk)
    pool.close()
    try:
        while len(active_children()) > 0:  # how many active children do we have
            sleep(2)
            ignore = active_children()
    except KeyboardInterrupt:
        exit("CTRL^C caught, exiting...\n\n")
    print("Username bruteforce complete")
Example #11
def pool_multiprocess_index(file_or_urls,shard_size=10000):
    pool = Pool(processes=3)
    for file_ref in file_or_urls:
        reader = pymarc.MARCReader(open(file_ref,'rb'))
        print("Start-up multiprocess pool")
        pool.map_async(index_shard,reader,shard_size)
    print("Finished multiprocess")
Example #12
    def __init__(self, subID):

        """
        THINGS TO UNDERSTAND:
        - when adding simulation attributes, why is the list nested (need to call simObjList[0])??
        """        
        
        # Create instance of class from superclass
        Subject.__init__(self, subID)
        # Prepare to process in parallel
        # Simulation descriptors
        simDescriptors = ['A_SD2F_RepGRF', 'A_SD2F_RepKIN', 'A_SD2S_RepGRF', 'A_SD2S_RepKIN',
                          'A_Walk_RepGRF', 'A_Walk_RepKIN', 'U_SD2F_RepGRF', 'U_SD2F_RepKIN',
                          'U_SD2S_RepGRF', 'U_SD2S_RepKIN', 'U_Walk_RepGRF', 'U_Walk_RepKIN']
        # List of simulation names
        simNames = [subID + '_' + descriptor for descriptor in simDescriptors]
        # Initialize global variable for simulation objects
        initializeSimList()
        # Start worker pool
        pool = Pool(processes=12)
        # Run parallel processes to process simulations and append object to global list
        pool.map_async(runParallel, simNames, callback=updateSimList)
        # Clean up spawned processes
        pool.close()
        pool.join()
        # Add simulations as attributes to subject object
        for simObj in simObjList[0]:
            setattr(self, simObj.simName, simObj)
        # Display message to user
        print 'Time elapsed for processing subject ' + self.subID + ': ' + str(int(time.time()-self.startTime)) + ' seconds'
Example #13
	def process_batch(self):
		global batch_list

		# Wait for splitting to finish and reinitialize new Pool				
		self.split_pool.close()
		self.split_pool.join()
		self.split_pool = Pool(processes=self.pool_size)
		
		# Filter array for None values
		batch_list = [x for x in batch_list if x is not None]

		# Get category of each word based on keywords
		process_pool = Pool(processes=self.pool_size)
		keyword_result = process_pool.map_async(get_keyword_categories, batch_list)

		# Get category of each word using conceptnet
		#conceptnet_pool = Pool(processes=self.pool_size)
		conceptnet_result = process_pool.map_async(get_conceptnet_categories, batch_list)

		# Get NER categories
		#NER_pool = Pool(processes=self.pool_size)
		NER_result = process_pool.map_async(get_NER_categories, batch_list)

		# Wait for processes in the batch to finish
		print "Keyword"
		sys.stdout.flush()
		keyword_result = keyword_result.get()
		
		#while(not conceptnet_result.ready()):
		#	print conceptnet_result._number_left
		print "NER"
		sys.stdout.flush()
		NER_result = NER_result.get()

		print "Concept net"
		sys.stdout.flush()
		conceptnet_result = conceptnet_result.get()
		#conceptnet_result = process_pool.map(get_conceptnet_categories, batch_list)
		
		

		# Merge results from each type of category
		for i in range(0,len(keyword_result)):
			keyword_result[i].update(conceptnet_result[i])
			keyword_result[i].update(NER_result[i])
			# Build category values based on values of other columns
			keyword_result[i].update(get_columnval_categories(keyword_result[i]))

		# Build and write column values for CSV file
		for i in range(0,len(self.temp_row)):
			val_row = []
			for column in self.headers:
				val_row.append(keyword_result[i][column])

			cur_row = self.temp_row[i] + val_row
			self.file_writer.writerow(cur_row)

		# Reset temporary containers
		self.temp_row = []
		batch_list = [None] * self.batch
Example #14
def run_ntuples(analysis, channel, period, samples, loglevel, **kwargs):
    '''Run a given analyzer for the analysis'''
    logger = logging.getLogger(__name__)
    test = kwargs.pop('test',False)
    metShift = kwargs.pop('metShift','')
    ntup_dir = './ntuples/%s_%iTeV_%s' % (analysis, period, channel)
    python_mkdir(ntup_dir)
    root_dir, sample_names = get_sample_names(analysis,period,samples,**kwargs)


    filelists = {}
    for sample in sample_names:
        sampledir = '%s/%s' % (root_dir, sample)
        filelists[sample] = ['%s/%s' % (sampledir, x) for x in os.listdir(sampledir)]

    if len(sample_names)==1 or test: # only one sample, or it's a test: don't use the pool map
        name = sample_names[0]
        outname =  "%s/%s.root" % (ntup_dir, name)
        if test: outname = 'test.root'
        run_analyzer((analysis, channel, name, filelists[name], outname, period, metShift, loglevel))
        return 0

    p = Pool(8)
    try:
        p.map_async(run_analyzer, [(analysis, channel, name, filelists[name], "%s/%s.root" % (ntup_dir, name), period, metShift, loglevel) for name in sample_names]).get(999999)
    except KeyboardInterrupt:
        p.terminate()
        logger.info('Analyzer cancelled')
        sys.exit(1)
   
    return 0
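The analyzers above receive all of their parameters packed into a single tuple because map_async passes each worker exactly one argument. In Python 3 the same thing can be written with starmap_async, which unpacks the tuples for you; a minimal sketch with a hypothetical run_job:

from multiprocessing import Pool

def run_job(analysis, channel, name):
    return '%s/%s/%s' % (analysis, channel, name)

if __name__ == '__main__':
    jobs = [('Hpp3l', '3l', 'sample%d' % i) for i in range(4)]
    with Pool(4) as pool:
        # starmap_async unpacks each tuple into positional arguments
        print(pool.starmap_async(run_job, jobs).get())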
Example #15
def process_albums(albums, output, vk_session):
    """

    :param albums:
    :param output:
    :return:
    """

    logger.info('Begin downloading %s album(s)', len(albums))
    for album in albums:
        offset = 0
        album_folder = path.join(output, album['title'])
        if not path.exists(album_folder):
            mkdir(album_folder)

        logger.debug('Album Size: %s', album['size'])
        while offset <= album['size']:
            photo_urls = get_album_photos(album=album, offset=offset, vk_session=vk_session)
            logger.debug('Got URLs for %s photo(s)', len(photo_urls))

            f = partial(download_photo, album_folder)
            pool = Pool(processes=8)
            pool.map_async(f, photo_urls)
            # And wait till end
            pool.close()
            pool.join()

            offset += 1000

        logger.info(u'Album "%s" [%d] downloaded.', album['title'], album['size'])

    logger.info('%d photo(s) downloaded.' % sum([album['size'] for album in albums]))
Example #16
def run_pipeline_local(threads, reads_list):
	# for read_data in reads_list:
		# multiprocessing(read_data)
	p = Pool(int(threads))
	p.map_async(multiprocessing,reads_list).get(9999999)
	p.close()
	p.join()
Example #17
def multi_download(bucket_name, key_name, dest_file_path, num_processes=10, force_overwrite=True):

    # Check that dest does not exist
    if os.path.exists(dest_file_path) and force_overwrite:
        os.remove(dest_file_path)
    elif os.path.exists(dest_file_path):
        raise ValueError("Destination file '{0}' exists".format(dest_file_path))

    # Touch the file
    fd = os.open(dest_file_path, os.O_CREAT)
    os.close(fd)

    conn = get_conn()
    bucket = conn.lookup(bucket_name, validate=False)
    key = bucket.get_key(key_name)
    size = key.size

    num_parts = num_processes

    def arg_iterator(num_parts):
        for min_byte, max_byte in gen_byte_ranges(size, num_parts):
            yield (bucket.name, key.name, dest_file_path, min_byte, max_byte)

    s = size / (1024.0 * 1024.0)

    try:
        start_time = time.time()
        pool = Pool(processes=num_processes)
        pool.map_async(do_part_download, arg_iterator(num_parts)).get(9999999)
        time_diff = time.time() - start_time
        log("Finished downloading %0.2fM in %0.2fs (%0.2fMbps)" % (s, time_diff, s / time_diff))
    except KeyboardInterrupt:
        log("User terminated")
    except Exception, err:
        log(err)
Example #18
def main():
    logging.basicConfig(level=logging.INFO)
    args = parser.parse_args()
    log.debug("Got args: %s" % args)


    # Check that src is a valid S3 url
    split_rs = urlparse.urlsplit(args.src)
    if split_rs.scheme != "s3":
        raise ValueError("'%s' is not an S3 url" % args.src)

    # Check that dest does not exist
    if os.path.exists(args.dest):
        if args.force:
            os.remove(args.dest)
        else:
            raise ValueError("Destination file '%s' exists, specify -f to"
                             " overwrite" % args.dest)

    # Split out the bucket and the key
    s3 = boto.connect_s3()
    bucket = s3.lookup(split_rs.netloc)
    key = bucket.get_key(split_rs.path)

    # Determine the total size and calculate byte ranges
    conn = boto.connect_s3()
    resp = conn.make_request("HEAD", bucket=bucket, key=key)
    size = int(resp.getheader("content-length"))
    logging.info("Got headers: %s" % resp.getheaders())

    # Skipping multipart if file is less than 1mb
    if size < 1024 * 1024:
        t1 = time.time()
        key.get_contents_to_filename(args.dest)
        t2 = time.time() - t1
        log.info("Finished single-part download of %0.2fM in %0.2fs (%0.2fMbps)" %
                (size, t2, size/t2))
    else:
        # Touch the file
        fd = os.open(args.dest, os.O_CREAT)
        os.close(fd)
    
        num_parts = args.num_processes

        def arg_iterator(num_parts):
            for min_byte, max_byte in gen_byte_ranges(size, num_parts):
                yield (bucket.name, key.name, args.dest, min_byte, max_byte)

        s = size / 1024 / 1024.
        try:
            t1 = time.time()
            pool = Pool(processes=args.num_processes)
            pool.map_async(do_part_download, arg_iterator(num_parts)).get(9999999)
            t2 = time.time() - t1
            log.info("Finished downloading %0.2fM in %0.2fs (%0.2fMbps)" %
                    (s, t2, s/t2))
        except KeyboardInterrupt:
            log.info("User terminated")
        except Exception, err:
            log.error(err)
Example #19
def load_crawlers(database_config_file, data_sources_file):
    pool = Pool(processes=10)
    while True:
        f = open(data_sources_file, 'r')
        xml_string = f.read()
        config = BeautifulSoup(xml_string, "xml")
        f.close()
        crawlers = []
        #print config
        dom = xml.dom.minidom.parseString(xml_string)
        sources = dom.getElementsByTagName("source")
        for source in sources:
            crawler = source.getElementsByTagName("crawler")[0]
            if getText(crawler.getElementsByTagName("enable")[0].childNodes) == '1':
                crawler_data = CrawlerData()
                crawler_data.domain = getText(source.getElementsByTagName("domain")[0].childNodes)
                crawler_data.encode = getText(source.getElementsByTagName("encoding")[0].childNodes)
                crawler_data.source_name = getText(source.getElementsByTagName("source_name")[0].childNodes)
                crawler_data.database_config_file = database_config_file 
                crawler_data.request_interval_seconds = int(getText(crawler.getElementsByTagName("request_interval_seconds")[0].childNodes))
                crawlers.append(crawler_data)
        for c in crawlers:
            print c
        pool.map_async(create_crawler, crawlers).get(999999)
        time.sleep(20)
        
    pool.close()
    pool.join()
Example #20
def brutePlugin(pluginlist,foundplug,hosti,pathi,porti,securei,psize):
	global host
	host = hosti
	global port
	port = porti
	global secure
	secure = securei
	global plugfound
	plugfound = foundplug
	global path
	path = pathi
	f = open(plugfound,'w').close()
	listsize = (len(pluginlist))
	
	# manage pool
	if (psize == 0):	psize = 5
	if (listsize <= psize):	chunksize = 1
	else:	chunksize = ((listsize / psize) + (listsize % psize))
	print("Plugin list size: %d\tChunk size: %d\tPool size: %d" % ((listsize),chunksize,psize))
	print("Plugin bruteforcing started")
	pool = Pool(processes=psize)
	for chunk in itertools.izip(grouper(pluginlist,chunksize)):  pool.map_async(worker,chunk)
	pool.close()
	try:
		while(len(active_children()) > 0): # how many active children do we have
			sleep(2)
			ignore = active_children()
	except KeyboardInterrupt:	exit('CTRL^C caught, exiting...\n\n')
	print("Plugin bruteforce complete")
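map_async also accepts a chunksize argument, so the manual grouper/chunk-size bookkeeping in examples like this one can often be replaced by letting the pool split the iterable itself. A minimal sketch with a hypothetical check_plugin worker:

from multiprocessing import Pool

def check_plugin(name):
    return name.upper()

if __name__ == '__main__':
    plugins = ['akismet', 'jetpack', 'wordfence', 'yoast']
    with Pool(processes=5) as pool:
        # chunksize controls how many items each worker pulls at a time
        results = pool.map_async(check_plugin, plugins, chunksize=2).get()
    print(results)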
Example #21
def main(argv=None):
    if argv is None:
        argv = sys.argv[1:]

    args = parse_command_line(argv)

    loglevel = getattr(logging,args.log)
    logging.basicConfig(format='%(asctime)s.%(msecs)03d %(levelname)s %(name)s: %(message)s', level=loglevel, datefmt='%Y-%m-%d %H:%M:%S')

    branchingPoints = ['ee100','em100','mm100','et100','mt100','tt100','BP1','BP2','BP3','BP4']
    masses = _3L_MASSES if args.analysis=='Hpp3l' else _4L_MASSES
    if args.do4l: masses = _4L_MASSES
    if args.doBoth: masses = _4L_MASSES

    if not args.allMasses: masses = [args.mass]
    if not args.allBranchingPoints: branchingPoints = [args.branchingPoint]

    poolArgs = [[m,b] for m in masses for b in branchingPoints]

    if len(poolArgs)==1:
        job = poolArgs[0]
        BPWrapper((args.analysis,args.channel,args.period,job[0],job[1],args.bgMode,args.scaleFactor,args.doAlphaTest,args.unblind,args.do4l,args.doBoth,args.cut,args.skipTau))
    else:
        p = Pool(8)
        try:
            p.map_async(BPWrapper, [(args.analysis,args.channel,args.period,job[0],job[1],args.bgMode,args.scaleFactor,args.doAlphaTest,args.unblind,args.do4l,args.doBoth,args.cut,args.skipTau) for job in poolArgs]).get(999999)
        except KeyboardInterrupt:
            p.terminate()
            print 'limits cancelled'
            sys.exit(1)
    
    return 0
Example #22
def wzlimits(analysis, region, period, **kwargs):
    cut = kwargs.pop('cut', '1')
    scalefactor = kwargs.pop(
        'scalefactor',
        'event.gen_weight*event.pu_weight*event.lep_scale*event.trig_scale')
    datacardDir = kwargs.pop('datacardDir', './datacards')
    mode = kwargs.pop('mode', 'all')

    poolArgs = []
    for chan in ['eee', 'eem', 'mme', 'mmm']:
        poolArgs += [(analysis, region, period, chan, chan, cut, scalefactor,
                      datacardDir, mode)]

    if len(poolArgs) == 1:
        job = poolArgs[0]
        wzLimitWrapper(job)
    else:
        p = Pool(8)
        try:
            p.map_async(wzLimitWrapper, poolArgs).get(999999)
        except KeyboardInterrupt:
            p.terminate()
            print 'limits cancelled'
            sys.exit(1)

    return 0
Example #23
    def generate(self, options):
        if getattr(settings, "MEDUSA_MULTITHREAD", False):
            # Upload up to ten items at once via `multiprocessing`.
            from multiprocessing import Pool, cpu_count

            print("Generating with up to %d processes..." % cpu_count())
            pool = Pool(cpu_count())

            pool.map_async(
                _disk_render_path,
                ((None, path, None) for path in self.paths),
                chunksize=5
            )
            pool.close()
            pool.join()
        else:
            # Use standard, serial upload.
            self.client = Client()
            if options['medusa_host']:
                host = options['medusa_host']
            elif hasattr(settings, 'MEDUSA_HTTP_HOST'):
                host = settings.MEDUSA_HTTP_HOST
            else:
                host = None
            self.host = host
            for path in self.paths:
                self.render_path(path=path, host=options['medusa_host'])
Example #24
    def run(self):
        tasks = self.get_all_tasks_config()
        self.create_reports(tasks)
        self.init_rally_config()
        self.create_deployment()

        logging.info("Start to run tasks...")
        process_num = 2
        try:
            cpu_num = multiprocessing.cpu_count()
            process_num = [process_num, cpu_num][process_num < cpu_num]
        except Exception:
            logging.info("cpu_count() has not been implemented!")

        logging.info("The number of processes will be %s." % process_num)
        try:
            pool = Pool(processes=process_num)
            pool.map_async(run_task, zip([self] * len(tasks), tasks))
            pool.close()
            pool.join()
        except Exception as ex:
            logging.info("processing pool get exception: '%s'" % ex)

        finally:
            clean_pidfile()
Example #25
def get_feature_data(args):
    bucket = connect_s3().get_bucket('nlp-data')
    widlines = bucket.get_key('datafiles/topwams.txt').get_contents_as_string().split("\n")
    wids = filter(lambda x: x, widlines)[:args.num_wikis]

    log("Loading entities and heads for ", len(wids), "wikis")
    pool = Pool(processes=args.num_processes)
    r = pool.map_async(get_data, wids)
    r.wait()
    wiki_data = defaultdict(dict, r.get())

    log("Getting data from API")
    widstrings = [','.join(wids[i:i+20]) for i in range(0, len(wids), 20)]
    r = pool.map_async(get_wiki_data_from_api, widstrings)
    for grouping in r.get():
        if type(grouping) != dict:
            continue
        for wiki_id, api_data in grouping.items():
            wiki_data[wiki_id]['api_data'] = api_data

    log("Turning data into features")
    wiki_ids, data_dicts = zip(*wiki_data.items())
    log("Working on", len(wiki_ids))
    r = pool.map_async(data_to_features, data_dicts)
    r.wait()
    wid_to_features = zip(wiki_ids, r.get())
    log(len(set([value for _, values in wid_to_features for value in values])), "features")
    return dict(wid_to_features)
Example #26
def brutePasses(userlist,passlist,hosti,pathi,porti,securei,psize,loginsi):
	global host
	host = hosti
	global port
	port = porti
	global secure
	secure = securei
	global logins
	logins = loginsi
	global path
	path = pathi
	global usernames
	usernames = userlist
	usersize = len(userlist)
	passsize = len(passlist)
	
	# manage pool
	if (psize == 0):	psize = 5
	if ((usersize*passsize) <= psize):	chunksize = 1
	else:	chunksize = (((usersize*passsize) / psize) + ((usersize*passsize) % psize))
	#print("%s" % ((ceil(float((usersize*passsize)) / psize)) + ((usersize*passsize) % psize)))
	print("Userlist size: %d\tPassword size: %d\tChunk size: %d\tPool size: %d" % (usersize,passsize,chunksize,psize))
	pool = Pool(processes=psize)
	for chunk in itertools.izip(grouper(passlist,chunksize)):  pool.map_async(worker,chunk)
	pool.close()
	try:
		while(len(active_children()) > 0): # how many active children do we have
			sleep(2)
			ignore = active_children()
	except KeyboardInterrupt:	exit('CTRL^C caught, exiting...\n\n')
	print("Password bruteforce attempts completed")
Example #27
def SGDNOMAD(data, movies_, eta_ = 0.01, lambduh_ = 0.1, lambduh_w_ = 0.1, rank = 10, maxit = 10):
    global latentShape, weightShape, biasShape, userOffset, movieOffset, mp_arr, mp_w, mp_b, eta, lambduh, lambduh_w, counter, qsize, movies
    movies = movies_.tocsr()
    t1 = time.time()
    eta = eta_
    lambduh = lambduh_
    lambduh_w = lambduh_w_
    userOffset = 0
    movieOffset = data.shape[0]
   
    # Allocate shared memory across processors for latent variable 
    latentShape = (sum(data.shape), rank)
    mp_arr = mp.Array(ctypes.c_double, latentShape[0] * latentShape[1])
    latent = np.frombuffer(mp_arr.get_obj()).reshape(latentShape)

    weightShape = (latentShape[0], movies.shape[1])
    mp_w = mp.Array(ctypes.c_double, weightShape[0] * weightShape[1])
    weights = np.frombuffer(mp_w.get_obj()).reshape(weightShape)

    biasShape = (latentShape[0], 1)
    mp_b = mp.Array(ctypes.c_double, biasShape[0] * biasShape[1])
    biases = np.frombuffer(mp_b.get_obj()).reshape(biasShape)

    counter = mp.Value('i', 0)
    qsize = mp.Array('i', [0] * FLAGS.cores)

    # Initialize latent variable so that expectation equals average rating
    avgRating = data.sum() / data.nnz
    latent[:] = np.random.rand(latentShape[0], latentShape[1]) * math.sqrt(avgRating / rank / 0.25)
    weights[:] = np.zeros(weightShape)
    biases[:] = np.zeros(biasShape)

    slices = slice(data, FLAGS.cores)
    rowSlices = rowSlice(data, FLAGS.cores)

    p2 = Pool(FLAGS.cores)
    p = Pool(FLAGS.cores)
    it = 0
    printLog(0, 0, time.time() - t1, RMSE2(slices, data.nnz, p2))


    manager = mp.Manager()
    queues = [manager.Queue() for x in range(FLAGS.cores)]

    colList = np.round(np.linspace(0, data.shape[1], (FLAGS.cores) * 20 + 1)).astype(int)

    #for i in range(data.shape[1]):
        #queues[np.random.randint(0, FLAGS.cores)].put(i)

    for i in range(len(colList) - 1):
        r = np.random.randint(0, FLAGS.cores)
        queues[r].put((colList[i], colList[i+1]))
        qsize[r] += 1

    p.map_async(updateNOMAD, [(i, a, b, queues) for i, a, b in rowSlices])

    countPerEpoch = FLAGS.cores * (len(colList) - 1)
    start = time.time()
    #print [q.qsize() for q in queues]
    print [q for q in qsize]
Example #28
    def start(self):
        """Starts the automatic manual analysis"""

        if self.mainConfiguration is None:
            raise Exception("The main configuration of the manual analysis has failed, cannot start it.")                    
        if self.reportingConfiguration is None:
            raise Exception("No reporting configuration found, cannot start the analysis.")

        if self.analysisConfiguration is None:
            raise Exception("The configuration of the manual analysis has failed, cannot start it.")

        self._logger.info(str(self))
 
        # first step is to create the templates for all our emulators
        self._logger.debug("Create {0} templates, one for each emulator".format(self.analysisConfiguration.maxNumberOfEmulators))
        AVDEmulator.createTemplates(self.mainConfiguration, self.analysisConfiguration)
       
        # Create a queue of threads
        distributedQueueManager = Manager()
        listOfAPKs = distributedQueueManager.Queue()
        pool = Pool(processes=self.analysisConfiguration.maxNumberOfEmulators)

        # Tell threads to analyze APKs which are push to the queue
        workerArgs = [(listOfAPKs, iEmulator, self.mainConfiguration, self.analysisConfiguration, self.reportingConfiguration) for iEmulator in range(self.analysisConfiguration.maxNumberOfEmulators)]
        self._logger.debug(workerArgs)
        try:
            pool.map_async(executeExperiment, workerArgs)
            
            # Continuously scan the directory and add identified APKs to ensure at least next emulators round is ready
            while True:
                if listOfAPKs.qsize() > self.analysisConfiguration.maxNumberOfEmulators*2:
                    time.sleep(5)
                    continue
                
                for directory in self.analysisConfiguration.apkFiles:
                    #self._logger.info("Analyzing directory: {0}".format(directory))
                    filenames = os.listdir(directory)

                    if len(filenames) == 0:
                        self._logger.debug("All APKs have been pushed to the analyzing queue, sleeping 5 secs...")
                        time.sleep(5)
                    else:
                        apkFileName = random.choice(filenames)
                        apkFileInputPath = os.path.join(directory, apkFileName)

                        if not os.access(apkFileInputPath, os.R_OK):
                            self._logger.error("You don't have read access to file {0}, not pushing file to queue.".format(apkFileInputPath))
                            continue

                        # compute Sha1 on name the file with it
                        sha1=self._computeSha1(apkFileInputPath)
                        apkFileOutputPath = os.path.join(self.analysisConfiguration.outputDirectory, sha1+".apk")

                        # move APK to output dir
                        shutil.move(apkFileInputPath, apkFileOutputPath)
                        self._logger.info("Pushing APK {0} in queue.".format(apkFileOutputPath))
                        listOfAPKs.put(apkFileOutputPath)
        except KeyboardInterrupt:
            self._logger.error("Automatic analysis interrupted by a keyboard Exception.")
Example #29
def test2():
    for n in range(100000):
        def test3(i):
            n += i
    pool = Pool(processes=1)
    pool.map_async(test3,range(100000))
    pool.close()
    pool.join()
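Example #29 deserves a caution: functions handed to map_async must be picklable, and a function defined inside another function (like test3) is not, so the tasks cannot be dispatched; the n += i closure would also raise UnboundLocalError if it ever ran. A minimal corrected sketch, assuming the intent was simply to apply a function over a range:

from multiprocessing import Pool

def add_one(i):
    # module-level functions can be pickled and shipped to worker processes
    return i + 1

if __name__ == '__main__':
    with Pool(processes=1) as pool:
        total = sum(pool.map_async(add_one, range(100000)).get())
    print(total)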
Example #30
def main():
    args, _ = get_args()
    db, cursor = get_db_and_cursor(args)
    p = Pool(processes=args.num_processes)
    cursor.execute(u"SELECT wiki_id, url FROM wikis ")
    for i in range(0, cursor.rowcount, 500):
        print i
        p.map_async(get_pageviews_for_wiki, [Namespace(row=row, **vars(args)) for row in cursor.fetchmany(500)]).get()
Example #31
    print("..Pulling %s RDS samples from both worlds..." % NUM_RDS_SAMPLES)
    pullRDSsamples(thisWorldDir)
    pullRDSsamples(thisWorldDir + ", noepi")


# queue up all jobs across a maximum of 4 cores
print("Queueing network simulation processes")
print(
    "Putting %s of these in the queue in a few seconds, and then waiting forever"
    % len(paramCombinations))

parallel = True
if parallel:
    #THIS IS AWESOME!
    pool = Pool(processes=8)
    pool.map_async(OutbreakAndRDS, paramCombinations)

    import time
    time.sleep(1500000)
else:
    for x in paramCombinations:
        OutbreakAndRDS(x)

# just do this somewhere else

#    # Analyze how this world did.
#    print("Computing RDS statistics!")
#    executeCommand( "Rscript --vanilla analysis.R %s" % thisWorldDir )

# IT WON'T CONTINUE UNLESS WE JUST WAIT AND WAIT!
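The time.sleep(1500000) above (and the comment about waiting forever) stands in for proper synchronisation: closing the pool and then joining it blocks exactly until every map_async task has finished. A minimal sketch of that, with a hypothetical run_simulation in place of OutbreakAndRDS:

from multiprocessing import Pool

def run_simulation(params):
    # stand-in for the real outbreak/RDS simulation
    return sum(params)

if __name__ == '__main__':
    param_combinations = [(1, 2), (3, 4), (5, 6)]
    pool = Pool(processes=8)
    result = pool.map_async(run_simulation, param_combinations)
    pool.close()   # no more work will be submitted
    pool.join()    # blocks until every queued task has completed
    print(result.get())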
Example #32
if __name__ == "__main__":

    gt_folder = sys.argv[1]
    pred_folder = sys.argv[2]

    gt_items = get_xml_files(gt_folder)
    pred_items = get_xml_files(pred_folder)

    all_paths = []
    all_paths.extend(list(gt_items.values()))
    all_paths.extend(list(pred_items.values()))

    p = Pool(processes=None)
    m = Manager()
    q = m.Queue()
    results = p.map_async(convert_xml, all_paths)

    left = float('inf')
    while not results.ready():
        if left != results._number_left:
            print "Left {}".format(results._number_left)
            left = results._number_left
        time.sleep(1)

    results = list(results.get())

    print "Successfully ran {}/{}".format(
        sum([1 if r == True else 0 for r in results]), len(results))

    pred_file = "pred.lst"
    gt_file = "gt.lst"
Example #33
def optimize_for_order(conv_params,
                       pool_kernel=None,
                       pool_stride=None,
                       sequential=True):
    # Generate permutations for the order
    loops = ['B/b', 'OW/ow', 'OH/oh', 'IC/ic', 'OC/oc']
    order = set(permutations(loops))

    return_dict = {}
    acc_obj, K, O, S, IC, OC, B, iprec, wprec, im2col, energy_cost = conv_params

    #print('optimizing for convolution layer: weights {}x{}x{}x{}'.format(OC,IC,K,K))
    #print('Batch size: {}'.format(B))

    if pool_kernel is None:
        pool_kernel = (1, 1, 1, 1)
    if pool_stride is None:
        pool_stride = (1, 1, 1, 1)
    conv_params_with_pool = acc_obj, K, O, S, IC, OC, B, iprec, wprec, im2col, energy_cost, pool_kernel, pool_stride

    if not sequential:
        _bound_optimizer_method = functools.partial(_optimize_for_order,
                                                    conv_params_with_pool)

        try:
            pool = Pool(cpu_count())
            results = pool.map_async(_bound_optimizer_method, order).get(10000)
            pool.close()
            pool.join()

            # for o in order:
            #     _bound_optimizer_method(o)
            # exit()

            best_cycles = None
            best_energy = None
            min_cycles = min([x[-4] for x in results])
            min_energy = min([x[-3] for x in results])
            cycles_list = [x[-2] for x in results]
            energy_list = [x[-1] for x in results]
            energy_array = np.stack(energy_list)
            cycles_array = np.stack(cycles_list)
            for r in results:
                tiling, order_type, cycles, energy, _, _ = r
                # print('{}:\n{}\n\t{:1.2f}, {:1.2f}'.format(order_type, tiling, cycles/float(min_cycles), energy/float(min_energy)))
                if best_cycles is None or best_cycles > cycles or (
                        best_cycles == cycles and best_energy > energy):
                    best_cycles = cycles
                    best_energy = energy
                    best_tiling = tiling
                    best_order = order_type
            return best_tiling, best_order, cycles_array, energy_array

        except KeyboardInterrupt:
            pool.terminate()
            pool.join()
            return

    else:
        best_cycles = None
        best_energy = None
        best_tiling = None
        best_order = None
        for o in order:
            tiling, order_type, cycles, energy, _, _ = _optimize_for_order(
                conv_params_with_pool, o)
            if best_cycles is None or best_cycles > cycles:
                best_cycles = cycles
                best_energy = energy
                best_tiling = tiling
                best_order = order_type
            elif best_cycles == cycles and best_energy > energy:
                best_cycles = cycles
                best_energy = energy
                best_tiling = tiling
                best_order = order_type
        return best_tiling, best_order, None, None
Example #34
def correct_strings(partition_alignments, seq_to_acc, ccs_dict, step, nr_cores = 1, verbose = False):
    S_prime = {}
    S_prime_quality = {}

    partition_unique_seq_to_acc = {}
    for m, partition in partition_alignments.items():
        partition_unique_seq_to_acc[m] = {}
        partition_unique_seq_to_acc[m][m] = seq_to_acc[m]
        for s in partition:
            if s in seq_to_acc:
                s_accessions = seq_to_acc[s]
                partition_unique_seq_to_acc[m][s] = s_accessions

    if ccs_dict:
        partitioned_ccs_dict = {}
        for m, partition in partition_alignments.items():
            partitioned_ccs_dict[m] = {}
            for s in partition:
                if s in seq_to_acc:
                    s_accessions = seq_to_acc[s]
                    for s_acc in s_accessions:
                        partitioned_ccs_dict[m][s_acc] = ccs_dict[s_acc]
    else:
        partitioned_ccs_dict = {}
        for m, partition in partition_alignments.items():
            partitioned_ccs_dict[m] = {}       

    if nr_cores == 1:
        for m, partition in sorted(partition_alignments.items()):
            S_prime_partition, S_prime_quality_vectors = correct_to_consensus_helper( ((m, partition, partition_unique_seq_to_acc[m], step, verbose, partitioned_ccs_dict[m]), {}) )
            for acc, s in S_prime_partition.items():
                assert acc not in S_prime
                S_prime[acc] = s

            for acc, qual_vector in S_prime_quality_vectors.items():
                S_prime_quality[acc] = qual_vector


    else:
        ####### parallelize statistical tests #########
        # pool = Pool(processes=mp.cpu_count())
        original_sigint_handler = signal.signal(signal.SIGINT, signal.SIG_IGN)
        signal.signal(signal.SIGINT, original_sigint_handler)
        pool = Pool(processes=nr_cores)
        try:
            res = pool.map_async(correct_to_consensus_helper, [ ( (m, partition, partition_unique_seq_to_acc[m], step, verbose, partitioned_ccs_dict[m]), {}) for m, partition in partition_alignments.items() if len(partition) > 1 ] )
            S_prime_partition_dicts = res.get(999999999)  # Without the timeout this blocking call ignores all signals.
        except KeyboardInterrupt:
            print("Caught KeyboardInterrupt, terminating workers")
            pool.terminate()
            sys.exit()
        else:
            # print("Normal termination")
            pool.close()
        pool.join()
        for S_prime_partition, S_prime_quality_vectors in S_prime_partition_dicts:
            for acc, s in S_prime_partition.items():
                assert acc not in S_prime
                S_prime[acc] = s

            for acc, qual_vector in S_prime_quality_vectors.items():
                S_prime_quality[acc] = qual_vector

    return S_prime, S_prime_quality
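The signal handling in the parallel branch above appears to follow a common recipe: ignore SIGINT while the Pool is created so the workers inherit the "ignore" handler, then restore it in the parent so Ctrl-C still triggers the except KeyboardInterrupt branch. As written, though, the original handler is restored before Pool(...) is constructed, which defeats that purpose. The usual ordering looks roughly like this (with a hypothetical consensus_job worker):

import signal
from multiprocessing import Pool

def consensus_job(partition):
    return len(partition)

if __name__ == '__main__':
    partitions = [[1, 2, 3], [4, 5], [6]]
    # workers created here inherit SIG_IGN, so Ctrl-C only hits the parent
    original_handler = signal.signal(signal.SIGINT, signal.SIG_IGN)
    pool = Pool(processes=4)
    signal.signal(signal.SIGINT, original_handler)
    try:
        results = pool.map_async(consensus_job, partitions).get(999999999)
    except KeyboardInterrupt:
        print("Caught KeyboardInterrupt, terminating workers")
        pool.terminate()
        pool.join()
        raise SystemExit(1)
    pool.close()
    pool.join()
    print(results)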
Example #35
firstIndex = 9251
lastIndex = 14871

def scrapePage(i):
  	try:
		url = baseUrl + str(i)
		page = urllib2.urlopen(url)
		dom = BeautifulSoup(page, 'html.parser')
		crosswordElement = dom.find('div', attrs={'class': 'js-crossword'})
		crosswordDataJson = crosswordElement.get('data-crossword-data')
		crosswordData = json.loads(crosswordDataJson)
		print crosswordData['name']
		return crosswordData		
  	except:
		print 'Scraping crossword data failed:'
		print i

def outputResults(results):
	sanitisedResults = list(filter(lambda x: x is not None, results))
	output = open('crosswords.txt', 'w')
	outputJson = json.dumps(sanitisedResults)
	output.write(outputJson)
	output.close()
	print 'Done! Total crosswords parsed:'
	print len(results)

if __name__ == '__main__':
	pool = Pool()
	r = pool.map_async(scrapePage, range(firstIndex, lastIndex + 1), callback=outputResults)
	r.wait()
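When a callback is passed to map_async, as in the scraper above, it is invoked once in the parent process with the complete list of results after all tasks finish; it is not called per item. A minimal sketch of that behaviour with a hypothetical fetch function:

from multiprocessing import Pool

def fetch(i):
    # stand-in for scraping one page
    return {'id': i}

def output_results(results):
    # invoked once, in the parent, with the full list of return values
    print('got %d results' % len(results))

if __name__ == '__main__':
    pool = Pool()
    r = pool.map_async(fetch, range(5), callback=output_results)
    r.wait()
    pool.close()
    pool.join()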
Example #36
#!/usr/bin/env python
# _*_ coding: utf-8 _*_

# @author: Drizzle_Zhang
# @file: pool.py
# @time: 2020/3/24 10:43

from time import time
from multiprocessing import Pool


def func(x):
    return x * x


if __name__ == '__main__':
    time_start = time()
    pool = Pool(3)
    list_input = [1, 2, 3, 4, 5, 6]
    # res = [pool.apply_async(func, (i, )) for i in list_input]
    res = pool.map_async(func, list_input)
    pool.close()
    pool.join()
    list_output = res.get()
    time_end = time()
    print(time_end - time_start)
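Example #36 is the minimal pattern most of the snippets above elaborate on: map_async returns an AsyncResult immediately, and the values only become available through .get(). A variant sketch using the pool as a context manager (note that leaving the with block terminates the pool, so .get() has to be called inside it):

from multiprocessing import Pool

def func(x):
    return x * x

if __name__ == '__main__':
    with Pool(3) as pool:
        # .get() blocks until all results are ready; it must be called
        # inside the with-block, since leaving it terminates the pool
        list_output = pool.map_async(func, [1, 2, 3, 4, 5, 6]).get()
    print(list_output)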
Example #37
    progress_per_part = 70 / len(ont_class_list)

    counter = Value('f', float(20))
    max_processes = len(ont_class_list) if len(
        ont_class_list
    ) <= celery.conf.MAX_MULTIPROCESSING else celery.conf.MAX_MULTIPROCESSING
    pool = Pool(max_processes, p_q_initializer, [counter])

    if len(ont_id_list) > 1:
        fetch_msg = 'Fetching %s' % ', '.join(ont_id_list[:-1])
        fetch_msg += ' and %s...' % ont_id_list[-1]
    else:
        fetch_msg = 'Fetching %s...' % ont_id_list[0]

    pool_result = pool.map_async(
        fetch_and_save_by_class_ont_wrapper,
        zip(ont_class_list, repeat(config_graph), repeat(data_graph),
            repeat(ltw_data_graph), repeat(progress_per_part)))

    pool.close()

    try:
        while not pool_result.ready():
            current_task.update_state(state='PROGRESS',
                                      meta={
                                          'progress_percent':
                                          int(counter.value),
                                          'progress_msg': fetch_msg
                                      })
            sleep(1)
        pool_result.wait()
    except Exception, e:
Example #38
def make_oligomer(input_file,
                  largest_oligo_complexes,
                  report,
                  args,
                  residue_index_mapping=None):
    global workdir
    global input_name
    global verbosity
    global g_input_file
    global g_args
    global best_oligo_template_code
    global renamed_chains_file
    g_input_file = input_file
    g_args = args
    verbosity = args.verbosity
    workdir = os.getcwd()
    symmetry = args.symmetry

    # Subsection 2[a] #######################################################################
    if args.sequence_mode is False:
        input_name = os.path.basename(input_file).split(".pdb")[0].replace(
            '.', '_')
        candidate_qscores = {}
        # Select structurally best oligomeric template using GESAMT
        pctools.print_section(2, 'OLIGOMER ASSEMBLING')
        pctools.print_subsection('2[a]', 'Structural template selection')
        if args.multiprocess is True:
            p = Pool()
            for hitchain, average_qscore, output in p.map_async(
                    analyse_largest_complexes,
                    largest_oligo_complexes.items()).get():
                candidate_qscores[hitchain] = average_qscore
                report['hits'][hitchain]['qscore'] = round(average_qscore, 3)
                print(output)
            p.close()
            p.join()
        else:
            for item in largest_oligo_complexes.items():
                hitchain, average_qscore, output = analyse_largest_complexes(
                    item)
                candidate_qscores[hitchain] = average_qscore
                report['hits'][hitchain]['qscore'] = round(average_qscore, 3)
                print(output)

        best_oligo_template = max(candidate_qscores.keys(),
                                  key=(lambda x: candidate_qscores[x]))
        if candidate_qscores[best_oligo_template] >= args.qscore_cutoff:
            print('Structurally, the best template is: ' + clrs['y'] +
                  best_oligo_template + clrs['n'] + '. Using that!\n')
            report['best_template'] = best_oligo_template.split(':')[0]
            report['best_id'] = report['hits'][best_oligo_template]['id']
            report['best_cov'] = report['hits'][best_oligo_template][
                'coverage']
            report['best_qscore'] = report['hits'][best_oligo_template][
                'qscore']
            report['best_nchains'] = report['hits'][best_oligo_template][
                'final_homo_chains']
        else:
            print('No template had an average Q-score above cut-off of ' +
                  clrs['c'] + str(args.qscore_cutoff) + clrs['n'] +
                  '\nTry lowering the cutoff or running in sequence mode.\n')
            report['exit'] = '4'
            return None, None, report
        report['topology_figure'] = './' + best_oligo_template.replace(
            ':', '_') + '_CHOIR_Topology.png'
        template_chains = largest_oligo_complexes[best_oligo_template]
        best_oligo_template_code = best_oligo_template.split(':')[0]
        clean_template_file = make_local_template(best_oligo_template_code)

    elif args.sequence_mode is True:
        if input_file.endswith('.pdb'):
            input_name = os.path.basename(input_file).split(".pdb")[0].replace(
                '.', '_')
            input_file = os.path.join(
                workdir, input_name + '_CHOIR_MonomerSequence.fasta')
            g_input_file = input_file

        elif input_file.endswith('_CHOIR_MonomerSequence.fasta'):
            input_name = os.path.basename(input_file).split(
                "_CHOIR_MonomerSequence.fasta")[0]

        pctools.print_section(2, 'OLIGOMER ASSEMBLING - SEQUENCE MODE')
        print(clrs['y'] +
              "Skipping section 2[a] - Structural template selection" +
              clrs['n'] + "\n")
        attempt = 0
        while attempt < len(largest_oligo_complexes):
            try:
                best_oligo_template = list(largest_oligo_complexes)[attempt]
                report['best_template'] = best_oligo_template.split(':')[0]
                report['best_id'] = report['hits'][best_oligo_template]['id']
                report['best_cov'] = report['hits'][best_oligo_template][
                    'coverage']
                report['best_qscore'] = 'NA'
                report['best_nchains'] = report['hits'][best_oligo_template][
                    'final_homo_chains']
                report['topology_figure'] = './' + best_oligo_template.replace(
                    ':', '_') + '_CHOIR_Topology.png'
                template_chains = largest_oligo_complexes[best_oligo_template]
                best_oligo_template_code = best_oligo_template.split(':')[0]
                clean_template_file = make_local_template(
                    best_oligo_template_code)
                break
            except:
                attempt += 1
                if attempt < len(largest_oligo_complexes):
                    print('Attempt ' + str(attempt) +
                          ' failed, trying a different template candidate.')
                if attempt == len(largest_oligo_complexes):
                    print('Failed to find templates in local databases.')
                    report['exit'] = '5'
                    return None, None, report

    relevant_chains_file = extract_relevant_chains(clean_template_file,
                                                   template_chains)
    if args.generate_report is True:
        report['template_figure'], pymol_output = pctools.pymol_screenshot(
            relevant_chains_file, args)
        print(pymol_output)
    renamed_chains_file, chains_dict = rename_relevant_chains(
        relevant_chains_file)
    relevant_chains = [
        chains_dict[template_chain] for template_chain in template_chains
    ]

    # Subsection 2[b] #######################################################################
    pctools.print_subsection('2[b]', 'Generating alignment')
    # Generate per chain alignment files
    alignment_files = []
    if args.sequence_mode is False:
        if args.multiprocess is True:
            p = Pool()
            for qscore, rmsd, fasta_out, gesamt_output in p.map_async(
                    run_gesamt_parallel, chains_dict.values()).get():
                alignment_files.append(fasta_out)
                print(gesamt_output)
            p.close()
            p.join()
        else:
            for chain in chains_dict.values():
                qscore, rmsd, fasta_out, gesamt_output = run_gesamt_parallel(
                    chain)
                alignment_files.append(fasta_out)
                print(gesamt_output)

    elif args.sequence_mode is True:
        if args.multiprocess is True:
            p = Pool()
            for fasta_out, output in p.map_async(alignment_from_sequence,
                                                 chains_dict.values()).get():
                alignment_files.append(fasta_out)
                print(output)
        else:
            for current_chain in chains_dict.values():
                fasta_out, output = alignment_from_sequence(current_chain)
                alignment_files.append(fasta_out)
                print(output)
    print('Alignment files:\n' + clrs['g'] +
          ('\n').join([os.path.basename(i)
                       for i in alignment_files]) + clrs['n'])

    # Generate final alignment which will be the input for Modeller
    final_alignment, full_residue_mapping = generate_ali(
        alignment_files, best_oligo_template_code, residue_index_mapping, args)
    # Score said alignment and enforce treshold
    report[
        'relative_alignment_score'], relative_wscores, nchains = score_alignment(
            final_alignment)
    print('\nFinal average relative score for alignment: ' +
          str(round(report['relative_alignment_score'], 2)) + '%')
    bad_streches = 0
    for wscore in relative_wscores:
        if wscore < args.similarity_cutoff:
            bad_streches += 1
    if bad_streches >= args.bad_streches * nchains:
        if args.sequence_mode is True:
            print(
                '\nThe alignment score was unacceptable for ' + clrs['r'] +
                str(bad_streches) + clrs['n'] +
                ' 30-res segments of the protein complex.\nTry running the default (structure) mode.\n'
            )
        else:
            print(
                '\nThe alignment score was unacceptable for ' + clrs['r'] +
                str(bad_streches) + clrs['n'] +
                ' 30-res segments of the protein complex.\nTry increasing the number of candidate templates or tweaking the similarity cut-offs.\n'
            )
        report['exit'] = '6'
        return None, None, report

    # Subsection 2[c] #######################################################################
    pctools.print_subsection('2[c]', 'Generating models')
    genmodel_file, expected_models = create_genmodel(final_alignment,
                                                     best_oligo_template_code,
                                                     relevant_chains, args)
    run_modeller(genmodel_file)

    # Record list of oligomers built
    nmodels = 0
    built_oligomers = []
    for model in expected_models:
        built_oligomers.append(
            restore_chain_identifiers(model, chains_dict,
                                      full_residue_mapping))
        nmodels += 1
    print(clrs['b'] + 'ProtCHOIR' + clrs['n'] + ' built ' + clrs['c'] +
          str(nmodels) + clrs['n'] + ' model oligomers:')
    for model in built_oligomers:
        print(clrs['g'] + model + clrs['n'])

    return best_oligo_template, built_oligomers, report
Example #39
def ricciCurvature(G,
                   alpha=0.5,
                   weight=None,
                   compute_nc=True,
                   proc=cpu_count(),
                   edge_list=None,
                   method="OTD",
                   verbose=False):
    """
     Compute ricci curvature for all nodes and edges in G.
         Node Ricci curvature is defined as the average of all its adjacent edges.
     :param G: A connected NetworkX graph.
     :param alpha: The parameter for the discrete ricci curvature, range from 0 ~ 1.
                     It means the share of mass to leave on the original node.
                     eg. x -> y, alpha = 0.4 means 0.4 for x, 0.6 to evenly spread to x's nbr.
     :param weight: The edge weight used to compute Ricci curvature.
     :param compute_nc: True if the average node curvatures should be computed.
     :param proc: Number of processing used for parallel computing
     :param edge_list: Target edges to compute curvature
     :param method: Transportation method, OTD for Optimal transportation Distance,
                                           ATD for Average transportation Distance.
     :param verbose: Set True to output the detailed log.
     :return: G: A NetworkX graph with Ricci Curvature with edge attribute "ricciCurvature"
     """
    # Construct the all pair shortest path lookup
    if importlib.util.find_spec("networkit") is not None:
        import networkit as nk
        t0 = time.time()
        Gk = nk.nxadapter.nx2nk(G, weightAttr=weight)
        apsp = nk.distance.APSP(Gk).run().getDistances()
        length = {}
        for i, n1 in enumerate(G.nodes()):
            length[n1] = {}
            for j, n2 in enumerate(G.nodes()):
                length[n1][n2] = apsp[i][j]
        print(time.time() - t0, " sec for all pair by NetworKit.")
    else:
        print(
            "NetworKit not found, use NetworkX for all pair shortest path instead."
        )
        t0 = time.time()
        length = dict(nx.all_pairs_dijkstra_path_length(G, weight=weight))
        print(time.time() - t0, " sec for all pair.")

    t0 = time.time()
    # compute edge ricci curvature
    p = Pool(processes=proc)

    # if there is no assigned edges to compute, compute all edges instead
    if not edge_list:
        edge_list = G.edges()
    args = [(G, source, target, alpha, length, verbose, method)
            for source, target in edge_list]

    result = p.map_async(_wrapRicci, args)
    result = result.get()
    p.close()
    p.join()

    # assign edge Ricci curvature from result to graph G
    for rc in result:
        for k in list(rc.keys()):
            source, target = k
            G[source][target]['ricciCurvature'] = rc[k]

    # compute node Ricci curvature
    if compute_nc:
        for n in G.nodes():
            rcsum = 0  # sum of the neighbor Ricci curvature
            if G.degree(n) != 0:
                for nbr in G.neighbors(n):
                    if 'ricciCurvature' in G[n][nbr]:
                        rcsum += G[n][nbr]['ricciCurvature']

                # assign the node Ricci curvature to be the average of node's adjacency edges
                G.node[n]['ricciCurvature'] = rcsum / G.degree(n)
                if verbose:
                    print("node %d, Ricci Curvature = %f" %
                          (n, G.node[n]['ricciCurvature']))

    print(time.time() - t0, " sec for Ricci curvature computation.")
    return G
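# A minimal usage sketch for the example above (not part of the original source).
# It assumes ricciCurvature and its helper _wrapRicci are importable as defined here,
# and passes compute_nc=False to sidestep the legacy G.node[...] accessor, which only
# works on older NetworkX releases. The graph choice is illustrative.
import networkx as nx

if __name__ == '__main__':
    G = nx.karate_club_graph()                        # small, connected, unweighted test graph
    G = ricciCurvature(G, alpha=0.5, method="OTD", compute_nc=False)
    print(G[0][1]['ricciCurvature'])                  # curvature of edge (0, 1)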
Example #40
0
def analyze(problem,
            Y,
            calc_second_order=True,
            num_resamples=100,
            conf_level=0.95,
            print_to_console=False,
            parallel=False,
            n_processors=None):
    """Perform Sobol Analysis on model outputs.

    Returns a dictionary with keys 'S1', 'S1_conf', 'ST', and 'ST_conf', where
    each entry is a list of size D (the number of parameters) containing the
    indices in the same order as the parameter file.  If calc_second_order is
    True, the dictionary also contains keys 'S2' and 'S2_conf'.

    Parameters
    ----------
    problem : dict
        The problem definition
    Y : numpy.array
        A NumPy array containing the model outputs
    calc_second_order : bool
        Calculate second-order sensitivities (default True)
    num_resamples : int
        The number of resamples (default 100)
    conf_level : float
        The confidence interval level (default 0.95)
    print_to_console : bool
        Print results directly to console (default False)
    parallel : bool
        Perform the analysis using a multiprocessing pool (default False)
    n_processors : int
        Number of worker processes to use when parallel is True (default None)

    References
    ----------
    .. [1] Sobol, I. M. (2001).  "Global sensitivity indices for nonlinear
           mathematical models and their Monte Carlo estimates."  Mathematics
           and Computers in Simulation, 55(1-3):271-280,
           doi:10.1016/S0378-4754(00)00270-6.
    .. [2] Saltelli, A. (2002).  "Making best use of model evaluations to
           compute sensitivity indices."  Computer Physics Communications,
           145(2):280-297, doi:10.1016/S0010-4655(02)00280-1.
    .. [3] Saltelli, A., P. Annoni, I. Azzini, F. Campolongo, M. Ratto, and
           S. Tarantola (2010).  "Variance based sensitivity analysis of model
           output.  Design and estimator for the total sensitivity index."
           Computer Physics Communications, 181(2):259-270,
           doi:10.1016/j.cpc.2009.09.018.

    Examples
    --------
    >>> X = saltelli.sample(problem, 1000)
    >>> Y = Ishigami.evaluate(X)
    >>> Si = sobol.analyze(problem, Y, print_to_console=True)

    """
    # determining if groups are defined and adjusting the number
    # of rows in the cross-sampled matrix accordingly
    if not problem.get('groups'):
        D = problem['num_vars']
    else:
        D = len(set(problem['groups']))

    if calc_second_order and Y.size % (2 * D + 2) == 0:
        N = int(Y.size / (2 * D + 2))
    elif not calc_second_order and Y.size % (D + 2) == 0:
        N = int(Y.size / (D + 2))
    else:
        raise RuntimeError("""
        Incorrect number of samples in model output file.
        Confirm that calc_second_order matches option used during sampling.""")

    if conf_level < 0 or conf_level > 1:
        raise RuntimeError("Confidence level must be between 0-1.")

    # normalize the model output
    Y = (Y - Y.mean()) / Y.std()

    A, B, AB, BA = separate_output_values(Y, D, N, calc_second_order)
    r = np.random.randint(N, size=(N, num_resamples))
    Z = norm.ppf(0.5 + conf_level / 2)

    if not parallel:
        S = create_Si_dict(D, calc_second_order)

        for j in range(D):
            S['S1'][j] = first_order(A, AB[:, j], B)
            S['S1_conf'][j] = Z * first_order(A[r], AB[r, j], B[r]).std(ddof=1)
            S['ST'][j] = total_order(A, AB[:, j], B)
            S['ST_conf'][j] = Z * total_order(A[r], AB[r, j], B[r]).std(ddof=1)

        # Second order (+conf.)
        if calc_second_order:
            for j in range(D):
                for k in range(j + 1, D):
                    S['S2'][j, k] = second_order(A, AB[:, j], AB[:, k],
                                                 BA[:, j], B)
                    S['S2_conf'][j, k] = Z * second_order(
                        A[r], AB[r, j], AB[r, k], BA[r, j], B[r]).std(ddof=1)

    else:
        tasks, n_processors = create_task_list(D, calc_second_order,
                                               n_processors)

        func = partial(sobol_parallel, Z, A, AB, BA, B, r)
        pool = Pool(n_processors)
        S_list = pool.map_async(func, tasks)
        pool.close()
        pool.join()

        S = Si_list_to_dict(S_list.get(), D, calc_second_order)

    # Print results to console
    if print_to_console:
        print_indices(S, problem, calc_second_order)

    # Add problem context and override conversion method for special case
    S.problem = problem
    S.to_df = MethodType(to_df, S)
    return S
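# Hedged usage sketch for the parallel path above (not from the original source).
# It reuses the docstring's Ishigami set-up and assumes SALib's saltelli sampler and
# Ishigami test function are available; the problem definition and sample size are
# illustrative only.
from SALib.sample import saltelli
from SALib.analyze import sobol
from SALib.test_functions import Ishigami

if __name__ == '__main__':
    problem = {'num_vars': 3,
               'names': ['x1', 'x2', 'x3'],
               'bounds': [[-3.14159265, 3.14159265]] * 3}
    X = saltelli.sample(problem, 1024)      # yields N * (2D + 2) rows, matching the size check above
    Y = Ishigami.evaluate(X)
    # parallel=True routes the index estimates through Pool.map_async as shown above
    Si = sobol.analyze(problem, Y, parallel=True, n_processors=4)
    print(Si['S1'])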
Example #41
0
from multiprocessing import Pool, cpu_count
import numpy as np
import time
import math


def great_circle(args):
    lon1, lat1, lon2, lat2 = args
    radius = 3956
    x = math.pi / 180.0
    a = (90.0 - lat1) * (x)
    b = (90.0 - lat2) * (x)
    theta = (lon2 - lon1) * (x)
    c = math.acos((math.cos(a) * math.cos(b)) +
                  (math.sin(a) * math.sin(b) * math.cos(theta)))
    return radius * c


lon1, lat1, lon2, lat2 = 42, 0.5, -13, -32
n = int(1e06)  # array dimensions must be integers
mat = np.zeros((n, 4))
mat = mat + [lon1, lat1, lon2, lat2]

timeStart = time.time()
po = Pool(processes=cpu_count() - 1)
_results = po.map_async(great_circle, (mat[i, :] for i in range(mat.shape[0])))
results = _results.get()

print time.strftime('%H:%M:%S', time.gmtime(time.time() - timeStart))
print 'done'
Example #42
0
def f(x):
    return x * x


pool = Pool(processes=cpu_count())
result = pool.apply_async(f, [
    1,
])
print "get=%s" % result.get(timeout=2)
pool = Pool(processes=cpu_count())
print "map=%s" % pool.map(f, range(10))

a = []
results = [pool.apply_async(f, [
    i,
], callback=a.append) for i in range(10)]
print "a=%s" % (str(a))
pool.close()
pool.join()
print "a=%s" % (str(a))

pool = Pool(processes=cpu_count())
b = []
results = pool.map_async(f, range(10), callback=b.append, chunksize=10)

print "b=%s" % (str(b))
results.wait()
print "b=%s" % (str(b))
pool.close()
pool.join()
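# Standalone sketch (not from the snippet above) of the callback behaviour it demonstrates:
# map_async invokes its callback exactly once, with the complete result list, so
# list.append stores the results as one nested element while list.extend flattens them.
from multiprocessing import Pool


def square(x):
    return x * x


if __name__ == '__main__':
    nested, flat = [], []
    with Pool(4) as pool:
        pool.map_async(square, range(5), callback=nested.append).wait()
        pool.map_async(square, range(5), callback=flat.extend).wait()
    print(nested)   # [[0, 1, 4, 9, 16]] -- a single element holding the whole result list
    print(flat)     # [0, 1, 4, 9, 16]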
Example #43
0
import os

from multiprocessing import Pool

mmin = 110.0
mmax = 150.0
nm = 81

card = "testcard.txt"

cmdlist = []

for i in range(nm):
    mass = mmin + i * (mmax - mmin) / float(nm - 1)
    obsexec = "combine -d %s -m %g -U -M Asymptotic --rRelAcc=0.001 --rAbsAcc=0.001 --minimizerStrategy=0 --rMax=30 --run=expected -n LimitsFromGridObs" % (
        card, mass)
    print obsexec
    cmdlist.append(obsexec)

pool = Pool(processes=20)
pool.map_async(os.system, cmdlist)
pool.close()
pool.join()

os.system("hadd smrel.root higgsCombineLimitsFromGridObs*.root")
os.system("rm higgsCombineLimitsFromGrid*.root")
Example #44
0
        "target": "i686-pc-windows-msvc-cp36",
        "extension": "zip"
    },
]


def retrieve(url):
    sess = requests.Session()
    print("Getting %s" % urlsplit(url).path.split("/")[-1])
    retrieved = sess.get(url, stream=True)
    # don't continue if something's wrong
    retrieved.raise_for_status()
    try:
        raw_zip = zipfile.ZipFile(io.BytesIO(retrieved.content))
        raw_zip.extractall(path)
    except zipfile.BadZipfile:
        # it's a tar
        tar = tarfile.open(mode="r:gz", fileobj=io.BytesIO(retrieved.content))
        tar.extractall(path)


urls = (url.format(**release) for release in releases)

# let's do this in parallel
pool = Pool(processes=len(releases))
# we could use map, but it consumes the entire iterable (doesn't matter for small n)
res = pool.map_async(retrieve, urls)
# need these if we use _async
pool.close()
pool.join()
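# Standalone illustration (not from the snippet above) of the design note in its comments:
# Pool.map materialises the whole iterable into a list before dispatching work, whereas
# imap_unordered does not, and it yields results as they complete -- useful when the
# input is a large generator. The fetch function here is a stand-in for retrieve().
from multiprocessing import Pool


def fetch(i):
    return i * i


if __name__ == '__main__':
    lazy_inputs = (i for i in range(100))   # generator, never turned into a list by the pool
    with Pool(4) as pool:
        for result in pool.imap_unordered(fetch, lazy_inputs, chunksize=8):
            pass                            # handle each result as soon as it arrives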
Example #45
0
    def next(self):
        """For python 2.x.
        # Returns
            The next batch.
        """
        # index_array:
        # current_index
        # current_batch_size
        with self.lock:
            index_array, current_index, current_batch_size = next(
                self.index_generator)

        # print("Index array: ")
        # print(index_array)
        # print("current_index: ", current_index)
        # print("current_batch_size: ", current_batch_size)

        # build batch of image data
        batch_x = np.zeros((current_batch_size, ) + (self.frames_per_step, ) +
                           self.image_shape,
                           dtype=K.floatx())  # # my addition of +(1,)
        grayscale = self.color_mode == 'grayscale'

        # print(index_array)
        # print(chunks)
        chunks = [index_array[i::4] for i in range(4)]
        pool = Pool(processes=4)
        batch_x = pool.map_async(get_action_tube, chunks)
        print(batch_x)

        # # Computes the batch for training
        # for i in range(len(index_array)):
        #     action_tube_dir = self.filenames[index_array[i]]
        #     frame_count = 0
        #     for fname in sorted(os.listdir(os.path.join(self.directory, action_tube_dir))):
        #         if (frame_count >= self.frames_per_step):
        #              break # very shit
        #         img = load_img(os.path.join(os.path.join(self.directory, action_tube_dir), fname),
        #                        grayscale=grayscale,
        #                        target_size=self.target_size)
        #         x = img_to_array(img, data_format=self.data_format)
        #         batch_x[i,frame_count] = x
        #         print(frame_count)
        #         frame_count +=1

        # for kk in range(self.frames_per_step):
        #     for i in range(int(len(index_array)/self.frames_per_step)):
        #         fname = self.filenames[index_array[i]]
        #         img = load_img(os.path.join(self.directory, fname),
        #                        grayscale=grayscale,
        #                        target_size=self.target_size)
        #         x = img_to_array(img, data_format=self.data_format)
        #         x = self.image_data_generator.random_transform(x)
        #         x = self.image_data_generator.standardize(x)
        #         x = self.image_data_generator.change_dims(x) # my addition
        #
        #         batch_x[i,kk] = x

        # optionally save augmented images to disk for debugging purposes
        if self.save_to_dir:
            for i in range(current_batch_size):
                img = array_to_img(batch_x[i], self.data_format, scale=True)
                fname = '{prefix}_{index}_{hash}.{format}'.format(
                    prefix=self.save_prefix,
                    index=current_index + i,
                    hash=np.random.randint(1e4),
                    format=self.save_format)
                img.save(os.path.join(self.save_to_dir, fname))
        # build batch of labels
        if self.class_mode == 'input':
            batch_y = batch_x.copy()
        elif self.class_mode == 'sparse':
            batch_y = self.classes[index_array]
        elif self.class_mode == 'binary':
            batch_y = self.classes[index_array].astype(K.floatx())
        elif self.class_mode == 'categorical':
            batch_y = np.zeros((len(batch_x), self.num_class),
                               dtype=K.floatx())
            for i, label in enumerate(self.classes[index_array]):
                batch_y[i, label] = 1.
        else:
            return batch_x
        return batch_x, batch_y
Example #46
0
            processes = int(sys.argv[i + 1])
        else:
            processes = 2

    t = int(sys.stdin.readline())
    if verbose:
        print >> sys.stderr, "Solving %d test cases" % t

    # read input
    test_cases = [read() for i in xrange(t)]

    # solve
    if parallelize:
        process_pool = Pool(processes=processes)
        if check_mode:
            test_results = process_pool.map_async(check,
                                                  test_cases).get(9999999)
        else:
            test_results = process_pool.map_async(solve,
                                                  test_cases).get(9999999)

    else:
        if check_mode:
            test_results = [check(data) for data in test_cases]
        else:
            test_results = [solve(data) for data in test_cases]

    if verbose:
        sys.stderr.write("\n")
        sys.stderr.flush()

    # write output
Example #47
0
    except urllib2.HTTPError:
        pass

    if verbose:
        print 'Checking for %s... Missing!' % relative_file
    else:
        sys.stdout.write(u'✗')
    return sdk_artifact


if not verbose:
    print 'Checking for %s artifacts on remote...' % len(sdk_artifacts),
pool = Pool(20)
missing_artifacts = []
pool.map_async(remote_has_artifact,
               sdk_artifacts,
               callback=missing_artifacts.extend).wait(999999)
missing_artifacts = filter(lambda x: x is not None, missing_artifacts)

if not verbose:
    print
print '%s of %s artifacts missing from remote.' % (len(missing_artifacts),
                                                   len(sdk_artifacts))
if verbose:
    print

deployed = 0
for index, missing_artifact in enumerate(missing_artifacts):
    cmd = [
        'mvn', 'deploy:deploy-file', '-DrepositoryId=' + repo_id,
        '-Durl=' + repo_url, '-Dfile=' + missing_artifact['file'],
Example #48
0
class Predictor(object):
    """Load a pretrained DocReader model and predict inputs on the fly."""
    def __init__(self,
                 model=None,
                 tokenizer=None,
                 normalize=True,
                 embedding_file=None,
                 num_workers=None):
        """
        Args:
            model: path to saved model file.
            tokenizer: option string to select tokenizer class.
            normalize: squash output score to 0-1 probabilities with a softmax.
            embedding_file: if provided, will expand dictionary to use all
              available pretrained vectors in this file.
            num_workers: number of CPU processes to use to preprocess batches.
        """
        logger.info('Initializing model...')
        self.model = Model.load(model or DEFAULTS['model'],
                                normalize=normalize)

        if embedding_file:
            logger.info('Expanding dictionary...')
            words = utils.index_embedding_words(embedding_file)
            added = self.model.expand_dictionary(words)
            self.model.load_embeddings(added, embedding_file)

        logger.info('Initializing tokenizer...')
        annotators = tokenizers.get_annotators_for_model(self.model)
        if not tokenizer:
            tokenizer_class = DEFAULTS['tokenizer']
        else:
            tokenizer_class = tokenizers.get_class(tokenizer)

        if num_workers is None or num_workers > 0:
            self.workers = ProcessPool(
                num_workers,
                initializer=init,
                initargs=(tokenizer_class, annotators),
            )
        else:
            self.workers = None
            self.tokenizer = tokenizer_class(annotators=annotators)

    def predict(self, document, question, candidates=None, top_n=1):
        """Predict a single document - question pair."""
        results = self.predict_batch([(
            document,
            question,
            candidates,
        )], top_n)
        return results[0]

    def predict_batch(self, batch, top_n=1):
        """Predict a batch of document - question pairs."""
        documents, questions, candidates = [], [], []
        for b in batch:
            documents.append(b[0])
            questions.append(b[1])
            candidates.append(b[2] if len(b) == 3 else None)
        candidates = candidates if any(candidates) else None

        # Tokenize the inputs, perhaps multi-processed.
        if self.workers:
            q_tokens = self.workers.map_async(tokenize, questions)
            d_tokens = self.workers.map_async(tokenize, documents)
            q_tokens = list(q_tokens.get())
            d_tokens = list(d_tokens.get())
        else:
            q_tokens = list(map(self.tokenizer.tokenize, questions))
            d_tokens = list(map(self.tokenizer.tokenize, documents))

        examples = []
        for i in range(len(questions)):
            examples.append({
                'id': i,
                'question': q_tokens[i].words(),
                'qlemma': q_tokens[i].lemmas(),
                'document': d_tokens[i].words(),
                'lemma': d_tokens[i].lemmas(),
                'pos': d_tokens[i].pos(),
                'ner': d_tokens[i].entities(),
            })

        # Stick document tokens in candidates for decoding
        if candidates:
            candidates = [{
                'input': d_tokens[i],
                'cands': candidates[i]
            } for i in range(len(candidates))]

        # Build the batch and run it through the model
        batch_exs = batchify([vectorize(e, self.model) for e in examples])
        s, e, score = self.model.predict(batch_exs, candidates, top_n)

        # Retrieve the predicted spans
        results = []
        for i in range(len(s)):
            predictions = []
            for j in range(len(s[i])):
                span = d_tokens[i].slice(s[i][j], e[i][j] + 1).untokenize()
                predictions.append((span, score[i][j]))
            results.append(predictions)
        return results

    def cuda(self):
        self.model.cuda()

    def cpu(self):
        self.model.cpu()
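# Hedged usage sketch for the Predictor class above (DrQA-style reader API); the model
# path is a placeholder and the document/question pair is illustrative, not taken from
# the original source.
if __name__ == '__main__':
    predictor = Predictor(model='/path/to/reader.mdl', num_workers=2)
    document = 'The Eiffel Tower is a wrought-iron lattice tower located in Paris, France.'
    question = 'Where is the Eiffel Tower located?'
    for span, score in predictor.predict(document, question, top_n=1):
        print('%s (score: %.4f)' % (span, score))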
Example #49
0
def main():
	progname = os.path.basename(sys.argv[0])
	usage="""prog --model model1.hdf,model2.hdf --oldpath refine_01
	Perform a 3d classification like e2refine_multi using the orientation of each particle in an e2refine_easy"""
	parser = EMArgumentParser(usage=usage,version=EMANVERSION)
	parser.add_argument("--newpath", type=str,help="Path to the classified results. Default = multinoali_XX", default=None)
	parser.add_argument("--oldpath", type=str,help="Path to the original refinement", default=None,guitype='filebox', filecheck=False,browser="EMBrowserWidget(withmodal=True,multiselect=False)", row=2, col=0, rowspan=1, colspan=3)
	parser.add_argument("--models","--model", dest="model", type=str,help="Comma separated list of reference maps used for classification. If a single map is provided, data will be split into two groups based on similarity to the single map.", default=None,guitype='filebox', browser='EMModelsTable(withmodal=True,multiselect=True)', filecheck=False, row=7, col=0, rowspan=1, colspan=3)
	parser.add_argument("--simcmp",type=str,help="The name of a 'cmp' to be used in comparing the aligned images. eg- frc:minres=80:maxres=20. Default=ccc", default="ccc", guitype='strbox', row=10, col=0, rowspan=1, colspan=3)
	parser.add_argument("--threads", type=int,help="Number of threads.", default=4, guitype='intbox', row=12, col=0, rowspan=1, colspan=1)
	parser.add_argument("--iter", type=int,help="Number of iterations.", default=1, guitype='intbox', row=12, col=1, rowspan=1, colspan=1)
	parser.add_header(name="optheader", help='Optional parameters:', title="Optional:", row=14, col=0, rowspan=1, colspan=3)
	parser.add_argument("--mask",type=str,help="Name of an optional mask file. The mask is applied to the input models to focus the classification on a particular region of the map. Consider e2classifyligand.py instead.", default=None,guitype='filebox', browser='EMModelsTable(withmodal=True,multiselect=False)', filecheck=False, row=15, col=0, rowspan=1, colspan=3)
	parser.add_argument("--ppid", type=int, help="Set the PID of the parent process, used for cross platform PPID",default=-1)

	(options, args) = parser.parse_args()
	logid=E2init(sys.argv)

	if not options.model:
		print "No model input. Exit."
		exit()

	inputmodel=options.model.split(',')
	modelstack=0
	if len(inputmodel)==1:
		num=EMUtil.get_image_count(inputmodel[0])
		if num>1:
			modelstack=num
			print "3D stack input. Perform multi-model refinement using existing alignment..."
			multimodel=True
			inputmodel=inputmodel*num
		else:

			multimodel=False
			print "One input model. Split the data by half accroding to the similarity to the input model..."
	else:
		multimodel=True
		print "Multiple input models. Perform multi-model refinement using existing alignment..."

	### make new folder
	if options.newpath == None:
		fls=[int(i[-2:]) for i in os.listdir(".") if i[:11]=="multinoali_" and len(i)==13 and str.isdigit(i[-2:])]
		if len(fls)==0 : fls=[0]
		options.newpath = "multinoali_{:02d}".format(max(fls)+1)

	print "Working directory: {}".format(options.newpath)
	try: os.mkdir(options.newpath)
	except:
		print "New path {} exist. Overwrite...".format(options.newpath)
		pass


	### read json file and parse some parameters
	with open(options.oldpath+"/0_refine_parms.json") as json_file:
		db = json.load(json_file)
	db=parse_json(db.copy())
	options.simcmp=parsemodopt(options.simcmp)

	sym=db["sym"]
	if db["breaksym"]:
		sym="c1"
	### copy the model to the new folder
	print "Preprocessing the input models..."
	if options.mask:
		options.mask="--multfile {}".format(options.mask)
	else:
		options.mask=""

	db_apix=db["apix"]
	if db_apix==0:
		e=EMData(inputmodel[0],0,True)
		db_apix=e["apix_x"]

	if multimodel:

		if modelstack>1:
			models=range(modelstack)
			for m in range(modelstack):
				outfile="{path}/model_input_{k}.hdf".format(path=options.newpath, k=m)
				run("e2proc3d.py {model} {out} --process=filter.lowpass.randomphase:cutoff_freq={freq} --apix={apix} {mask} --first {mi} --last {mi}".format(model=inputmodel[m],out=outfile,freq=1.0/(db["targetres"]*2),apix=db_apix,mask=options.mask, mi=m))
				inputmodel[m]=outfile
		else:

			models=range(len(inputmodel))
			for m in models:
				outfile="{path}/model_input_{k}.hdf".format(path=options.newpath, k=m)
				run("e2proc3d.py {model} {out} --process=filter.lowpass.randomphase:cutoff_freq={freq} --apix={apix} {mask}".format(model=inputmodel[m],out=outfile,freq=1.0/(db["targetres"]*2),apix=db_apix,mask=options.mask))
				inputmodel[m]=outfile


	else:
		models=[0,1]
		outfile="{path}/model_input.hdf".format(path=options.newpath)
		run("e2proc3d.py {model} {out} --process=filter.lowpass.randomphase:cutoff_freq={freq} --apix={apix} {mask}".format(model=inputmodel[0],out=outfile,freq=1.0/(db["targetres"]*2),apix=db_apix,mask=options.mask))
		inputmodel[0]=outfile

	output_3d=[]
	output_cls=[]
	input_eo_order={0:"even",1:"odd"}
	for it in range(options.iter):
		print "Starting iteration {} ...".format(it)
		print "Making projections..."

		if it==0:
		#### first iteration. do one projection for even/odd
			if multimodel:
				projfile=[]
				for m in models:
					projfile.append("{path}/projections_{it:02d}_{k}.hdf".format(path=options.newpath, k=m, it=it))
					run("e2project3d.py {model}  --outfile {proj} -f --orientgen {orient} --sym {sym} --parallel thread:{threads}".format(		model=inputmodel[m],proj=projfile[-1],orient=db["orientgen"],sym=db["sym"],threads=options.threads))
			else:
				projfile=["{path}/projections_{it:02d}.hdf".format(path=options.newpath, it=it)]
				run("e2project3d.py {model}  --outfile {proj} -f --orientgen {orient} --sym {sym} --parallel thread:{threads}".format(		model=inputmodel[0],proj=projfile[0],orient=db["orientgen"],sym=db["sym"],threads=options.threads))

		output_3d.append({})
		output_cls.append({})
		### even/odd loop
		for eoid,eo in input_eo_order.items():

			if it>0:
				inputmodel=[output_3d[-2][eo][m] for m in models]
				print inputmodel
				multimodel=True
			#### make projections for even/odd
				projfile=["{path}/projections_{it:02d}_{k}_{eo}.hdf".format(path=options.newpath, k=m, it=it,eo=eo) for m in models]
				for m in models:
					run("e2project3d.py {model}  --outfile {proj} -f --orientgen {orient} --sym {sym} --parallel thread:{threads}".format(		model=inputmodel[m],proj=projfile[m],orient=db["orientgen"],sym=db["sym"],threads=options.threads))

			oldmapfile=str(db["last_{}".format(eo)])
			ptclfile=str(db["input"][eoid])

			clsmx=oldmapfile.replace("threed","classmx")

			### old projection file is used for classaverage alignment
			oldprojfile=oldmapfile.replace("threed","projections")

			ncls=EMUtil.get_image_count(projfile[0])
			npt=EMUtil.get_image_count(ptclfile)
			newclsmx=["{path}/classmx_{it:02d}_{n}_{eo}.hdf".format(path=options.newpath,n=i,eo=eo,it=it) for i in models]
			classout=["{path}/classes_{it:02d}_{n}_{eo}.hdf".format(path=options.newpath,n=i,eo=eo,it=it) for i in models]
			threedout=["{path}/threed_{it:02d}_{n}_{eo}.hdf".format(path=options.newpath,n=i,eo=eo,it=it) for i in models]
			output_3d[-1][eo]=threedout
			output_cls[-1][eo]=classout

			### get alignment from classmx file and calculate similarity
			print "Calculating similarity matrix..."
			cmxcls=EMData(clsmx,0)
			cmxtx=EMData(clsmx,2)
			cmxty=EMData(clsmx,3)
			cmxalpha=EMData(clsmx,4)
			cmxmirror=EMData(clsmx,5)

			projs=[]
			for pj in projfile:
				projs.append(EMData.read_images(pj))
			xforms=[]
			for i in range(npt):
				c=int(cmxcls[0,i])
				tr=Transform({"type":"2d","alpha":cmxalpha[0,i],"mirror":int(cmxmirror[0,i]),"tx":cmxtx[0,i],"ty":cmxty[0,i]})
				pjs=[projs[k][c] for k in range(len(projfile))]
				xforms.append({"ptclfile":ptclfile,"proj":pjs,"idx":i,"xform":tr,"cmp":options.simcmp})

			pool = Pool()
			corr=pool.map_async(do_compare, xforms)
			pool.close()
			while (True):
				if (corr.ready()): break
				remaining = corr._number_left
				print "Waiting for", remaining, "tasks to complete..."
				time.sleep(2)
			corr=corr.get()
			np.savetxt("{path}/simmx_{it:02d}_{eo}.txt".format(path=options.newpath,eo=eo, it=it),corr)
			#corr=np.loadtxt("{path}/simmx_00_{eo}.txt".format(path=options.newpath,eo=eo))

			### classification
			print "Classifying particles..."
			cmxtmp=cmxcls.copy()
			cmxtmp.to_zero()
			cmxtmp.sub(1)
			cmxout=[cmxtmp.copy() for s in models]



			if multimodel:
				### simply classify
				cls=np.argmin(corr,1)
				print eo,[float(sum(cls==k))/float(npt) for k in models]
				for i in range(npt):
					v=cmxcls[0,i]
					for s in models:
						if s==cls[i]:
							cmxout[s][0,i]=v
						else:
							cmxout[s][0,i]=-1
			else:
				### one model input, split the data to two halves
				for c in range(ncls):
					ss=[]
					ns=0
					for i in range(npt):
						v=cmxcls[0,i]
						if v==c:
							ss.append(corr[i])
							ns+=1
						else:
							ss.append([10]*len(corr[i]))

					### split the data in half
					spt=int(ns*.5)
					for s in models:
						if s==0:
							toavg=np.argsort(ss)[:spt]
						else:
							toavg=np.argsort(ss)[spt:ns]

						for i in toavg:
							cmxout[s][0,i]=c


			### write classmx
			for s in models:
				cmxout[s].write_image(newclsmx[s])
				ns=EMUtil.get_image_count(clsmx)
				for i in range(1,ns):
					e=EMData(clsmx,i)
					e.write_image(newclsmx[s],i)


			print "Making class average and 3d map..."
			for s in models:
				### class average
				run("e2classaverage.py --input {inputfile} --classmx {clsmx} --decayedge --storebad --output {clsout} --ref {proj} --iter {classiter} -f --normproc {normproc} --averager {averager} {classrefsf} {classautomask} --keep {classkeep} {classkeepsig} --cmp {classcmp} --align {classalign} --aligncmp {classaligncmp} {classralign} {prefilt} --parallel thread:{thrd}".format(
					inputfile=ptclfile, clsmx=newclsmx[s], clsout=classout[s], proj=projfile[s], classiter=db["classiter"], normproc=db["classnormproc"], averager=db["classaverager"], classrefsf=db["classrefsf"],
					classautomask=db["classautomask"],classkeep=db["classkeep"], classkeepsig=db["classkeepsig"], classcmp=db["classcmp"], classalign=db["classalign"], classaligncmp=db["classaligncmp"],
					classralign=db["classralign"], prefilt=db["prefilt"], thrd=options.threads))

				### make 3d
				run("e2make3dpar.py --input {clsout} --sym {sym} --output {threed} {preprocess} --keep {m3dkeep} {keepsig} --apix {apix} --pad {m3dpad} --mode gauss_5 --threads {threads} ".format(
				clsout=classout[s],threed=threedout[s], sym=sym, recon=db["recon"], preprocess=db["m3dpreprocess"],  m3dkeep=db["m3dkeep"], keepsig=db["m3dkeepsig"],
				m3dpad=db["pad"],threads=options.threads, apix=db_apix))

		### post process
		print "Post processing..."
		if os.path.exists("strucfac.txt") :
			m3dsetsf="--setsf strucfac.txt"
		else:
			m3dsetsf=""

		for s in models:
			final3d="{path}/threed_{it:02d}_{n}.hdf".format(path=options.newpath,n=s, it=it)
			run("e2refine_postprocess.py --even {even3d} --odd {odd3d} --output {final3d} --automaskexpand {amaskxp} --align --mass {mass} --iter 0 {amask3d} {amask3d2} {m3dpostproc} {setsf} --sym={sym} --restarget={restarget} --underfilter".format(even3d=output_3d[-1]["even"][s], odd3d=output_3d[-1]["odd"][s], final3d=final3d, mass=db["mass"], amask3d=db["automask3d"], sym=sym, amask3d2=db["automask3d2"], m3dpostproc=db["m3dpostprocess"], setsf=m3dsetsf,restarget=db["targetres"], amaskxp=db.setdefault("automaskexpand","0")))

			### copy the fsc files..
			fscs=["fsc_unmasked_{:02d}.txt".format(it),"fsc_masked_{:02d}.txt".format(it),"fsc_maskedtight_{:02d}.txt".format(it)]
			for fsc in fscs:
				fm=os.path.join(options.newpath,fsc)
				fmnew=os.path.join(options.newpath,fsc[:-4]+"_model_{:02d}.txt".format(s))
				try:
					copyfile(fm,fmnew)
					os.remove(fm)
				except: pass

			if it==options.iter-1:
				### make lists
				tmpcls=["tmpcls_even.lst","tmpcls_odd.lst"]
				tmpcls_m=[l.replace('.','_m1.') for l in tmpcls]
				run("e2classextract.py {clsfile} --refinemulti --setname {tmpcls}".format(clsfile=output_cls[-1]["even"][s],tmpcls=tmpcls[0]))
				run("e2classextract.py {clsfile} --refinemulti --setname {tmpcls}".format(clsfile=output_cls[-1]["odd"][s],tmpcls=tmpcls[1]))
				lstout="sets/{}_{}.lst".format(options.newpath,s)
				run("e2proclst.py {lst1} {lst2} --mergesort {lstout}".format(lst1=tmpcls_m[0], lst2=tmpcls_m[1], lstout=lstout))
				for l in tmpcls_m:
					try: os.remove(l)
					except: pass


	E2end(logid)
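# Standalone sketch (not from the script above) of its progress-reporting idiom: poll
# AsyncResult.ready() and log how many chunks remain. _number_left is a private attribute
# of multiprocessing's MapResult, so treat it as informal progress logging only.
from multiprocessing import Pool
import time


def work(x):
    time.sleep(0.5)
    return x * x


if __name__ == '__main__':
    pool = Pool(4)
    res = pool.map_async(work, range(32))
    pool.close()
    while not res.ready():
        print("Waiting for", res._number_left, "tasks to complete...")
        time.sleep(2)
    pool.join()
    print(res.get())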
Example #50
0
 if cal:
     t1 = time.time()
     p = Pool(processes=process)
     result = []
     paras = read_para(input_)
     print(paras)
     kwds = [dict(paras=para,
                  code=code,
                  output=output,
                  start_time=start_time,
                  end_time=end_time,
                  symbol=symbol,
                  time_frame=time_frame,
                  log=log,
                  ) for para in paras]
     task = p.map_async(run_backest, kwds)
     # for para in paras:
     #     result.append(
     #         p.apply_async(func=run_backest,
     #                       kwds=dict(paras=para,
     #                                 code=code,
     #                                 output=output,
     #                                 start_time=start_time,
     #                                 end_time=end_time,
     #                                 )
     #                       )
     #
     #     )
     t2 = time.time()
     print('Process pool created, size: <%s>, total time: %s seconds' % (process, t2 - t1))
     # prints "[42, None, 'hello']"
Example #51
0
        # Reference D04 Clipped Fishnet Output
        d04_output = os.path.join(derived_dir, D04, 'FISHNET', D04_FINAL)

        # Reference LASD
        target_lasd = os.path.join(derived_dir, project_id + '.lasd')

        # Reference Data Domain For Filtering Fishnet
        data_domain = os.path.join(derived_dir, D01, 'RESULTS',
                                   D01_DATA_DOMAIN)

        # Create Filtered Fishnet & Return Extent For Processing
        extent_dict = filter_fishnet(data_domain, base_dir, d04_output)

        # Create Path for Output Rasters
        raster_path = os.path.join(base_dir, 'RASTER')
        os.mkdir(raster_path)

        # Use Multiprocessing Pool for Raster  Generation
        pool = Pool(processes=cpu_count() - 2)
        result = pool.map_async(
            partial(generate_raster, target_lasd, raster_path),
            extent_dict.items())
        pool.close()
        pool.join()

    except Exception as e:
        print('Exception', e)

    finally:
        print('Program Ran: {0}'.format(time.time() - start))
Example #52
0
    ###############################################################################################################
    store = pd.HDFStore("/mnt/DataGuillaume/corr_pop_no_hd/" + session)
    store.put('rip_corr', rip_corr)
    store.put('allrip_corr', allrip_corr)
    store.put('wak_corr', wak_corr)
    store.put('allwak_corr', allwak_corr)
    store.put('rem_corr', rem_corr)
    store.put('allrem_corr', allrem_corr)
    store.close()
    print(time.clock() - start_time, "seconds")
    return time.clock() - start_time


dview = Pool(8)

a = dview.map_async(compute_population_correlation, sessions).get()
# a = compute_population_correlation(datasets[0])

# ###############################################################################################################
# # PLOT
# ###############################################################################################################
# last = np.max([np.max(allrip_corr[:,0]),np.max(alltheta_corr[:,0])])
# bins = np.arange(0.0, last, 0.2)
# # average rip corr
# index_rip = np.digitize(allrip_corr[:,0], bins)
# mean_ripcorr = np.array([np.mean(allrip_corr[index_rip == i,1]) for i in np.unique(index_rip)[0:30]])
# # average theta corr
# index_theta = np.digitize(alltheta_corr[:,0], bins)
# mean_thetacorr = np.array([np.mean(alltheta_corr[index_theta == i,1]) for i in np.unique(index_theta)[0:30]])

# xt = list(bins[0:30][::-1]*-1.0)+list(bins[0:30])
Example #53
0
def compress_zso(fname_in, fname_out, level):
    fin, fout = open_input_output(fname_in, fname_out)
    fin.seek(0, os.SEEK_END)
    total_bytes = fin.tell()
    fin.seek(0)

    magic, header_size, block_size, ver, align = ZISO_MAGIC, 0x18, 0x800, 1, DEFAULT_ALIGN

    # We have to use alignment for any ZSO file larger than 2 GB, because the MSB of each index entry is the plain-block indicator
    # Without alignment an index entry could exceed 2**31, and its plain indicator bit would be set improperly
    align = total_bytes // 2**31

    header = generate_zso_header(magic, header_size, total_bytes, block_size,
                                 ver, align)
    fout.write(header)

    total_block = total_bytes // block_size
    index_buf = [0 for i in range(total_block + 1)]

    fout.write(b"\x00\x00\x00\x00" * len(index_buf))
    show_comp_info(fname_in, fname_out, total_bytes, block_size, align, level)

    write_pos = fout.tell()
    percent_period = total_block / 100
    percent_cnt = 0

    if MP:
        pool = Pool()

    block = 0
    while block < total_block:
        if MP:
            percent_cnt += min(total_block - block, MP_NR)
        else:
            percent_cnt += 1

        if percent_cnt >= percent_period and percent_period != 0:
            percent_cnt = 0

            if block == 0:
                print("compress %3d%% avarage rate %3d%%\r" %
                      (block / percent_period, 0),
                      file=sys.stderr,
                      end='\r')
            else:
                print("compress %3d%% avarage rate %3d%%\r" %
                      (block / percent_period, 100 * write_pos /
                       (block * 0x800)),
                      file=sys.stderr,
                      end='\r')

        if MP:
            iso_data = [(fin.read(block_size), level)
                        for i in range(min(total_block - block, MP_NR))]
            zso_data_all = pool.map_async(lz4_compress_mp,
                                          iso_data).get(9999999)

            for i, zso_data in enumerate(zso_data_all):
                write_pos = set_align(fout, write_pos, align)
                index_buf[block] = write_pos >> align

                if 100 * len(zso_data) / len(iso_data[i][0]) >= min(
                        COMPRESS_THREHOLD, 100):
                    zso_data = iso_data[i][0]
                    index_buf[block] |= 0x80000000  # Mark as plain
                elif index_buf[block] & 0x80000000:
                    print(
                        "Align error, you have to increase align by 1 or OPL won't be able to read offset above 2 ** 31 bytes"
                    )
                    sys.exit(1)

                fout.write(zso_data)
                write_pos += len(zso_data)
                block += 1
        else:
            iso_data = fin.read(block_size)

            try:
                zso_data = lz4_compress(iso_data, level)
            except Exception as e:
                print("%d block: %s" % (block, e))
                sys.exit(-1)

            write_pos = set_align(fout, write_pos, align)
            index_buf[block] = write_pos >> align

            if 100 * len(zso_data) / len(iso_data) >= COMPRESS_THREHOLD:
                zso_data = iso_data
                index_buf[block] |= 0x80000000  # Mark as plain
            elif index_buf[block] & 0x80000000:
                print(
                    "Align error, you have to increase align by 1 or CFW won't be able to read offset above 2 ** 31 bytes"
                )
                sys.exit(1)

            fout.write(zso_data)
            write_pos += len(zso_data)
            block += 1

    # Last position (total size)
    index_buf[block] = write_pos >> align

    # Update index block
    fout.seek(len(header))
    for i in index_buf:
        idx = pack('I', i)
        fout.write(idx)

    print("ziso compress completed , total size = %8d bytes , rate %d%%" %
          (write_pos, (write_pos * 100 / total_bytes)))

    fin.close()
    fout.close()
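# Worked illustration (values chosen for clarity, not taken from the code above) of the
# index encoding used by compress_zso: each index entry stores write_pos right-shifted by
# `align`, and the MSB (0x80000000) flags blocks that were stored uncompressed ("plain").
PLAIN = 0x80000000
align = 1                                # compress_zso sets align = total_bytes // 2**31, i.e. 1 for a 2-4 GiB file
write_pos = 0xC0000000                   # a 3 GiB byte offset

print(bool(write_pos & PLAIN))           # True  -- stored unshifted, the offset would be misread as a plain block
entry = write_pos >> align               # halve the offset before storing it
print(bool(entry & PLAIN))               # False -- the flag bit stays free
print((entry << align) == write_pos)     # True  -- the reader recovers the byte offset (offsets are align-padded)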
Example #54
0
    def download(cls, url):
        form = 'https://www.x23qb.com{}'
        html = get(url).content
        soup = BS(html, 'html.parser')

        title = soup.find("div", {"class": 'd_title'}).text
        title = title.strip().split()[0]
        print(title)
        li = soup.find(id='chapterList').findAll('li')
        a = [i.find('a') for i in li]
        chapters = [i.text.split(' ', 1) for i in a]

        j = 0
        before = ''
        for i in chapters:
            if i[0] != before:
                before = i[0]
                j += 1
            i[0] = itoa(j) + i[0]

        novel = {}
        now = ''
        j = 1
        for i in range(len(a)):
            href = form.format(a[i].get('href'))
            volume, chapter = chapters[i]

            if volume != now:
                now = volume
                if now in novel:
                    chapter = chapter.split(' ', 1)
                    if len(chapter) == 1:
                        pass
                    else:
                        volume += chapter[0]
                        chapter = chapter[1]

            novel[volume] = novel.get(volume, []) + [(chapter, href)]

        path = f'./download/{title}/'
        if not os.path.isdir(path):
            os.mkdir(path)

        for volume, a in novel.items():
            pool = Pool(20)
            content = pool.map_async(cls.get_content,
                                     [i[1].rstrip('.html') for i in a])

            pool.close()
            pool.join()
            content.wait()
            content = content.get()

            out = ''
            for i in range(len(a)):
                chapter = a[i][0]
                print(chapter)
                out += f'\n第{i+1}章 {chapter}\n'
                out += '\n\n'.join(content[i])
                out += '\n'

            with open(path + f'{volume}.txt', 'w') as f:
                f.write(out)
Example #55
0
def main():
    """
    Runner
    """
    args = parse_arguments()
    pool = Pool(args.processes)
    if args.tags:
        cmd = 'behave -d --no-junit --f json --no-summary --no-skipped -t {}'.format(
            ' -t '.join(args.tags))
    else:
        cmd = 'behave -d --no-junit --f json --no-summary --no-skipped'

    p = Popen(cmd, stdout=PIPE, shell=True)
    out, err = p.communicate()
    try:
        j = json.loads(out.decode())
    except ValueError:
        j = []
    # Identify all the feature files that have the tags
    features = [e['location'][:-2] for e in j]

    features_scenarios = []
    for scenario_elements in j:
        # Check if a feature file is reported with no matching scenario, if so skip it
        if 'elements' in scenario_elements:
            for i in scenario_elements['elements']:
                if i['keyword'].upper() in [
                        "scenario".upper(), "scenario outline".upper()
                ]:
                    # Build a list of filepaths for valid scenarios
                    features_scenarios.append(
                        scenario_elements['location'][:-2] + delimiter +
                        i['name'])

    logger.info("Found {} features".format(len(features)))
    logger.info("Found {} scenarios".format(len(features_scenarios)))

    if args.processes > len(features):
        logger.info(
            "You have defined {} and will execute only necessary {} parallel process "
            .format(args.processes, len(features)))
    else:
        logger.info("Will execute {} parallel process".format(args.processes))

    run_feature = partial(_run_feature, tags=args.tags, userdata=args.define)
    logger.info(
        "--------------------------------------------------------------------------"
    )
    output = 0
    failed_tests, passed_tests = [], []  # separate lists; aliasing them would defeat the rerun check below
    # https://stackoverflow.com/questions/1408356/keyboard-interrupts-with-pythons-multiprocessing-p
    for feature, scenario, status in pool.map_async(
            run_feature, features_scenarios).get(9999):
        if status == 'PASSED':
            passed_tests.append((feature, scenario, status))
        else:
            failed_tests.append((feature, scenario, status))

    #
    failed_tests_including_rerun = list(set(failed_tests) - set(passed_tests))
    # if output == 0:
    #     failed_tests.append((feature, scenario, status))
    #     if status == "FAILED":
    #         output = 1
    #     else:
    #         output = 2

    logger.info(
        "--------------------------------------------------------------------------"
    )
    end_time = datetime.now()

    if failed_tests_including_rerun:
        output = 1
        for failed_test in failed_tests_including_rerun:
            logger.info("{0:50}: {1} --> {2}".format(failed_test[0],
                                                     failed_test[1],
                                                     failed_test[2]))

    # if failed_tests:
    #     for failed_test in failed_tests:
    #         logger.info(
    #             "{0:50}: {1} --> {2}".format(failed_test[0], failed_test[1], failed_test[2]))

    logger.info("Duration: {}".format(format(end_time - start_time)))
    logger.info("Test Status: {0}".format(str(output)))

    sys.exit(output)
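# Standalone sketch (not part of the runner above) of the idiom behind
# pool.map_async(...).get(9999): calling get() with a large finite timeout keeps the
# blocking wait interruptible, so Ctrl-C reaches the parent process instead of being
# swallowed inside a plain Pool.map -- the workaround discussed in the Stack Overflow
# thread referenced above.
from multiprocessing import Pool
import time


def crunch(x):
    time.sleep(1)
    return x * x


if __name__ == '__main__':
    pool = Pool(4)
    try:
        results = pool.map_async(crunch, range(16)).get(9999)
        print(results)
        pool.close()
    except KeyboardInterrupt:
        pool.terminate()
    pool.join()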
Example #56
0
def main():
    global opts
    try:
        # TODO: load fofn
        with open(opts.fofn[0], 'r') as f:
            bamlist = [x.rstrip() for x in f]
        N = len(bamlist)
        M = opts.cores
        bamlist = np.array_split(bamlist, M)
        if opts.verbose:
            logging.info('%d bam files in total, and split to %d parts' %
                         (N, M))

        # TODO: build temporary directory
        tmp_dir = tempfile.mkdtemp()
        if opts.verbose:
            logging.info('work in temporary dir %s' % tmp_dir)

        # TODO: run samtools depth on local bamlist
        params = []
        for i, local_bamlist in enumerate(bamlist):
            params.extend([(tmp_dir, i, list(local_bamlist))])

        pool = Pool(opts.cores)
        bedlist = []
        r = pool.map_async(batch_depth, params, callback=bedlist.extend)
        r.wait()
        pool.close()
        pool.join()

        # TODO: run bedtools on bedlist
        if opts.verbose:
            logging.info('merge the results')

        # TODO: join local results
        if opts.verbose:
            Z = 0
            for b in bedlist:
                Z += int(os.path.getsize(b))
            logging.info('join {} depth-files in total size {} Mb'.format(
                M, round(Z / 1024. / 1024., 5)))

        bed = os.path.join(tmp_dir, 'all.bed')
        if len(bedlist) == 1:
            subprocess.call(['mv', bedlist[0], bed])
        else:
            p = pipes.Template()
            p.append(r'join -t $%s\t%s -e 0 -a 1 -a 2 -j 1 -o 0,1.2,2.2 %s %s 2>/dev/null'%\
                     ("'","'",bedlist[0],bedlist[1]),\
                      '--')
            for i in xrange(2, len(bedlist)):
                p.append(r'join -t $%s\t%s -e 0 -a 1 -a 2 -j 1 -o 0,%s,2.2 - %s 2>/dev/null'%\
                         ("'","'",','.join(['1.%d' % (x+2) for x in xrange(i)]),bedlist[i]),\
                          '--')
            f = p.open(bed, 'w')
            f.close()

        # TODO: split into parts
        wc = subprocess.Popen(['wc', '-l', str(bed)], stdout=subprocess.PIPE)
        wc_res, wc_err = wc.communicate()
        LN = int(wc_res.strip().split()[0])
        if opts.verbose:
            logging.info('split %d nucleotide positions into %d parts of each with %d positions' %\
                        (LN,opts.cores,int(np.ceil(LN/float(opts.cores)))))
        subprocess.call(['split','-a','3','-d','-l',str(int(np.ceil(LN/float(opts.cores)))),str(bed),\
                        str(os.path.join(tmp_dir,'all_bed_split'))])
        bedsplit = [
            os.path.join(tmp_dir, 'all_bed_split%03d' % x)
            for x in xrange(opts.cores)
        ]

        params = []
        for i, bs in enumerate(bedsplit):
            params.extend([(tmp_dir, i, M, bs)])

        pool2 = Pool(opts.cores)
        bedsplit = []
        r2 = pool2.map_async(nucleotide_depth,
                             params,
                             callback=bedsplit.extend)
        r2.wait()
        pool2.close()
        pool2.join()

        # TODO: report
        if opts.verbose:
            logging.info('merge %d parts, then sort' % opts.cores)
        p = pipes.Template()
        p.append('cat %s' % " ".join([str(x) for x in bedsplit]), '--')
        p.append('sort -k1,1n -k2,2n', '--')
        f = p.open(bed, 'w')
        f.close()
        proc_sum_bed = subprocess.Popen([
            'bedtools', 'merge', '-c', '4', '-o', 'mean', '-d', '10', '-i',
            str(bed)
        ])
        proc_sum_bed.wait()
        if opts.verbose:
            logging.info('rm %s' % tmp_dir)
        #shutil.rmtree(tmp_dir)
        if opts.verbose:
            logging.info('complete')

    except Exception as e:
        logging.exception('rm %s' % tmp_dir)
        shutil.rmtree(tmp_dir)
        raise e
Example #57
0
n_workers = int(raw_input("How many workers should I use? "))
print n_workers

#--------------------------------------------------------------------------------------
# Make a pool to execute the commands
#--------------------------------------------------------------------------------------

pool = Pool(n_workers)

#--------------------------------------------------------------------------------------
# Try to get the pool of commands to run.
# If the user wants to quit, save the remaining copies to a file.
#--------------------------------------------------------------------------------------

try:
    pool.map_async(printAndExecute, command_list).get(99999999)

except KeyboardInterrupt:

    #--------------------------------------------------------------------------------------
    # First, terminate the pool
    #--------------------------------------------------------------------------------------

    pool.terminate()

    #--------------------------------------------------------------------------------------
    # Now write the remaining commands to a .txt file
    #--------------------------------------------------------------------------------------

    print "\n"
    print "\n"
Example #58
0
POST autograder results (in parallel, load testing)
"""


def post_grade(data):
    h = {'content-type': 'application/json'}
    addr = 'http://localhost:%d' % (server.server_address[1])
    res = requests.post(addr, data=data, headers=h)
    if not res.ok:
        logger.error('Failed request! Status code: {0}\n{1}'.format(
            res.status_code, res.content))


logger.warn('Starting load test thread pool...')
pool = Pool(num_threads)
pool.map_async(post_grade, data_packets)
"""
Handle results in work queue
"""
time.sleep(5)
logger.warn('Starting rq worker...')
util.start_rq_worker(work_queue)
"""
Check for successful entry
"""
logger.warn('Checking entries...')
for cq in check_queries:
    grade = connection.Grade.find_one(cq[0])
    if not grade:
        logger.error('Missing grade!\n{0}'.format(jprint.pformat(cq[1])))
Example #59
0
def fragments(bam,
              fragment_path,
              min_mapq=30,
              nproc=1,
              cellbarcode="CB",
              chromosomes="(?i)^chr",
              readname_barcode=None,
              cells=None,
              max_distance=5000,
              min_distance=10,
              chunksize=500000,
              shifts=[4, -5],
              collapse_within=False):
    """Create ATAC fragment file from BAM file

    Iterate over reads in BAM file, extract fragment coordinates and cell barcodes.
    Collapse sequencing duplicates.

    Parameters
    ----------
    bam : str
        Path to BAM file
    fragment_path : str
        Path for output fragment file
    min_mapq : int
        Minimum MAPQ to retain fragment
    nproc : int, optional
        Number of processors to use. Default is 1.
    cellbarcode : str
       Tag used for cell barcode. Default is CB (used by cellranger)
    chromosomes : str, optional
        Regular expression used to match chromosome names to include in the
        output file. Default is "(?i)^chr" (starts with "chr", case-insensitive).
        If None, use all chromosomes in the BAM file.
    readname_barcode : str, optional
        Regular expression used to match cell barocde stored in read name.
        If None (default), use read tags instead. Use "[^:]*" to match all characters
        before the first colon (":").
    cells : str
        File containing list of cell barcodes to retain. If None (default), use all cell barcodes
        found in the BAM file.
    max_distance : int, optional
        Maximum distance between integration sites for the fragment to be retained.
        Allows filtering of implausible fragments that likely result from incorrect
        mapping positions. Default is 5000 bp.
    min_distance : int, optional
        Minimum distance between integration sites for the fragment to be retained.
        Allows filtering implausible fragments that likely result from incorrect
        mapping positions. Default is 10 bp.
    chunksize : int
        Number of BAM entries to read through before collapsing and writing
        fragments to disk. Higher chunksize will use more memory but will be
        faster.
    shifts : list
        Fragment position shifts to apply. First element defines shift for + strand,
        second element defines shift for - strand.
    collapse_within : bool
        Only collapse fragments containing the same start and end coordinate within the
        same cell barcode. Setting to True will only collapse duplicates if the cell barcode
        is the same (allows same fragment coordinates with different cell barcode)
    """
    nproc = int(nproc)
    chrom = utils.get_chromosomes(bam, keep_contigs=chromosomes)
    cells = utils.read_cells(cells)
    p = Pool(nproc)
    frag_lists = [
        p.map_async(
            functools.partial(getFragments,
                              bam=bam,
                              min_mapq=int(min_mapq),
                              cellbarcode=cellbarcode,
                              readname_barcode=readname_barcode,
                              cells=cells,
                              max_distance=max_distance,
                              min_distance=min_distance,
                              chunksize=chunksize,
                              shifts=shifts,
                              collapse_within=collapse_within),
            list(chrom.items()),
        )
    ]
    filenames = [res.get() for res in frag_lists]
    # cat files and write to output
    with open(fragment_path, "w") as outfile:
        for i in filenames:
            for j in i:
                with open(j, "r") as infile:
                    for line in infile:
                        outfile.write(line)
                os.remove(j)
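# Hedged usage sketch for fragments() above; the file paths are placeholders and the
# keyword values simply restate the documented defaults. Work is split across the BAM's
# chromosomes through a single Pool.map_async call with `nproc` workers.
if __name__ == '__main__':
    fragments(
        bam='atac_possorted.bam',        # placeholder: coordinate-sorted, indexed BAM
        fragment_path='fragments.tsv',   # placeholder output path
        min_mapq=30,
        nproc=8,
        cellbarcode='CB',
        chromosomes='(?i)^chr',
        max_distance=5000,
        min_distance=10,
    )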
Example #60
0
	###############################################################################################################
	store 			= pd.HDFStore("/mnt/DataGuillaume/corr_pop/"+session.split("/")[1]+".h5")
	store.put('rip_corr', rip_corr)
	store.put('allrip_corr', allrip_corr)
	store.put('wak_corr', wak_corr)
	store.put('allwak_corr', allwak_corr)
	store.put('rem_corr', rem_corr)
	store.put('allrem_corr', allrem_corr)	
	store.close()
	print(time.clock() - start_time, "seconds")
	return time.clock() - start_time


dview = Pool(8)

a = dview.map_async(compute_population_correlation, datasets)
# a = compute_population_correlation(datasets[0])	


# ###############################################################################################################
# # PLOT
# ###############################################################################################################
# last = np.max([np.max(allrip_corr[:,0]),np.max(alltheta_corr[:,0])])
# bins = np.arange(0.0, last, 0.2)
# # average rip corr
# index_rip = np.digitize(allrip_corr[:,0], bins)
# mean_ripcorr = np.array([np.mean(allrip_corr[index_rip == i,1]) for i in np.unique(index_rip)[0:30]])
# # average theta corr
# index_theta = np.digitize(alltheta_corr[:,0], bins)
# mean_thetacorr = np.array([np.mean(alltheta_corr[index_theta == i,1]) for i in np.unique(index_theta)[0:30]])