Example No. 1
 def _renew_pp_server(self, servers = None, ncpus = None):
     '''
     @summary: renews the pp server
     @param servers: tuple of all parallel python servers to connect with, example ("*",) = auto-discover, ("10.0.0.1","10.0.0.2") # list of static IPs
     @param ncpus: number of CPUs to reserve 
     '''     
     try:
         self.job_server.destroy()
     except:
         pass
     
     if servers == "serial":
         self.job_server = None
         self.number_of_cores = None
         self.parallelmode = False
     else:
         import pp
         if servers == "auto":
             servers = None    
         if servers is None and ncpus is None:
             self.job_server = pp.Server()
         elif servers is None and ncpus is not None:
             self.job_server = pp.Server(ncpus=ncpus)
         elif servers is not None and ncpus is None:
             self.job_server = pp.Server(ppservers=servers)
         else:
             self.job_server = pp.Server(ppservers=servers, ncpus=ncpus)
             
         self.number_of_cores = self.job_server.get_ncpus()
         self.parallelmode = True
         print "Parallel Python option started with", str(self.number_of_cores), "cores"
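A minimal usage sketch (hypothetical call sites; only the argument conventions come from the docstring above):

# hypothetical calls, assuming `obj` exposes the _renew_pp_server method above
obj._renew_pp_server(servers="serial")                            # disable parallelism
obj._renew_pp_server(servers="auto")                              # local server, auto-detected CPUs
obj._renew_pp_server(servers=("*",))                              # auto-discover remote ppservers
obj._renew_pp_server(servers=("10.0.0.1", "10.0.0.2"), ncpus=4)   # static IPs plus 4 local workers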
Example No. 2
    def communities(self, parallel):
        print "Training classifiers..."
        # multiprocessing.freeze_support()
        # cores=mp.cpu_count()
        # cores = multiprocessing.cpu_count()
        # pool = mp.ProcessingPool(4)
        # subgraphs={}
        classifiers = {}
        deg_centra = {}
        between_centra = {}
        load_centra = {}
        avg_nei_deg = {}
        harmonic_centra = {}
        close_centra = {}
        # score={}
        # for i,y in enumerate(pool.imap(self.get_classifiers,self.comm2node.keys())):
        #     print i
        # subgraph=y[0]
        # classifiers[i]=y[1]
        # deg_centra[i],between_centra[i],load_centra[i],avg_nei_deg[i],harmonic_centra[i],close_centra[i]=self.attributes(y[0])
        # score=dict(score,**y[2])
        if not parallel:
            start_time = time.time()
            for comm in self.comm2node.keys():
                print comm
                # nodes=self.comm2node[comm]
                subgraph = self.graph.subgraph(self.comm2node[comm])
                # subgraphs[comm]=subgraph
                classifiers[comm] = Predictor.training(subgraph)
                deg_centra[comm], between_centra[comm], load_centra[
                    comm], avg_nei_deg[comm], harmonic_centra[
                        comm], close_centra[comm] = self.attributes(subgraph)
            print 'non-parallel:', time.time() - start_time, 's'
        else:
            ppservers = ()
            if len(sys.argv) > 1:
                ncpus = int(sys.argv[1])
                # Creates jobserver with ncpus workers
                job_server = pp.Server(ncpus, ppservers=ppservers)
            else:
                # Creates jobserver with automatically detected number of workers
                job_server = pp.Server(ppservers=ppservers)

            print "Number of worker cores available to pp:", job_server.get_ncpus(), "workers"
            # comms=list(self.comm2node.keys())
            start_time = time.time()
            jobs = [(comm,
                     job_server.submit(self.get_classifiers, (comm, ), (),
                                       ("Predictor", )))
                    for comm in self.comm2node.keys()]
            # print "yes"
            for comm, job in jobs:
                print comm
                result = job()
                classifiers[comm] = result[1]
                deg_centra[comm], between_centra[comm], load_centra[comm], \
                    avg_nei_deg[comm], harmonic_centra[comm], close_centra[comm] = self.attributes(result[0])
            print 'parallel:', time.time() - start_time, 's'

        return classifiers, deg_centra, between_centra, load_centra, avg_nei_deg, harmonic_centra, close_centra
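The worker passed to job_server.submit above is not shown; judging from how job()[0] and job()[1] are unpacked, it presumably has a shape like this hypothetical sketch:

    def get_classifiers(self, comm):
        # hypothetical reconstruction inferred from the caller: return the
        # community subgraph first (fed to self.attributes) and the trained
        # classifier second (stored in classifiers[comm])
        subgraph = self.graph.subgraph(self.comm2node[comm])
        return subgraph, Predictor.training(subgraph)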
Example No. 3
def NCVtrain(args):
    ### input
    M_files = [
        os.path.abspath(line.strip()) for line in open(args.male)
        if line.strip() != ""
    ]
    F_files = [
        os.path.abspath(line.strip()) for line in open(args.female)
        if line.strip() != ""
    ]
    job_server = pp.Server(min(6, args.cpus), ppservers=("*:3456", ))
    M_counts = tqdm_pp_jobs([
        job_server.submit(get_readcount_table, (f, ), modules=("pysam", ))
        for f in M_files
    ],
                            desc="Male train samples")
    F_counts = tqdm_pp_jobs([
        job_server.submit(get_readcount_table, (f, ), modules=("pysam", ))
        for f in F_files
    ],
                            desc="Female train samples")
    chroms = ["chr" + str(i) for i in range(1, 23)] + ["chrX", "chrY"]
    df = pd.DataFrame(
        {
            chr: [sum(counts[chr]) for counts in M_counts + F_counts]
            for chr in chroms
        },
        index=M_files + F_files,
        dtype="float")
    df['Gender'] = ["M"] * len(M_counts) + ["F"] * len(F_counts)
    ### train reference chromosome set
    inputs = [("chr" + str(i), df) for i in range(1, 23)]
    inputs += [("chrX", df[df.Gender == "F"]), ("chrY", df[df.Gender == "F"])]
    job_server = pp.Server(args.cpus, ppservers=("*:3456", ))
    refset = tqdm_pp_jobs([
        job_server.submit(optimize, i, modules=("numpy", "pandas"))
        for i in inputs
    ],
                          desc="Enumerating reference set")
    ### calculate mean/std for each bin
    bin_mean_std = {}
    pickle.dump((refset, bin_mean_std), open(args.model, "w"))
    for chr, comb, _, _ in tqdm(refset, desc="Bin training"):
        bin_num = [len(counts[chr]) for counts in M_counts + F_counts]
        if min(bin_num) != max(bin_num):
            raise Exception("BAMs from different ref genome")
        bin_mean_std[chr] = []
        for i in range(bin_num[0]):
            bin_count = [
                counts[chr][i] for counts in (
                    F_counts if chr in ['chrX', 'chrY'] else M_counts +
                    F_counts)
            ]
            ref_count = df[df.Gender == "F"] if chr in ['chrX', 'chrY'] else df
            bin_ratio = bin_count / ref_count[comb].sum(axis=1)
            bin_mean_std[chr] += [(bin_ratio.mean(), bin_ratio.std())]
    pickle.dump((refset, bin_mean_std), open(args.model, "w"))
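tqdm_pp_jobs is a helper that is not part of this snippet; a minimal sketch of what it presumably does (collect the results of already-submitted pp jobs while advancing a tqdm progress bar), written as an assumption rather than the original implementation:

from tqdm import tqdm

def tqdm_pp_jobs(jobs, desc=""):
    # hypothetical helper: wait on each submitted pp job and gather its
    # result, showing a tqdm progress bar labelled with `desc`
    return [job() for job in tqdm(jobs, desc=desc)]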
Example No. 4
def cerenkov_ml(task_table, method_table, fold_table, hp_table, data_table,
                fold_assign_method, ncpus, feature_reduced):

    if fold_assign_method == "SNP":
        task_table["auroc"] = -1.0
        task_table["aupvr"] = -1.0
    else:
        task_table["avgrank"] = -1.0

    jobs = []
    result = []
    ppservers = ()

    if ncpus == -1:
        job_server = pp.Server(ppservers=ppservers)
        print "Starting with ", job_server.get_ncpus(), "CPUs"
    else:
        job_server = pp.Server(ncpus, ppservers=ppservers)
        print "Starting with ", job_server.get_ncpus(), "CPUs"

    for task_no, task in task_table.iterrows():

        method = task["method"]
        method_name = method.__name__
        hp_ind = task["hp"]
        hp = hp_table[method_name].ix[hp_ind, "hp"]  # time: 0.0004
        data_ind = task["data"]  # time: 3.0e-5
        data = data_table[data_ind]  # time: 9.0e-7
        fold_ind = task["fold"]  # time: 2.0e-5
        fold = fold_table.loc[[fold_ind]]  # time: 6.0e-4
        args = (data, hp, fold, fold_assign_method, task_no)  # time: 5.0e-6
        jobs.append(
            job_server.submit(method,
                              args,
                              modules=("time", "pandas", "numpy", "xgboost",
                                       "sklearn.ensemble")))  # time: 0.08
        # job_server.submit(method, args, modules=("time", "pandas", "numpy", "xgboost", "sklearn.ensemble")) # time: 0.15
        print "[INFO] a job submitted!", "task_no", task_no
    s_time = time.time()
    for j in jobs:

        result = j()

        if fold_assign_method == "SNP":
            result_task_no = result["task_no"]
            task_table.ix[result_task_no, "auroc"] = result["auroc"]
            task_table.ix[result_task_no, "aupvr"] = result["aupvr"]
        else:
            result_task_no = result["task_no"]
            task_table.ix[result_task_no, "avgrank"] = result["avgrank"]

    job_server.print_stats()
    job_server.destroy()

    pickle.dump(task_table, open("task_table.p", "wb"))
    print "[INFO] dump to task_table.p!"
    return task_table
def optimalBandwidthSelection(Y, X, *args):
    '''
        For a search grid that is robust to outliers, build the grid in log
        space, then transform back to levels.
    '''
    # add a small offset so the log is defined where X - X.min() == 0
    if len(args) == 0:
        logX = numpy.log(X - X.min() + 0.00001)
        hVec = numpy.array(numpy.linspace(logX.min(), logX.max(), 52))
        # cut off the first couple, which will be too small anyway
        hVec = numpy.delete(hVec, [0, 1, 2, 3, 51], 0)
        hVec = numpy.exp(hVec)
    elif len(args) == 1:
        hVec = args[0]
    else:
        raise TypeError('Error: hVec format is wrong.')

    numH = hVec.shape[0]
    rHatVecPP = numpy.zeros((numH, 1))

    import pp, sys

    # tuple of all parallel python servers to connect with
    ppservers = ()

    if len(sys.argv) > 1:
        ncpus = int(sys.argv[1])
        # Creates jobserver with ncpus workers
        job_server = pp.Server(ncpus, ppservers=ppservers)
    else:
        # Creates jobserver with automatically detected number of workers
        job_server = pp.Server(ppservers=ppservers)

    # see what comes out
    jobs = [(i,
             job_server.submit(LOORiskEstFast, (Y, X, hVec[i]),
                               (polynomialFit, ), (
                                   "numpy",
                                   "kernel",
                               ))) for i in range(numH)]

    for i, job in jobs:
        result = job()
        rHatVecPP[i] = result

    job_server.destroy()

    #start_time = time.time()
    #for iH in range(numH):
    #    rHatVecNP[iH] = LOORiskEstFast(Y,X,hVec[iH])
    #print "NP: Time elapsed ", time.time()-start_time

    # now pick the bandwidth that minimizes the estimated leave-one-out risk
    optimalIdx = numpy.argmin(rHatVecPP)
    return hVec[optimalIdx]
Example No. 6
    def __initializeRay(self):
        """
          Internal method that initializes the internal parallel system.
          It initializes the RAY implementation (with a socketing system) in
          case RAVEN is run on a cluster with multiple nodes or NumMPI > 1;
          otherwise multi-threading is used.
          @ In, None
          @ Out, None
        """
        ## set up environment
        os.environ['PYTHONPATH'] = os.pathsep.join(sys.path)
        ## Check if the list of unique nodes is present and, if so, initialize the remote servers
        servers = None
        if self.runInfoDict['internalParallel']:
            if len(self.runInfoDict['Nodes']) > 0:
                availableNodes = [
                    nodeId.strip() for nodeId in self.runInfoDict['Nodes']
                ]
                ## identify the local host name and get the number of local processors
                localHostName = self.__getLocalHost()
                self.raiseADebug("Local host name is  : ", localHostName)
                nProcsHead = availableNodes.count(localHostName)
                self.raiseADebug("# of local procs    : ", str(nProcsHead))
                ## initialize ray server with nProcs
                self.rayServer = ray.init(
                    num_cpus=int(nProcsHead)) if _rayAvail else pp.Server(
                        ncpus=int(nProcsHead))
                ## Get localHost and servers
                servers = self.__runRemoteListeningSockets(
                    self.rayServer['redis_address'])
            else:
                self.rayServer = ray.init(num_cpus=int(self.runInfoDict['totalNumCoresUsed'])) if _rayAvail else \
                                 pp.Server(ncpus=int(self.runInfoDict['totalNumCoresUsed']))
            if _rayAvail:
                self.raiseADebug("Head node IP address: ",
                                 self.rayServer['node_ip_address'])
                self.raiseADebug("Redis address       : ",
                                 self.rayServer['redis_address'])
                self.raiseADebug("Object store address: ",
                                 self.rayServer['object_store_address'])
                self.raiseADebug("Raylet socket name  : ",
                                 self.rayServer['raylet_socket_name'])
                self.raiseADebug("Session directory   : ",
                                 self.rayServer['session_dir'])
                if servers:
                    self.raiseADebug("# of remote servers : ",
                                     str(len(servers)))
                    self.raiseADebug("Remote servers      : ",
                                     " , ".join(servers))

        else:
            ## We are just using threading
            self.rayServer = None

        self.isRayInitialized = True
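The _rayAvail flag used above is defined elsewhere in the module; presumably it comes from a guarded import along these lines (hypothetical sketch, not the original RAVEN code):

# hypothetical module-level guard: prefer ray, fall back to parallel python
try:
    import ray
    _rayAvail = True
except ImportError:
    import pp
    _rayAvail = False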
def main():

    #tuple of all parallel python servers to connect with
    ppservers = ()
    #ppservers = ("10.0.0.1",)

    if len(sys.argv) > 1:
        ncpus = int(sys.argv[1])
        folder_number = int(sys.argv[2])

        # Creates jobserver with ncpus workers
        job_server = pp.Server(ncpus, ppservers=ppservers)
    else:
        # Creates jobserver with automatically detected number of workers
        job_server = pp.Server(ppservers=ppservers)

    print "Starting pp with", job_server.get_ncpus(), "workers"
    print "Folder Number == ", folder_number

    folders = range(0, folder_number + 26, 25)
    intervales = []
    for i in range(len(folders) - 1):
        intervales.append((folders[i], folders[i + 1]))
    print intervales
    # Parallel evolution for every lambda value
    print "Start running jobs"
    jobs = [(interval,
             job_server.submit(get_min_generation, (interval, ),
                               modules=("numpy", "pandas", "os")))
            for interval in intervales]

    #archive_strc = numpy.zeros((24,4))
    archive_strc = []
    for folder, job in jobs:
        res = job()
        print folder, "===", res
        archive_strc.append(res)
    archive_strc = numpy.concatenate(archive_strc)

    print archive_strc.shape
    print "                                      ", [
        "|| N ||", "|| F ||", "||S||", "||FREQ||"
    ]
    print "The average number of generations is ", numpy.mean(archive_strc,
                                                              axis=0)
    print "The median number of generations is ", numpy.median(archive_strc,
                                                               axis=0)

    print "Number of success === ", [
        len(success[success < 100]) for success in archive_strc.T
    ]
    print "Number of failures === ", [
        len(success[success == 100]) for success in archive_strc.T
    ]
def start():

    DateinOutput = datetime.datetime.now().strftime("%Y-%m-%d")
    if platform.system() == 'Windows':
        file = '..\\..\\data\\rain\\5-9_2017.csv'
        pathIR8 = '..\\..\\data\\irdata\\ir08nc\\'
        pathIR13 = '..\\..\\data\\irdata\\ir13nc\\'
        pathIR15 = '..\\..\\data\\irdata\\ir15nc\\'
        outputPath = '..\\..\\output_{0}\\'.format(DateinOutput)

    else:
        file = '/data/rain/8-9_2017.csv'
        pathIR8 = '/data/dl_hackathon_data_2/ir08nc/'
        pathIR13 = '/data/dl_hackathon_data_2/ir13nc/'
        pathIR15 = '/data/dl_hackathon_data_2/ir15nc/'
        outputPath = '/home/team7/hackathon/Test'

    if not os.path.isdir(outputPath):
        os.mkdir(outputPath)

    header = ['Date', 'Lat', 'Long', 'Rain']
    df = pandas.read_csv(file, names=header)
    date = pandas.unique(df['Date'])

    #######################################  Parallel  ####################################################
    ppservers = ()

    if len(sys.argv) > 1:
        ncpus = int(sys.argv[1])
        # Creates jobserver with ncpus workers
        job_server = pp.Server(ncpus, ppservers=ppservers)
    else:
        # Creates jobserver with automatically detected number of workers
        job_server = pp.Server(ppservers=ppservers)
    print("Starting pp with", job_server.get_ncpus(), "workers")
    #################################################################################################

    for dt in date:

        IR8_check = checkIR8(dt, pathIR8)
        IR13_check = checkIR13(dt, pathIR13)
        IR15_check = checkIR15(dt, pathIR15)
        if IR15_check is not None and IR13_check is not None and IR8_check is not None:
            print(dt)
            job_server.submit(ReadFile, (
                dt,
                df,
                IR8_check,
                IR13_check,
                IR15_check,
                outputPath,
            ), (WriteToCSV, ),
                              ("os", "netCDF4", 'pandas', 'numpy', 'datetime'))
Example No. 9
def create_job_server(simulatedPP=True, serverList=defaultServerList, secret=defaultSecret):
    #create a true or simulated job server

    #a useful trick for anonymous classes, used when no job servers are available:
    #http://norvig.com/python-iaq.html
    class Struct:
        def __init__(self, **entries): self.__dict__.update(entries)

    if simulatedPP:
        job_server = Struct()
        # each submit() returns a zero-argument callable that lazily evaluates func(*args),
        # mimicking the interface of a real pp job
        job_server.submit = lambda func, args=tuple(), depfuncs=tuple(), modules=tuple(), globals=tuple(): lambda: func(*args)
        job_server.destroy = lambda : ''
        job_server.get_ncpus = lambda : -10
        job_server.get_active_nodes = lambda : {}
        job_server.print_stats = lambda : ''
        job_server.secret = 'epo20pdosl;dksldkmm'
        job_server.ppservers = [('simulated', 60000)]
        job_server.simulated   = True
        job_server.wait = lambda : ''
        print 'Created a SIMULATED job server.'
        print
    else:
        print 'Server list: \n' + str(serverList)
        job_server = pp.Server(ppservers=tuple(serverList), loglevel=pp.logging.DEBUG, restart=True, secret=secret)
        
        job_server.set_ncpus(0)  #making all jobs remote removes unexpected errors
        time.sleep(1)
        print 'Active nodes: \n' + str(job_server.get_active_nodes())
        
    return job_server
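A brief usage sketch (hypothetical): because the simulated object exposes the same submit/destroy surface as pp.Server, calling code does not need to know which kind it received.

# hypothetical usage of the factory above
js = create_job_server(simulatedPP=True)
job = js.submit(sum, ([1, 2, 3],))   # returns a zero-argument callable
print 'result:', job()               # -> 6, evaluated lazily in the simulated case
js.destroy()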
def parFeatureExtraction(dataset_location,pathLength):
    t0=time.time()
    path =dataset_location+'/*.wav'
    files=glob.glob(path)
    auList=list()
    for file in files:
        auList.append(file)

    jobs=[]
    with open('/home/ubantu/TwoClassfeatureSet.csv', 'w') as csvfile:
        fieldnames = ['Spect Centroid', 'Spect Rolloff','Spect Flux','RMS','ZCR','SC_SD','SR_SD','SF_SD','ZCR_SD','energy',\
                      'MFCC1','MFCC2','MFCC3','MFCC4','MFCC5','MFCC6','MFCC7','MFCC8','MFCC9','MFCC10','MFCC11','MFCC12','MFCC13',\
                      'CLASS']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        job_server = pp.Server()
        fileIndex=[]
        jobs = [(file, job_server.submit(extractFeature,(file,pathLength),\
                (ClassToInt,spectral_centroid,spectral_rolloff,spectral_flux,\
                 root_mean_square,zero_crossing_rate,mfcc,stEnergy,stft,trfbank,hz2mel,mel2hz,preemp,segment_axis),\
                ("numpy","wavfile","loadmat","lfilter","hamming","fft","dct",))) for file in auList]
        for input,job in jobs:
            #print "INPUT=  ",input,"  JOB=   ",job()
            fileIndex.append(input)
            result=job()
            s1=result[0]
            sr1=result[1]
            sf1=result[2]
            rms=result[3]
            zcr=result[4]
            MFCC_res=result[5]
            #MFCCs=MFCC_res[0]
            #rms= rms[~numpy.isnan(rms)] #rms array contains NAN values and we have to remove these values

            MFCC_coef=list()     #TEMP COMMENT
            ran=MFCC_res.shape
            #print "RAANNNN===",ran
            #for ind in range(len(MFCCs)):
            #    MFCCs[ind][MFCCs[ind] == -inf] = 0
            ran1=ran[0]
            for ind1 in range(13):    #TEMP COMMENT
                sum=0
                for ind in range(ran1):
                    sum+=MFCC_res[ind,ind1]
                MFCC_coef.append(sum/ran1)     #TEMP COMMENT
            eng=result[6]
            intClass=result[7]          #TEMP COMMENT
            #print result,"<===JOB"
            writer.writerow({'Spect Centroid':s1.mean().astype(float), 'Spect Rolloff':sr1.mean().astype(float),'Spect Flux':sf1.mean().astype(float),'RMS':rms.mean().astype(float),'ZCR':zcr.mean().astype(float),\
                            'SC_SD':s1.std().astype(float),'SR_SD':sr1.std().astype(float),'SF_SD':sf1.std().astype(float),'ZCR_SD':zcr.std().astype(float),'energy':eng.astype(float),\
                            'MFCC1':MFCC_coef[0],'MFCC2':MFCC_coef[1],'MFCC3':MFCC_coef[2],'MFCC4':MFCC_coef[3],\
                            'MFCC5':MFCC_coef[4],'MFCC6':MFCC_coef[5],'MFCC7':MFCC_coef[6],'MFCC8':MFCC_coef[7],\
                            'MFCC9':MFCC_coef[8],'MFCC10':MFCC_coef[9],'MFCC11':MFCC_coef[10],'MFCC12':MFCC_coef[11],\
                            'MFCC13':MFCC_coef[12],'CLASS':intClass})


    print "feature extraction done in=   ",time.time()-t0
    #joinCSVs(auList,pathLength)
    job_server.print_stats()
    print "=======================================END"
Example No. 11
    def fit(self, X, Y):
        """Train the ensemble on the data provided as ndarrays.

        :param X: Training features
        :param Y: Labels
        :returns: Trained model.
        """
        # for i in xrange(len(self.treeModels)):
        # 	self.treeModels[i].fit(X,Y, self.splitCriterion, self.weighting)

        server = pp.Server()
        jobqueue = deque()

        for i in xrange(len(self.treeModels)):
            #jobqueue.append(server.submit(self.treeModels[i].fit, (X[reld],Y[reld],self.splitCriterion,self.weighting), modules = ('paralleltrees','copy','numpy','random')))
            jobqueue.append(
                server.submit(self.treeModels[i].fit,
                              (X, Y, self.splitCriterion, self.weighting),
                              modules=('paralleltrees', 'copy', 'numpy',
                                       'random')))

        print "This should be parallel now, check CPU usage in task manager, should be >25%"
        for i in xrange(len(self.treeModels)):
            self.treeModels[i] = copy.deepcopy(jobqueue[0]())
            jobqueue.popleft()

        return
Example No. 12
def ZHfucplbias(sig_arr, detc_arr):
    deriv_step = 0.0001
    step_arr = deriv_step * np.ones(len(detc_arr))
# Liu
    deriv_cof = np.array([detc_arr + 2.0 * step_arr, detc_arr + step_arr,
                          detc_arr - step_arr, detc_arr - 2.0 * step_arr, detc_arr])
    ppservers = ()
    ncpus = 8
    ppservers = ("10.0.0.1",)
    job_server = pp.Server(ncpus, ppservers=ppservers)
    start_time = time.time()
    # The following submits the five shifted evaluations and then retrieves the results
    inputs = deriv_cof
    deriv_temp = np.zeros([len(detc_arr), 5])
    jobs = [(input, job_server.submit(ZHfunc,
                                      (input, sig_arr, mass_arr, CP['num_r'], CP['rho_m']),
                                      (Gaus,),
                                      ("numpy", "scipy.interpolate",)))
            for input in inputs]
    i = 0
    for input, job in jobs:
        # print "Derivative array", "is", job()
        deriv_temp[:, i] = job()
        i = i + 1
    print "Time elapsed: ", time.time() - start_time, "s"
# Liu
    # five-point central-difference estimate of the derivative of log(ZHfunc) w.r.t. detc
    deriv_nuFu = -np.log(deriv_temp[:, 0]) + 8.0 * np.log(deriv_temp[:, 1])
    deriv_nuFu = deriv_nuFu - 8.0 * np.log(deriv_temp[:, 2]) + np.log(deriv_temp[:, 3])
    deriv_nuFu = deriv_nuFu / (12.0 * deriv_step)
    Bias = 1.0 - deriv_nuFu
    dndlnm = deriv_temp[:, 4]
    for i in range(len(Bias)):
        if np.isnan(Bias[i]):
            Bias[i] = 0

    return (dndlnm, Bias)
Example No. 13
def calculate_results_parallel(specimens, args, scale_min, scale_max):
    res = [None] * args.o

    if args.x is None:
        ppservers = ()
    else:
        ppservers = (args.x, )

    job_server = pp.Server(args.t, ppservers=ppservers)

    jobs = []
    for i in range(len(res)):
        jobs.append(
            job_server.submit(objective_function,
                              (specimens[i], args.f, scale_min, scale_max),
                              (scale, ), (
                                  'math',
                                  'time',
                              )))

    for i, job in enumerate(jobs):
        res[i] = job()

    return res
Example No. 14
def job_server_init():
    ncpus = settings['ncpus']
    pservers = tuple(settings['ppservers'])
    secret = settings['secret']
    if ncpus is not None:
        job_server = pp.Server(ncpus,
                               ppservers=pservers,
                               secret=secret,
                               socket_timeout=None)
    else:
        job_server = pp.Server(ppservers=pservers,
                               secret=secret,
                               socket_timeout=None)
    print "Starting pp with %d SMP local workers and %s remote workers" % \
        (job_server.get_ncpus(), str(pservers))
    return job_server
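For context, a hypothetical shape of the settings dict this function reads (only the key names come from the code above; the values are placeholders):

# hypothetical configuration; key names taken from job_server_init
settings = {
    'ncpus': 4,                        # local worker count, or None to autodetect
    'ppservers': ['10.0.0.1:60000'],   # remote ppserver addresses
    'secret': 'shared-secret',         # secret shared with the remote ppserver.py processes
}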
Example No. 15
def Score(P, S):
    """

    :rtype : int
    """
    #scoreLock.acquire()
    #print "In score method with P length", len(P)
    finalScore = 0
    # tuple of all parallel python servers to connect with
    #ppServers = ("127.0.0.1",)

    ppservers = ()

    ncpus = 4 #pp.Server.get_ncpus()
    job_server = pp.Server(ncpus, ppservers=ppservers)

    jobs = [(input, job_server.submit(Max_Match, (P, input,))) for input in S]
    for input, job in jobs:
        finalScore += job()
    returnScore = finalScore
    job_server.destroy()
    #print "returning score ", returnScore
    #scoreLock.release()

    return returnScore
Example No. 16
    def import_recommendscore(self, filename):
        user_rankingscore = {}
        user_auc = {}
        user_predictitem = {}
        user_list = []
        job_server = pp.Server()  # require parallel python
        try:
            with open(self._filepath + filename, "r") as f:
                templine = f.readline()
                while (templine):
                    temp = templine[:-1].split("    ")[:3]
                    user = int(temp[0])
                    item = int(temp[1])
                    score = float(temp[2])
                    if user not in user_list:
                        job = job_server.submit(func=self.calc, \
                            args=((user, item, score), f.tell(), filename), \
                                depfuncs=(), modules=("random",), \
                                    callback=self.para_statistics)
                        user_list.append(user)

                    templine = f.readline()
                job_server.wait()
        except Exception, e:
            print e
            sys.exit()
Example No. 17
def test_pp_function():
    """Test parallel python with plain function
    """
    logger = getLogger("ostap.test_pp_function")
    logger.info('Test job submission with %s' % pp)

    from ostap.core.known_issues import DILL_ROOT_issue
    if DILL_ROOT_issue:
        logger.warning("test is disabled (dill/ROOT issue)")
        return

    job_server = pp.Server()

    jobs = [(i, job_server.submit(make_histo, (i, n)))
            for (i, n) in enumerate(inputs)]

    result = None
    for input, job in progress_bar(uimap(jobs), max_value=len(jobs)):
        histo = job()
        if not result: result = histo
        else:
            result.Add(histo)
            del histo

    logger.info("Histogram is %s" % result.dump(80, 10))
    logger.info("Entries  %s/%s" % (result.GetEntries(), sum(inputs)))

    job_server.print_stats()

    with wait(1), use_canvas('test_pp_function'):
        result.draw()

    return result
Example No. 18
def test_pp_callable():
    """Test parallel python with callable  
    """
    logger = getLogger("ostap.test_pp_callable")
    logger.info('Test job submission with %s' % pp)

    logger.warning("test is disabled for UNKNOWN REASON")
    return

    job_server = pp.Server()

    jobs = [(i, job_server.submit(mh.__call__, (i, n)))
            for (i, n) in enumerate(inputs)]

    result = None
    for input, job in progress_bar(uimap(jobs), max_value=len(jobs)):
        histo = job()
        if not result: result = histo
        else:
            result.Add(histo)
            del histo

    logger.info("Histogram is %s" % result.dump(80, 10))
    logger.info("Entries  %s/%s" % (result.GetEntries(), sum(inputs)))

    with wait(1), use_canvas('test_pp_callable'):
        result.draw()

    return result
Example No. 19
def multicore():
    delta = 20
    #set up the evo strategy
    best_list, mut_list = [], []
    evo = popstrat.Evostrategy(5000, 50)
    children = evo.iterate(evo.pop)

    nodes = ("*",)
    job_server = pp.Server(8, ppservers=nodes)
    print "Starting pp with", job_server.get_ncpus(), "workers"

    start_time = time.time()

    for i in range(50):
        run_time = time.time()
        jobs = [(child, job_server.submit(run_grn,
                                          (child['genome'], delta),
                                          (),
                                          ("grn", "numpy", "math")))
                for child in children]
        for child, result in jobs:
            results, conclist = result()
            bestidx = results.index(max(results))
            child['fitness'] = results[bestidx]

        #plotting the best with colors
        children = evo.iterate(children)
        bestgenome = evo.pop[-1]['genome']
        bestresult, conclist = run_grn(bestgenome, delta)
        bestidx = bestresult.index(max(bestresult))
        filename = "best_gen_"+str("%03d" % i)
        print filename

        colors = []
        simplist = []
        for idx, result in enumerate(bestresult):
            if idx == len(bestresult)-1:
                simplist.append(conclist[idx])
                colors.append('k')
            elif idx == bestidx:
                colors.append('g')
                simplist.append(conclist[idx])
            # elif result == 0:
            #     colors.append('b')
            # else:
            #     colors.append('r')
        graph.plot_2d(simplist, filename, colors)

        print "gen:", evo.gen_count, "fitness:", evo.pop[-1]['fitness']

        if evo.adaptive:
            evo.adapt_mutation()

        best_list.append(evo.pop[-1]['fitness'])
        mut_list.append(evo.mut_rate)

    mutfile = open('mutrate.txt','a')
    mutfile.write(str(mut_list)+'\n')
    mutfile.close()
Example No. 20
 def __init__(self, ppservers, ncpus, txtHasWeight, m_LexFile):
     self.text_has_weights = txtHasWeight
     self.job_server = pp.Server(ncpus, ppservers, jobInsertType=2)
     self.jobs = []
     self.m_nPrsCps = 0
     self.m_LexFile = m_LexFile
     self.m_CorpusList = []
def pp_pull(devices):
    ppservers = ()
    job_server = pp.Server(
        ppservers=ppservers)  #  autodetect number of cpu's "workers"
    #job_server = pp.Server(ncpus=1, ppservers=ppservers)  #  manually set number of cpu's "workers", setting of 1 makes it be sequential
    start_time = time.time()
    for dev in devices:
        print("*", end=' ')
    print(" (with", job_server.get_ncpus(), "workers)")
    print("Returning Parallel-Python tasks:", end=' ')
    jobsList = []
    jobs = [(dev,
             job_server.submit(issue_device_cmd,
                               args=(
                                   utility,
                                   command_line,
                                   dev,
                               ),
                               depfuncs=(),
                               modules=(
                                   "sys",
                                   "shlex",
                                   "subprocess",
                                   "time",
                                   "itertools",
                               ))) for dev in devices]
    for dev, job in jobs:
        r = (dev, job())
        jobsList.append(r)
        #print ("dev", dev, "got", r)
    #for job in jobsList: print job
    for job in jobsList:
        print(".", end=' ')
    print("")
    for i in range(0, len(jobsList)):
        results = str(jobsList[i][1]).split(",")
        results1 = shlex.split(str(jobsList[i][1]))
        '''
        #print ("results:", results)
        #print ("results1:", results1)
        testTimeS = "".join(re.findall('\d+\.\d+', results1[2]))
        print (testTimeS)
        testTimeF = float(testTimeS)
        print (testTimeF)
        testTimeV = round(testTimeF, 3)
        print (testTimeV)
        '''
        testTimeV = round(float("".join(re.findall('\d+\.\d+', results1[2]))),
                          3)
        print(" ", jobsList[i][0], "[Return code:", results1[0],
              results[1].strip() + ")", "]", testTimeV, " seconds")
        if "Unknown option" in results[1]:
            print("Aborting test!")
            print("")
            sys.exit(1)
    print("Parallel-Python tasks took: ", round(time.time() - start_time, 3),
          "seconds")
    print("")
    print("Parallel-Python", )
    job_server.print_stats()
Example No. 22
def multiTheadSimulate():
    """
    Use multiple cores and threads to compute the three-fund combination strategy, to save time.
    :return:
    """
    ppservers = ()
    job_server = pp.Server(ppservers=ppservers)

    profitMean, logMean = getAverageMean()
    funds = getupMeanFunds(profitMean, logMean)
    t = []
    jobs = {}
    for i in range(0, len(funds)):
        jobs[funds[i]] = job_server.submit(simulate, (
            funds[i],
            funds,
            i,
        ), (multipleStrategy, ), ("time", "MongoDBUtil"))
    job_server.wait()

    for key in jobs.keys():
        r = jobs[key]()
        for item in r:
            t.append(item)
    with open("../docs/multi/3.json", "w", encoding='utf-8') as f:
        json.dump(t, f, ensure_ascii=False, indent=4)
Example No. 23
 def align(self):
     quality = self.quality
     gen_index = self.genome_index
     bowtie2 = self.bowtie2
     out_dir = self.output_folder
     experience_name = self.name
     i = 0
     ncpu_pp = 1
     speed = self.speed_looping
     ncpu_bowtie = self.ncpu / ncpu_pp
     looping = self.looping
     len_tag = self.len_tag
     paired_wise_fastq = self.paired_wise_fastq
     jobs = []
     ppservers = ()
     job_server = pp.Server(ncpu_pp, ppservers=ppservers)
     print self.bowtie2
     print self.paired_files
     for paired_file in self.paired_files:
         print paired_file[0] + '  ' + paired_file[1]
         id = str(i)
         in_a = paired_file[0]
         in_b = paired_file[1]
         fastq_alignment.bowtie_fastq(bowtie2, in_a, in_b, gen_index,
                                      out_dir, id, ncpu_bowtie, looping,
                                      quality, len_tag, paired_wise_fastq,
                                      speed, self.len_paired_wise_fastq)
         #            jobs.append(job_server.submit(fastq_alignment.bowtie_fastq, (bowtie2, in_a, in_b, gen_index, out_dir,id,ncpu_bowtie, looping,quality,len_tag,paired_wise_fastq,speed)
         #                , (fastq_alignment.sam_filter,), ("fastq_alignment",)))
         i = i + 1
         print str(i) + " jobs launched over " + str(ncpu_bowtie) + " cores"
Example No. 24
def get_subscriptions(request):
    if 'token' in request.session:
        api = Api()
        api.SetAuthSubToken(request.session['token'])
        job_server = pp.Server(15, ppservers=())
        jobs = []
        for entry in api.GetYouTubeSubscriptionFeed().entry:
            uri = 'http://gdata.youtube.com/feeds/api/users/%s/uploads' % entry.username.text
            jobs.append(
                job_server.submit(
                    gdata.youtube.service.YouTubeService().GetYouTubeUserFeed,
                    (
                        uri,
                        None,
                    ),
                    modules=(
                        "gdata.youtube",
                        "gdata.youtube.service",
                    )))
        feed = []
        for job in jobs:
            for entry in job().entry:
                feed.append(getVideoDict(entry))
        job_server.destroy()
        #for entry in api.GetYouTubeSubscriptionFeed().entry:
        #	feed.extend(api.GetYouTubeUserFeed(username=entry.username.text).entry)
        feed.sort(key=lambda x: x['date'], reverse=True)
        return render_to_response("subscriptions.html", {'scrfeed': feed[:20]},
                                  context_instance=RequestContext(request))
    else:
        messages.warning(request, "Login to view your subscriptions!")
        return redirect("/")
Example No. 25
def calc_pce_fft(rparams, subkeys):
	"""
	Submits jobs to job server
	"""
	
	# get a list of all dictionaries we want to run...
	rdicts = jfdfdUtil.dictpairs(rparams)
	
	if parallel:
		# tell parallel python how many cpus to use
		job_server = pp.Server(ncpus=rparams['ncpus'])
		jobs = []
		
		# go through the dictionaries and run them or submit them to the job server..
		for rd in rdicts:
			#run_one(rd, subkeys)
			job = job_server.submit(calc_pce_fft_one, (rd, subkeys), (), ('jfdfd', 'jfdfdUtil', 'pylab', 'numpy', 'materials', 'Plasmons','Patrick_Utilities','plasmon_fft'))
			jobs.append(job)
		njobs = len(jobs)
		print 'Submitted %d jobs' % njobs
		
		for ijob, job in enumerate(jobs):
			job()
			print ' %g%% done (%d of %d jobs remaining)' % ((ijob+1)*100./njobs, njobs-ijob-1, njobs)
	else:
		for rd in rdicts:
			calc_pce_fft_one(rd, subkeys)
Example No. 26
    def predict(self, data, dict_of_additional_variables={}):

        ncpus = max(len(self.models), 32)
        job_server = pp.Server(ncpus, ppservers=())
        jobs = dict()
        to_import = ("import numpy as np", "sklearn", "time",
                     "from localRegression import *",
                     "from sklearn.linear_model import sparse",
                     "from sklearn.utils import atleast2d_or_csc")
        for name, model in sorted(self.models.iteritems()):
            try:
                predictargs = [data] + dict_of_additional_variables[name]
            except KeyError:
                predictargs = [data]

            jobs[name] = job_server.submit(
                pp_predict, (model, name, self.verbose, predictargs), (),
                to_import)

        X = np.zeros((data.shape[0], len(self.models)))
        i = 0
        for name, model in sorted(self.models.iteritems()):
            X[:, i] = jobs[name]()
            i += 1
        job_server.destroy()
        return self.blender.predict(X)
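pp_predict is defined elsewhere; judging from how its result is stored column by column above, it presumably looks something like this hypothetical sketch:

def pp_predict(model, name, verbose, predictargs):
    # hypothetical worker matching the submit() call above: run the model's
    # predict inside the worker process and return a 1-D array of predictions
    if verbose:
        print "predicting with model %s" % name
    return model.predict(*predictargs)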
Example No. 27
def _pp_needs_monkeypatching():
    # only run this function the first time mdp is imported
    # otherwise reload(mdp) breaks

    if not hasattr(mdp, '_pp_needs_monkeypatching'):
        # check if we are on one of those broken systems where
        # parallel python is affected by
        # http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=620551
        # this is a minimal example to reproduce the problem
        # XXX IMPORTANT XXX
        # This function only works once, i.e. at import
        # if you attempt to call it again afterwards,
        # it does not work [pp does not print the error twice]

        # we need to hijack stdout here, because pp does not raise
        # exceptions: it writes to stdout directly!!!

        # pp stuff
        import pp
        server = pp.Server()
        with _sys_stdout_replaced() as capture:
            server.submit(lambda: None, (), (), ('numpy', ))()
            server.destroy()

        # read error from hijacked stdout
        error = capture.getvalue()
        mdp._pp_needs_monkeypatching = 'ImportError' in error

    return mdp._pp_needs_monkeypatching
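_sys_stdout_replaced is not shown in this snippet; a minimal sketch of such a stdout-capturing context manager (an assumption, yielding a StringIO buffer as the code above expects):

import sys
from contextlib import contextmanager
from StringIO import StringIO  # io.StringIO on Python 3

@contextmanager
def _sys_stdout_replaced():
    # hypothetical sketch: temporarily redirect sys.stdout to an in-memory
    # buffer so text that pp writes directly to stdout can be inspected
    capture = StringIO()
    old_stdout, sys.stdout = sys.stdout, capture
    try:
        yield capture
    finally:
        sys.stdout = old_stdout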
Example No. 28
def main() : 
    import sys
    
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, argument_default=argparse.SUPPRESS)
    parser.add_argument('--target', type=str)
    parser.add_argument('--job', type=int)
    parser.add_argument('-g', type=int, help="Number of generation")
    parser.add_argument('-n', type=int, help="Population Size")
    
    args = parser.parse_args()
    target = args.target
    landscape = Landscape(target)
    print "====================================================================================================="
    print "Solving for the target = ", target 
    print "====================================================================================================="
    number_of_run = args.job
    init_depth =len(target)
    mut_prob = 1./init_depth
    number_of_generation = args.g
    pop_size = args.n
    p_n = [0.7,0.1,0.1,.1] #default = [0.25,0.25,0.25,.25] [0.25,0.65,0.05,.05] [0.7,0.1,0.1,.1] ["A", "G","U","C"]
    p_c = [0.5, 0.5, 0., 0.,0.,0.] #[0.2,0.2,0.1,0.1,0.2,0.2] #[0.4, 0.5, 0.1, 0.,0.,0.] ["GC","CG","AU","UA", "GU", "UG"]
    ppservers = ()
    mut_probs = numpy.array(RNA.ptable(target)[1:])
    mut_probs = mut_probs + mut_prob
    mut_probs[mut_probs>mut_prob] = 0

    
    job_server = pp.Server(4, ppservers=ppservers)
    
    print "Start running job", number_of_run
    
    run(number_of_generation, pop_size, mut_probs, 0, landscape, p_n, p_c)
    
    """
Example No. 29
def checkOptString():
    ppservers = ("192.168.0.125:35000", )
    job_server = pp.Server(4, ppservers=ppservers)
    print "Starting parallel python with", job_server.get_ncpus(), "workers"
    print "*********************\n"
    ProgConstant = ShareConstant()
    ProgConstant.POP_SIZE = 3000
    ProgConstant.CHROMO_LENGTH = 30000
    ConstPrint(ProgConstant)

    chromo_list = []
    lenCount = []
    for i in range(100):
        lenCount.append(0)

    getRandomBit_jobs = [(job_server.submit(GetRandomBits,
                                            (ProgConstant.CHROMO_LENGTH, ), (
                                                ChromoTyp,
                                                RANDOM_NUM,
                                            ), ("random", )))
                         for i in range(ProgConstant.POP_SIZE)]
    count = 0
    for job in getRandomBit_jobs:
        count += 1
        chromo_list.append(job())

    print "Count: {}".format(count)

    for i in range(ProgConstant.POP_SIZE):
        index = numberOfDigits(chromo_list[i].bits, ProgConstant)
        lenCount[index] += 1

    for i in range(len(lenCount)):
        if lenCount[i] > 0:
            print "index: {} count: {} ".format(i, lenCount[i])
def main():
  features = np.loadtxt(sys.argv[1])
  job_server = pp.Server(8)
  dist_start = 0.1
  dist_step = 0.02
  sample_start = 5
  sample_step = 1
  jobs = []
  for i in range(0, 30):
    for j in range(0, 70):
      eps = dist_start + i * dist_step
      min_samples = sample_start + j * sample_step
      jobs.append(
          ( eps, min_samples, job_server.submit(mysilhouette, (features, eps, min_samples), (), ("sklearn.metrics", "sklearn.cluster","numpy",))))
      print "Task for: " + str(eps) + ", "  + str(min_samples) + " submitted"
  
  X = []; Y = []; Z1 = []; Z2 = []
  for eps, min_samples, job in jobs:
    X.append(eps)
    Y.append(min_samples)
    temp = job()
    Z1.append(temp[0]) # Silhouette Score
    Z2.append(temp[1]) # Number of clusters
    print "Row: " + str(eps) + " column: " + str(min_samples) + " s: " + str(temp[0]) + " n: " + str(temp[1])

  m = np.max(Z1)
  i = np.argmax(Z1)
  print "--- Largest Silhouette Score: " + str(m) + " ---"
  print "--- eps: " + str(X[i]) + " min_samples: " + str(Y[i]) + " number_clusters: " + str(Z2[i]) + " ---"

  np.savetxt("silhouette_x", X)
  np.savetxt("silhouette_y", Y)
  np.savetxt("silhouette_z1", Z1)
  np.savetxt("silhouette_z2", Z2)