Example #1
def calculate_prob(hole_cards, num_iterations, given_board):
    import itertools
    
    # requires third-party packages: pip install multiprocess dill
    from multiprocess import Pool
    import dill as pickle

    # creates a pool of 4 worker processes
    p = Pool(4)

    deck_cards = prob_functions.generate_deck(hole_cards)
    possible_card_pairings = tuple(itertools.combinations(deck_cards, 2))
    card_combos = map(lambda x: tuple(list(hole_cards) + [x]), possible_card_pairings)

    # round-trip the worker function through dill so the closure can be shipped to the pool processes
    s = pickle.dumps(lambda hc: single_prob(hc, num_iterations, given_board))
    f = pickle.loads(s)

    prob_list = p.map( f , card_combos)

    tie = 0
    win = 0
    for prob in prob_list:
        tie += prob[0] 
        win += prob[1]
    l = len(prob_list)
    tie = tie / l
    win = win / l

    return (tie,win)
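A short aside on why Example #1 round-trips the lambda through dill: the standard pickle module cannot serialize lambdas or closures, while dill can (multiprocess itself relies on dill for exactly this reason). A minimal sketch, not from the original project:

import pickle
import dill

n = 3
f = lambda x: x + n  # closure over a local variable

# the standard library pickle refuses lambdas/closures...
try:
    pickle.dumps(f)
except Exception as e:
    print("pickle failed:", e)

# ...but dill handles them, so the round-trip above yields a transferable worker function
g = dill.loads(dill.dumps(f))
print(g(4))  # 7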
Example #2
def pcall_mp(fun,args,cores=cores):
    """Calls a function for every input in args"""
    mainpool = Pool(cores) # create pool
#    print("Using",cores,"cores")
    out = mainpool.map(fun,args) # return list
    mainpool.terminate()
    del mainpool # delete pool
    return out
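A minimal usage sketch for pcall_mp above (its cores default comes from a module-level value in the original file, so it is passed explicitly here):

def square(x):
    return x * x

# run square over the inputs across 4 worker processes; results come back in input order
print(pcall_mp(square, range(10), cores=4))  # [0, 1, 4, 9, 16, 25, 36, 49, 64, 81]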
Example #3
 def inner(*args):
     pool = Pool(processes=1)
     res = pool.apply_async(f,args)
     try:
         v = res.get(timeout=sec)
     except Exception as inst:
         print(inst)
         v = None
     finally:
         pool.terminate()
         return v
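The fragment above reads like the inner function of a timeout wrapper; a minimal sketch of the enclosing decorator it implies, assuming f and sec come from the outer scope (names here are illustrative):

from multiprocess import Pool

def with_timeout(sec):
    def decorator(f):
        def inner(*args):
            # run f in a single-worker pool so it can be abandoned on timeout
            pool = Pool(processes=1)
            res = pool.apply_async(f, args)
            try:
                v = res.get(timeout=sec)
            except Exception as inst:
                print(inst)
                v = None
            finally:
                # note: returning from finally mirrors the fragment above and
                # swallows any exception raised before it
                pool.terminate()
                return v
        return inner
    return decorator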
Example #4
    def __init__(self):
        super(GroupCheckerGui, self).__init__('Group Checker')

        self._group_name = ControlText('Group Name', CONFIG['group_name'])
        self._group_name.enabled = False
        self._allowed_tags = UnicodeControlList('Allowed Tags',
                                               plusFunction=self.__add_tag_action,
                                               minusFunction=self.__remove_tag_action)
        self.allowed_tags = GuiList(CONFIG['white_filters']['SubstringFilter']['substrings'],
                                    self._allowed_tags)

        self._allowed_ids = ControlList('Allowed Ids',
                                        plusFunction=self.__add_id_action,
                                        minusFunction=self.__remove_id_action)
        self.allowed_ids = GuiList(CONFIG['white_filters']['SignerFilter']['ids'], self._allowed_ids)

        self._bad_posts = ControlCheckBoxList('Bad posts')
        self._bad_posts._form.listWidget.itemDoubleClicked.connect(self.__show_link_action)

        self._remove_button = ControlButton('Remove')
        self._remove_button.value = self.__remove_action

        self._show_button = ControlButton('Show bad posts')
        self._show_button.value = self.__show_bad_post_action

        self.pool = Pool(processes=1)
        self.bad_posts = []

        self._formset = [('', '_group_name', ''),
                         ('', '_allowed_tags', '_allowed_ids', ''),
                         '',
                         ('', '_bad_posts', ''),
                         ('', '_remove_button', '_show_button', ''),
                         '']
Example #5
def download_image_thread(location_q, image_q, MAX_DL_THREADS=10):
    print("Running Download Image Thread.")

    max_processes = MAX_DL_THREADS
    print("Creating a thread pool of size {} for downloading images...".format(max_processes))
    pool = Pool(processes=max_processes)
    # Allow us to have n processes running, and n processes scheduled to run
    # TODO: Manager is not necessary here, but is used to get around the fact
    # that thread-safe objects cannot be passed by reference; they must be
    # inherited. A more lightweight solution should be found
    workers = Manager().Semaphore(max_processes*2)

    def async_download(location):
        image = download_image(location)
        image_q.put((location, image), True)
        workers.release()

    while True:
        location = location_q.get(True)
        workers.acquire()
        pool.apply_async(async_download, (location,))
Example #6
class ProcessPoolExecutor(Executor):
    """Process Pool Executor"""
    def __init__(self):
        super(ProcessPoolExecutor, self).__init__()
        import os
        from multiprocess import Pool
        self.pool = Pool(os.cpu_count() or 1)

    def submit(self, func, *args, **kwargs):
        from concurrent.futures import Future
        fut = Future()
        self.tasks[fut] = self.pool.apply_async(
            func, args, kwargs, fut.set_result, fut.set_exception
        )
        fut.add_done_callback(self.tasks.pop)
        return fut

    def shutdown(self, wait=True):
        super(ProcessPoolExecutor, self).shutdown(wait)
        self.pool.terminate()
        self.pool.join()
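A hedged usage sketch for this executor, assuming the Executor base class it extends provides the self.tasks dict used by submit:

import math

executor = ProcessPoolExecutor()
fut = executor.submit(math.factorial, 20)
# the apply_async callbacks resolve the Future once the worker finishes
print(fut.result())
executor.shutdown()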
Example #7
    def __init__(self, storage, threads):
        # Manager for concurrency
        self.manager = Manager()

        # System storage
        self.storage = storage

        # Queues
        self.high_access = self.manager.list([])
        self.normal_access = self.manager.list([])
        self._pool = Pool(processes=threads)

        # Operations
        self.operation_table = self.manager.dict()
Example #8
 def get_new_tickets(self, from_time=utils.pre_day_to_string(1)):
     search_conditions = {
         "skip": 0,
         "query": {
             "ctimeGte": "{}T21:00:00.000Z".format(from_time)
          }
     }
     pool_size = multiprocess.cpu_count()
     pool_volume = 10 * pool_size
     index = 0
     tickets_num = self._get_number_of_tickets(from_time, to_time)
     req_num = utils.ceil_division(tickets_num, 1000)
     pool = Pool(pool_size)
     for req_count in range(req_num):
         search_tickets = self.search_tickets(search_conditions)
         while True:
             tickets = pool.map(self.add_attr_to_ticket, itertools.islice(search_tickets, pool_volume))
             if tickets:
                 print('Downloaded {}/{} tickets'.format(index, tickets_num), end='\r')
                 index += pool_volume
                 yield tickets
             else:
                 break
         search_conditions['skip'] += 1000
Example #9
File: olt.py Project: sjava/olt
def zte_gpon_svlan_check():
    clear_log()
    nodes = graph.cypher.execute(
        "match(n:Olt)--(c:Card) where c.name='GTGO' return n.ip,collect(c.slot)")
    olts = ((x[0], x[1]) for x in nodes)
    lzte_gpon_svlan = lambda x: zte_gpon_svlan(ip=x[0], slots=x[1])
    pool = Pool(8)
    lock = Manager().Lock()
    func = partial(svlan_entry, lock)
    list(pool.map(compose(func, lzte_gpon_svlan), olts))
    pool.close()
    pool.join()
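Examples #9 through #22 all follow the same shape: compose a per-device collector with a lock-guarded writer, then map the composed function over a pool. A stripped-down sketch of that pattern (the names below are illustrative, not from the sjava projects, and compose is assumed to come from toolz):

from functools import partial
from multiprocess import Pool, Manager
from toolz import compose

def get_data(item):
    # stand-in for get_svlan / get_interface / get_hostname ...
    return item, item * 2

def write_entry(lock, record):
    # stand-in for svlan_entry / hostname_entry ...; the Manager lock
    # serialises writes coming from all worker processes
    with lock:
        print(record)

if __name__ == "__main__":
    pool = Pool(4)
    lock = Manager().Lock()
    func = partial(write_entry, lock)
    list(pool.map(compose(func, get_data), range(8)))
    pool.close()
    pool.join()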
Example #10
    def prime_calculate(self):
        break_points = []  # List that will have start and stopping points
        for i in range(cores):  # Creates start and stopping points based on length of range_finish
            break_points.append(
                {"start": int(math.ceil(((self.maximum_prime + 1) + 0.0) / cores * i)),
                 "stop": int(math.ceil(((self.maximum_prime + 1) + 0.0) / cores * (i + 1)))})

        p = Pool(cores)  # Number of processes to create.
        for i in break_points:  # Cycles through the breakpoints list created above.
            a = p.apply_async(self.prime_calculator, kwds=i, args=tuple(),
                              callback=self.update_num)  # This will start the separate processes.
        p.close()  # Prevents any more processes being started
        p.join()  # Waits for worker process to end
Example #11
File: switch.py Project: sjava/olt
def interface_check_m():
    clear_log()
    #  cmd = "match(s: Switch) where s.model in ['S8505','S8508'] return s.ip, s.model"
    cmd = "match(s: Switch)  return s.ip, s.model"
    #  cmd = "match(s:Switch) where s.model='S9306' or s.model='s9303' return s.ip,s.model limit 2"
    nodes = graph.cypher.execute(cmd)
    switchs = [(x[0], x[1]) for x in nodes]
    pool = Pool(16)
    lock = Manager().Lock()
    out_inf = partial(output_interface_m, lock)
    list(pool.map(compose(out_inf, get_interface), switchs))
    pool.close()
    pool.join()
Example #12
File: olt.py Project: sjava/olt
def svlan_check():
    clear_log()
    #  nodes = graph.find('Olt', property_key='ip', property_value='9.192.96.246')
    nodes = graph.find('Olt')
    #  nodes = graph.find('Olt', property_key='company', property_value='zte')
    olts = [(x['ip'], x['company'], x['area']) for x in nodes]
    #  list(map(compose(card_entry, get_card), olts))
    pool = Pool(16)
    lock = Manager().Lock()
    func = partial(svlan_entry, lock)
    list(pool.map(compose(func, get_svlan), olts))
    pool.close()
    pool.join()
Example #13
File: olt.py Project: sjava/weihu
def add_infs():
    funcs = {'zte': Zte.get_infs, 'hw': Huawei.get_infs}
    get_infs = partial(_company, funcs)

    clear_log()
    nodes = graph.cypher.execute(
        'match (n:Olt) return n.ip as ip,n.company as company')
    olts = [dict(ip=x['ip'], company=x['company']) for x in nodes]
    pool = Pool(128)
    lock = Manager().Lock()
    _add_infs_p = partial(_add_infs, lock)
    list(pool.map(compose(_add_infs_p, get_infs), olts))
    pool.close()
    pool.join()
Example #14
def main(args):

    filedate = args.filedate
    database = args.database

    slablist = ['alu','cal','cam','car','cas','cot','hal','hel','him','hin','izu','jap','ker','kur','mak','man','mue','pam','png','phi','puy','ryu','sam','sco','sol','sul','sum','van']

    indices = range(len(slablist))
    pool1 = Pool(args.nCores)
    partial_loop1 = partial(calls2d, database, filedate, slablist)

    pts = pool1.map(partial_loop1, indices)
    pool1.close()
    pool1.join()
Example #15
File: olt.py Project: sjava/olt
def hostname_check():
    clear_log()
    nodes = graph.find('Olt')
    #  nodes = graph.find('Olt', property_key='ip', property_value='172.18.0.46')
    olts = [(x['ip'], x['company']) for x in nodes]
    pool = Pool(16)
    lock = Manager().Lock()
    func = partial(hostname_entry, lock)
    list(pool.map(compose(func, get_hostname), olts))
    pool.close()
    pool.join()
    ip_hostname = (x.split(',') for x in open(result_file))
    cmd = "match (n:Olt) where n.ip={ip} set n.hostname={hostname}"
    list(map(lambda x: graph.cypher.execute(
        cmd, ip=x[0], hostname=x[1]), ip_hostname))
Example #16
def get_vlan_usersP(bras):
    def _get_vlan_users(bas):
        funcs = {'m6k': M6k.get_vlan_users,
                 'me60': ME60.get_vlan_users}
        _gvu = partial(_model, funcs)
        return _gvu(bas)

    bras = [dict(ip=x[0], model=x[1], inf=x[2])
            for x in bras]
    pool = Pool(len(bras))
    temp = pool.map(_get_vlan_users, bras)
    pool.close()
    pool.join()
    temp = [x[1] for x in temp if x[1]]
    rslt = reduce(lambda x, y: merge_with(sum, x, y), temp)
    return rslt
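The final reduce over merge_with(sum, ...) above merges the per-BRAS dictionaries by summing values key-wise; a small illustration, assuming merge_with comes from toolz as in similar code:

from functools import reduce
from toolz import merge_with

temp = [{'vlan10': 5, 'vlan20': 3}, {'vlan10': 2, 'vlan30': 7}]

# merge_with(sum, x, y) builds one dict whose values are the per-key sums
rslt = reduce(lambda x, y: merge_with(sum, x, y), temp)
print(rslt)  # {'vlan10': 7, 'vlan20': 3, 'vlan30': 7}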
Example #17
 def calculate(self, data):
     t1 = dt.datetime.utcnow()
     LOGGER.info('Starting calculation...')
     self._data = deepcopy(data)
     self._check_inputs(data)
     dep = self._dependencies()
     sorted_dep = topological_sort(dep)
     for items in sorted_dep:
         # loading node with inputs
         for item in items:
             node = self._get_node(item)
             args = [i_name for i_name in node.input_names if i_name not in node.kwargs]
             data_to_pass = []
             for arg in args:
                 data_to_pass.append(self._data[arg])
             kwargs_to_pass = {}
             for kwarg in node.kwargs:
                 kwargs_to_pass[kwarg] = self._data[kwarg]
             node.load_inputs(data_to_pass, kwargs_to_pass)
         # running nodes
         if self._parallel:
             pool = Pool(self._pool_size)
             results = pool.map(
                 Graph.run_node,
                 [self._get_node(i) for i in items]
             )
             pool.close()
             pool.join()
             results = {k: v for k, v in results}
         else:
             results = {}
             for item in items:
                 node = self._get_node(item)
                 res = node.run_with_loaded_inputs()
                 results[node.id] = res
         # save results
         for item in items:
             node = self._get_node(item)
             res = results[node.id]
             if len(node.output_names) == 1:
                 self._data[node.output_names[0]] = res
             else:
                 for i, out in enumerate(node.output_names):
                     self._data[out] = res[i]
     t2 = dt.datetime.utcnow()
     LOGGER.info('Calculation finished in {}'.format(t2-t1))
     return res
Example #18
File: olt.py Project: sjava/olt
def zhongji_check():
    clear_log()
    nodes = graph.find('Olt')
    #  nodes = graph.find('Olt', property_key='ip', property_value='172.18.0.46')
    olts = [(x['ip'], x['company']) for x in nodes]
    pool = Pool(16)
    lock = Manager().Lock()
    func = partial(zhongji_entry, lock)
    list(pool.map(compose(func, get_zhongji), olts))
    pool.close()
    pool.join()
    ports = (x.split(',') for x in open(result_file))
    cmd = """match(n: Olt) where n.ip = {ip} 
    merge(n) - [:HAS]->(m: Etrunk{name: {sm}}) 
    merge(m) - [:Include]->(p: Port{name: {interface}})"""
    list(map(lambda x: graph.cypher.execute(
        cmd, ip=x[0], sm=x[1], interface=x[2]), ports))
Example #19
def parallel_cdist(data1, data2, n_rows_per_job=100):

    import numpy as np
    from scipy.spatial.distance import cdist

    data1 = np.array(data1)
    data2 = np.array(data2)

    pool = Pool(12)

    start_indices = np.arange(0, data1.shape[0], n_rows_per_job)
    end_indices = start_indices + n_rows_per_job - 1

    # tuple-unpacking lambdas are Python 2 only syntax; unpack inside the body instead
    partial_distance_matrices = pool.map(
        lambda bounds: cdist(data1[bounds[0]:bounds[1] + 1].copy(), data2),
        zip(start_indices, end_indices))
    pool.close()
    pool.join()

    distance_matrix = np.concatenate(partial_distance_matrices)
    return distance_matrix
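A hedged usage sketch for parallel_cdist above:

import numpy as np

a = np.random.rand(500, 3)
b = np.random.rand(400, 3)

# splits `a` into blocks of rows, computes cdist for each block in a worker,
# then concatenates the partial matrices into the full 500x400 distance matrix
d = parallel_cdist(a, b, n_rows_per_job=100)
print(d.shape)  # (500, 400)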
Example #20
    def eval_EFG(self,x,num_procs=None,info=False):

        from multiprocess import Pool,cpu_count

        if not num_procs:
            num_procs = cpu_count()
        num_samples = self.parameters['num_samples']
        pool = Pool(num_procs)
        num = int(np.ceil(float(num_samples)/float(num_procs)))
        results = list(zip(*pool.map(lambda i: self.eval_EFG_sequential(x,num,i,info),range(num_procs),chunksize=1)))
        pool.terminate()
        pool.join()
        if not info:
            assert(len(results) == 4)
        else:
            assert(len(results) == 5)
        assert(all([len(vals) == num_procs for vals in results]))
        return [sum(vals)/float(num_procs) for vals in results]
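The zip(*pool.map(...)) above transposes the per-process results (one tuple per worker) into per-quantity groups before averaging; a tiny illustration of that step:

# each worker returns a tuple of quantities
per_process = [(1, 10), (2, 20), (3, 30)]

# zip(*...) groups first elements together, second elements together, ...
per_quantity = list(zip(*per_process))  # [(1, 2, 3), (10, 20, 30)]

# averaging each group mirrors the final list comprehension in eval_EFG
print([sum(vals) / float(len(vals)) for vals in per_quantity])  # [2.0, 20.0]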
Example #21
def add_power_info():
    funcs = {'S8508': S85.get_power_info,
             'S8505': S85.get_power_info,
             'T64G': T64.get_power_info,
             'S8905': S89.get_power_info,
             'S8905E': S8905E.get_power_info,
             'S9306': S93.get_power_info,
             'S9303': S93.get_power_info}
    get_power_info = partial(_model, funcs)
    #  clear_log()
    nodes = graph.cypher.execute(
        "match (s:Switch) where s.snmpState='normal' return s.ip as ip,s.model as model")
    switches = [dict(ip=x['ip'], model=x['model']) for x in nodes]
    pool = Pool(processor)
    lock = Manager().Lock()
    _ff = partial(_add_power_info, lock)
    list(pool.map(compose(_ff, get_power_info), switches))
    pool.close()
    pool.join()
Example #22
def add_traffics():
    funcs = {'S8508': S85.get_traffics,
             'S8505': S85.get_traffics,
             'T64G': T64.get_traffics,
             'S8905': S89.get_traffics,
             'S8905E': S8905E.get_traffics,
             'S9306': S93.get_traffics,
             'S9303': S93.get_traffics}
    get_traffics = partial(_model, funcs)
    #  clear_log()
    nodes = graph.cypher.execute(
        "match (s:Switch)--(i:Inf) where s.snmpState='normal' return s.ip as ip,collect(i.name) as infs,s.model as model")
    switchs = [dict(ip=x['ip'], infs=x['infs'], model=x['model'])
               for x in nodes]
    pool = Pool(processor)
    lock = Manager().Lock()
    _ff = partial(_add_traffics, lock)
    list(pool.map(compose(_ff, get_traffics), switchs))
    pool.close()
    pool.join()
Example #23
def compute_jaccard_pairwise(indices, square_form=True, parallel=True, return_poses=False):
    n = len(indices)

    if parallel:
        pool = Pool(16)
        scores_poses_tuples = pool.map(lambda x: compute_jaccard_i_vs_list(x[0],x[1]),
                                   [(indices[i], indices[i+1:]) for i in range(n)])
        pool.close()
        pool.join()
    else:
        scores_poses_tuples = [compute_jaccard_i_vs_list(indices[i], indices[i+1:]) for i in range(n)]

    pairwise_scores = np.array([scores for scores, poses in scores_poses_tuples])

    if square_form:
        pairwise_scores = squareform(np.concatenate(pairwise_scores))

    if return_poses:
        poses = np.array([poses for scores, poses in scores_poses_tuples])
        return pairwise_scores, poses
    else:
        return pairwise_scores
Example #24
    def eval_EQ(self,p,num_procs=None,quiet=True):
        """
        Evaluates E[Q(p,r)] and its gradient in parallel. 

        Parameters
        ----------
        p : generator powers
        num_procs : number of parallel processes
        quiet : flag
        """
       
        from multiprocess import Pool,cpu_count
 
        if not num_procs:
            num_procs = cpu_count()
        num_samples = self.parameters['num_samples']
        pool = Pool(num_procs)
        num = int(np.ceil(float(num_samples)/float(num_procs)))
        results = list(zip(*pool.map(lambda i: self.eval_EQ_sequential(p,num,i,quiet),range(num_procs),chunksize=1)))
        pool.terminate()
        pool.join()
        assert(len(results) == 2)
        assert(all([len(vals) == num_procs for vals in results]))
        return [sum(vals)/float(num_procs) for vals in results]
Example #25
    labelmap_fp = os.path.splitext(
        input_img_fp)[0] + '_labelmap_%(alg)s.bp' % dict(alg=alg)

    bp.pack_ndarray_file(big_labelmap, labelmap_fp)

    #     for tile_i in range(12):
    #         execute_command('rm %(DETECTED_CELLS_DIR)s/%(stack)s/%(img_fn)s/%(img_fn)s_image_inverted_%(tile_i)02d.tif' % \
    #                         dict(DETECTED_CELLS_DIR=DETECTED_CELLS_DIR, stack=stack, img_fn=img_fn, tile_i=tile_i))
    #         execute_command('rm %(DETECTED_CELLS_DIR)s/%(stack)s/%(img_fn)s/%(img_fn)s_image_inverted_%(tile_i)02d_labelmap_cellprofiler.bp' % \
    #                         dict(DETECTED_CELLS_DIR=DETECTED_CELLS_DIR, stack=stack, img_fn=img_fn, tile_i=tile_i))

    # Generate labelmap viz
    t = time.time()

    viz = img_as_ubyte(label2rgb(big_labelmap, bg_label=0, bg_color=(0, 0, 0)))
    cv2.imwrite(
        os.path.splitext(input_img_fp)[0] +
        '_labelmap_%(alg)s.png' % dict(alg=alg), viz)

    sys.stderr.write('Generate labelmap viz: %.2f seconds.\n' %
                     (time.time() - t))  # 60s

t = time.time()

pool = Pool(12)
pool.map(detect_cells, range(first_sec, last_sec + 1))
pool.close()
pool.join()

sys.stderr.write('Overall time: %.2f seconds.\n' % (time.time() - t))
Example #26
            w = w_tb
            h = h_tb
        else:
            raise

#             input_dir = DataManager.get_image_dir_v2(stack=stack, prep_id=5, version=version, resol='raw')
        out_dir = DataManager.get_image_dir_v2(stack=stack, prep_id=2, resol=resol, version=version)
        print('out_dir:', out_dir)
#             script = os.path.join(REPO_DIR, 'preprocess', 'warp_crop_IM_v3.py')

#         ! rm -rf {out_dir}
        create_if_not_exists(out_dir)

        t = time.time()

        pool = Pool(8)
        _ = pool.map(lambda img_name: crop(stack=stack, img_name=img_name, version=version, resol=resol, 
                                        x=x, y=y, w=w, h=h), 
                     metadata_cache['valid_filenames'][stack])
        pool.close()
        pool.join()

#             for img_name in metadata_cache['valid_filenames'][stack]:
#                 f(stack=stack, img_name=img_name, version=version, resol=resol, 
#                                             x=x, y=y, w=w, h=h)

    #     run_distributed('convert \"%%(input_fp)s\" -crop %(w)dx%(h)d+%(x)d+%(y)d  \"%%(output_fp)s\"' % \
    #                     {'w':w_raw, 'h':h_raw, 'x':x_raw, 'y':y_raw},
    #                     kwargs_list=[{'input_fp': DataManager.get_image_filepath_v2(stack=stack, prep_id=5, resol='raw', version=version, fn=img_name),
    #                                   'output_fp': DataManager.get_image_filepath_v2(stack=stack, fn=img_name, prep_id=2, version=version, resol='raw')}
    #                                  for img_name in metadata_cache['valid_filenames'][stack]],
Example #27
    def sample_chains(self,
                      n_sample,
                      init_states,
                      chain_var_funcs,
                      n_process=1,
                      memmap_enabled=False,
                      memmap_path=None,
                      stack_chain_arrays=False):
        """Sample one or more Markov chains from given initial states.

        Performs a specified number of chain iterations (each of which may be
        composed of multiple individual Markov transitions), recording the
        outputs of functions of the sampled chain state after each iteration.
        The chains may be run in parallel across multiple independent processes
        or sequentially. In all cases all chains use independent random draws.

        Args:
            n_sample (int): Number of samples (iterations) to draw per chain.
            init_states (Iterable[ChainState] or Iterable[array]):
                Initial chain states. Each entry can be either an array
                specifying the state or a `ChainState` instance. One chain will
                be run for each state in the iterable sequence.
            chain_var_funcs (dict[str, callable]): Dictionary of functions
                which compute the chain variables to be recorded at each
                iteration, with each function being passed the current state
                and returning an array corresponding to the variable(s) to be
                stored. The keys to the functions are used to index the chain
                variable arrays in the returned data.
            n_process (int or None): Number of parallel processes to run chains
                over. If set to one then chains will be run sequentially,
                otherwise a `multiprocessing.Pool` object will be used to
                dynamically assign the chains across multiple processes. If
                set to `None` then the number of processes will default to the
                output of `os.cpu_count()`.
            memmap_enabled (bool): Whether to memory-map arrays used to store
               chain data to files on disk to avoid excessive system memory
               usage for long chains and/or high memory chain states. The chain
               data is written to `.npy` files in the directory specified by
               `memmap_path` (or a temporary directory if not provided). These
               files persist after the termination of the function so should be
               manually deleted when no longer required.
            memmap_path (str): Path to directory to write memory-mapped chain
               data to. If not provided, a temporary directory will be created
               and the chain data written to files there.
            stack_chain_arrays (bool): Whether to stack the lists of per-chain
               arrays in the returned dictionaries into new arrays with the
               chain index as the first axis. Note if set to `True` when
               memory-mapping is enabled (`memmap_enabled=True`) all
               memory-mapped arrays will be loaded from disk in to memory.

        Returns:
            chains (dict[str, list[array]]):
                Chain variable array lists, with one entry per function in
                `chain_var_funcs` with the same key. Each entry consists of a
                list of arrays, one per chain, with the leading dimension of
                the arrays corresponding to the sampling (draw) index.
            chain_stats (dict[str, dict[str, list[array]]]):
                Dictionary of chain transition statistics. Outer dictionary
                contains entries for each chain transition which returns
                statistics (e.g. acceptance probabilities) on each iteration.
                For each such transition, a dictionary is returned with string
                keys describing the statistics recorded and list of array
                values with one array per chain and the leading dimension of
                the arrays corresponding to the sampling index.
        """
        n_chain = len(init_states)
        # Create temp directory if memory-mapping enabled and no path provided
        if memmap_enabled and memmap_path is None:
            memmap_path = tempfile.mkdtemp()
        if RANDOMGEN_AVAILABLE:
            seed = self.rng.randint(2**64, dtype='uint64')
            rngs = [
                randomgen.Xorshift1024(seed).jump(i).generator
                for i in range(n_chain)
            ]
        else:
            seeds = (self.rng.choice(2**16, n_chain, False) * 2**16 +
                     self.rng.choice(2**16, n_chain, False))
            rngs = [np.random.RandomState(seed) for seed in seeds]
        if n_process == 1:
            # Using single process therefore run chains sequentially
            chain_outputs = []
            for c, (rng, init_state) in enumerate(zip(rngs, init_states)):
                chains, chain_stats, n_sample_chain = self._sample_chain(
                    rng,
                    n_sample,
                    init_state,
                    chain_var_funcs,
                    chain_index=c,
                    parallel_chains=False,
                    memmap_enabled=memmap_enabled,
                    memmap_path=memmap_path)
                chain_outputs.append((chains, chain_stats, n_sample_chain))
                if n_sample_chain != n_sample:
                    logger.error(
                        f'Sampling manually interrupted at chain {c} iteration'
                        f' {n_sample_chain}. Arrays containing chain variables'
                        f' and statistics computed before interruption will'
                        f' be returned, all entries for iteration '
                        f' {n_sample_chain} and above of chain {c} should be'
                        f' ignored.')
                    break
        else:
            # Run chains in parallel using a multiprocess(ing).Pool
            # Child processes made to ignore SIGINT signals to allow handling
            # of KeyboardInterrupts in parent process
            with Pool(n_process, _ignore_sigint_initialiser) as pool:
                try:
                    chain_outputs = pool.starmap(
                        self._sample_chain,
                        zip(
                            rngs,
                            [n_sample] * n_chain,
                            init_states,
                            [chain_var_funcs] * n_chain,
                            range(n_chain),  # chain_index
                            [True] * n_chain,  # parallel_chains flags
                            [memmap_enabled] * n_chain,
                            [memmap_path] * n_chain,
                        ))
                except KeyboardInterrupt:
                    # Close any still running processes
                    pool.terminate()
                    pool.join()
                    err_message = 'Sampling manually interrupted.'
                    if memmap_enabled:
                        err_message += (
                            f' Chain data recorded so far is available in '
                            f'directory {memmap_path}.')
                    logger.error(err_message)
                    raise
        # When running parallel jobs with memory-mapping enabled, data arrays
        # are returned by the worker processes as file paths to array
        # memory-maps, therefore load the memory-map objects from file before
        # returning the results
        load_memmaps = memmap_enabled and n_process > 1
        return self._collate_chain_outputs(n_sample, chain_outputs,
                                           load_memmaps, stack_chain_arrays)
Example #28
def fmultiprocess(log, function, inputArray, poolSize=False, **kwargs):
    """multiprocess pool

    **Key Arguments:**
        - ``log`` -- logger
        - ``function`` -- the function to multiprocess
        - ``inputArray`` -- the array to be iterated over

    **Return:**
        - ``resultArray`` -- the array of results

    **Usage:**

        .. code-block:: python 

            from fundamentals import fmultiprocess
            # DEFINE AN INPUT ARRAY
            inputArray = range(10000)
            results = fmultiprocess(log=log, function=functionName,
                                    inputArray=inputArray, otherFunctionKeyword="cheese")
    """
    log.info('starting the ``multiprocess`` function')

    # DEFINE POOL SIZE - NUMBER OF CPU CORES TO USE (BEST = ALL - 1)
    # if cpu_count() > 1:
    #     poolSize = cpu_count() - 1
    # else:
    #     poolSize = 1

    # if len(inputArray) < poolSize:
    #     poolSize = len(inputArray)
    if poolSize:
        p = Pool(processes=poolSize)
    else:
        p = Pool()

    # MAP-REDUCE THE WORK OVER MULTIPLE CPU CORES
    try:
        mapfunc = partial(function, log=log, **kwargs)
        resultArray = p.map(mapfunc, inputArray)
    except:
        try:
            mapfunc = partial(function, **kwargs)
            resultArray = p.map(mapfunc, inputArray)
        except:
            mapfunc = partial(function, log=log, **kwargs)
            resultArray = p.map(mapfunc, inputArray)

    p.close()
    p.terminate()
    p.join()

    log.info('completed the ``multiprocess`` function')
    return resultArray
Example #29
            if len(a) > 0:
                ntb_to_nissl[ntb_v] = np.unique(a)[0]

        ntb_values = np.arange(0, 5000)
        ntb_matched_values = np.interp(ntb_values, 
                                       [ntb_v for ntb_v, nissl_v in sorted(ntb_to_nissl.items())], 
                                       [nissl_v for ntb_v, nissl_v in sorted(ntb_to_nissl.items())])
    
        sys.stderr.write('Compute matching: %.2f seconds.\n' % (time.time()-t))
        
        return ntb_matched_values, (region1_x, region1_y, region1_w, region1_h)
        
    
    n_regions = 8
    
    pool = Pool(4)
    res = pool.map(f, range(n_regions))
    ntb_matched_values_all_examples_one_section, region_bboxes_all_examples_one_section = zip(*res)
    pool.close()
    pool.join()
    
#     for region_id in range(10):
        
#         while True:
#             region1_x = np.random.randint(0, w-10000, 1)[0]
#             region1_y = np.random.randint(0, h-10000, 1)[0]
#             region1_w = 5000
#             region1_h = 5000
#             print region1_x, region1_y, region1_w, region1_h
            
#             tb_region1_xmin = region1_x / 32
        ntb_blue_bins = np.arange(5001)
    
        ntb_blue_inv_bins = np.arange(5001)
        ntb_inv_to_nissl_mapping = np.interp(ntb_blue_inv_bins, ntb_inv_vals, nissl_vals)
        
        ntb_to_nissl_mapping = ntb_inv_to_nissl_mapping[5000 - ntb_blue_bins]
        ntb_to_nissl_mapping = np.round(ntb_to_nissl_mapping).astype(np.uint8)
                        
        ntb_matched_values_all_examples_one_section.append(ntb_to_nissl_mapping)
        region_bboxes_all_examples_one_section.append((region1_x, region1_y, region1_w, region1_h))
    
        sys.stderr.write('Compute matching: %.2f seconds.\n' % (time.time()-t))
        
        return ntb_to_nissl_mapping, (region1_x, region1_y, region1_w, region1_h)
            
    pool = Pool(4)
    res = pool.map(match_intensity_histogram_one_region, regions)
    ntb_matched_values_all_examples_one_section, region_bboxes_all_examples_one_section = zip(*res)
    pool.close()
    pool.join()
    

    fp = os.path.join(DATA_DIR, stack, stack + '_intensity_mapping', '%s_to_%s_intensity_mapping_all_regions.npy' % (ntb_fn, nissl_fn))
    create_parent_dir_if_not_exists(fp)
    np.save(fp, np.asarray(ntb_matched_values_all_examples_one_section))
    upload_to_s3(fp)

    fp = os.path.join(DATA_DIR, stack, stack + '_intensity_mapping', '%s_to_%s_region_bboxes.npy' % (ntb_fn, nissl_fn))
    np.save(fp, np.asarray(region_bboxes_all_examples_one_section))
    upload_to_s3(fp)
Example #31
def get_pool():
    global pool
    if pool is None:
        pool = Pool(initializer=initializer)
    return pool
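A minimal sketch of how this lazy global-pool pattern is typically wired up; the initializer below is hypothetical, standing in for whatever per-worker setup the original module performs:

from multiprocess import Pool

pool = None  # module-level cache, as assumed by get_pool()

def initializer():
    # hypothetical per-worker setup, e.g. opening a database connection
    print("worker starting up")

def get_pool():
    global pool
    if pool is None:
        pool = Pool(initializer=initializer)
    return pool

if __name__ == "__main__":
    # the first call creates the pool; later calls reuse it
    print(get_pool().map(abs, [-1, -2, 3]))  # [1, 2, 3]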
Example #32
def extract_features(
        img_input,
        ft_output,
        network_ckpt, 
        dataset_cstor,
        dataset_args,
        batchifier_cstor,
        out_dir,
        set_type,
        batch_size,
        no_threads,
        gpu_ratio):

    # CPU/GPU option
    cpu_pool = Pool(no_threads, maxtasksperchild=1000)
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_ratio)



    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, allow_soft_placement=True)) as sess:
        saver = tf.train.Saver()
        saver.restore(sess, network_ckpt)
    
        for one_set in set_type:
    
            print("Load dataset -> set: {}".format(one_set))
            dataset_args["which_set"] = one_set
            dataset = dataset_cstor(**dataset_args)
    
            # hack dataset to only keep one game by image
            image_id_set = {}
            games = []
            for game in dataset.games:
                if game.image.id not in image_id_set:
                    games.append(game)
                    image_id_set[game.image.id] = 1

            dataset.games = games
            no_images = len(games)
    
            source_name = os.path.basename(img_input.name[:-2])
            dummy_tokenizer = DummyTokenizer()
            batchifier = batchifier_cstor(tokenizer=dummy_tokenizer, sources=[source_name])
            iterator = Iterator(dataset,
                                batch_size=batch_size,
                                pool=cpu_pool,
                                batchifier=batchifier)
    
            ############################
            #  CREATE FEATURES
            ############################
            print("Start computing image features...")
            filepath = os.path.join(out_dir, "{}_features.h5".format(one_set))
            with h5py.File(filepath, 'w') as f:

                ft_shape = [int(dim) for dim in ft_output.get_shape()[1:]]
                ft_dataset = f.create_dataset('features', shape=[no_images] + ft_shape, dtype=np.float32)
                idx2img = f.create_dataset('idx2img', shape=[no_images], dtype=np.int32)
                pt_hd5 = 0
    
                for batch in tqdm(iterator):
                    feat = sess.run(ft_output, feed_dict={img_input: numpy.array(batch[source_name])})
    
                    # Store dataset
                    batch_size = len(batch["raw"])
                    ft_dataset[pt_hd5: pt_hd5 + batch_size] = feat
    
                    # Store idx to image.id
                    for i, game in enumerate(batch["raw"]):
                        idx2img[pt_hd5 + i] = game.image.id
    
                    # update hd5 pointer
                    pt_hd5 += batch_size
                print("Start dumping file: {}".format(filepath))
            print("Finished dumping file: {}".format(filepath))
    
    
    print("Done!")
Example #33
    def predict(self, inputData, transientTime=0, update_processor=lambda x: x, verbose=0):
        rank = len(inputData.shape) - 1

        if rank != self.n_inputDimensions:
            raise ValueError(
                "The `inputData` does not have a suitable shape. It has to have {0} spatial dimensions and 1 temporal dimension.".format(
                    self.n_inputDimensions))

        manager = Manager()
        predictQueue = manager.Queue()

        # workaround as predict does not support batches atm
        # add dummy dimension to let embedInputData work properly (is optimized to work for batches)
        inputData = inputData.reshape(1, *inputData.shape)
        modifiedInputData = self._embedInputData(inputData)
        modifiedInputData = modifiedInputData[0]
        inputData = inputData[0]

        self.transientTime = transientTime
        self.sharedNamespace.transientTime = transientTime
        predictionOutput = B.zeros(np.insert(self.inputShape, 0, inputData.shape[0] - transientTime))

        jobs = np.stack(np.meshgrid(*[np.arange(x) + self._filterWidth for x in inputData.shape[1:]]),
                        axis=rank).reshape(-1, rank).tolist()
        nJobs = len(jobs)

        self.resetState()

        iterator = PredictionArrayIterator(modifiedInputData, jobs, self._filterWidth, self._stride, self)

        pool = Pool(processes=self._nWorkers, initializer=SpatioTemporalESN._init_predictProcess,
                    initargs=[predictQueue, self])
        pool.map_async(self._predictProcess, iterator, chunksize=200)#, chunksize=1)

        def _processPoolWorkerResults():
            nJobsDone = 0

            if verbose > 0:
                bar = progressbar.ProgressBar(max_value=nJobs, redirect_stdout=True, poll_interval=0.0001)
                bar.update(0)

            while nJobsDone < nJobs:
                data = predictQueue.get()
                # result of predicting
                indices, prediction, state = data
                id = self._uniqueIDFromIndices(indices)
                self._xs[id] = state
                # update the values
                predictionOutput[tuple([Ellipsis] + indices)] = prediction

                nJobsDone += 1
                if verbose > 0:
                    bar.update(nJobsDone)
                    if verbose > 1:
                        print(nJobsDone)

            if verbose > 0:
                bar.finish()

        _processPoolWorkerResults()

        pool.close()

        return predictionOutput
Example #34
            os.mkdir(os.path.join(EXPORT_DIR, id))
        write_file(os.path.join(EXPORT_DIR, id, os.path.basename(file)), data)
        os.remove(file)


def mv_erase_dir(file):
    os.rename(file, os.path.join(ERASE_DIR, os.path.basename(file)))


def handle_file(file):
    try:
        data = read_file(file)
        modality = data.Modality
        if modality in ['OP', 'OPT', 'Opt', 'Op']:
            handle_and_mv_export_dir(data, file)
        else:
            mv_erase_dir(file)
    except Exception:
        try:
            os.rename(file, os.path.join(EXCEPTION_DIR,
                                         os.path.basename(file)))
        except Exception:
            pass


if __name__ == "__main__":
    pool = Pool(30)
    for _ in tqdm.tqdm(pool.imap_unordered(handle_file, files),
                       total=len(files)):
        pass
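The imap_unordered/tqdm pairing above is the usual way to get a live progress bar over a process pool, since results are yielded as workers finish rather than in submission order. A self-contained sketch of the same pattern:

import tqdm
from multiprocess import Pool

def work(x):
    return x * x

if __name__ == "__main__":
    items = list(range(100))
    pool = Pool(4)
    # tqdm advances each time a worker hands back a result
    for _ in tqdm.tqdm(pool.imap_unordered(work, items), total=len(items)):
        pass
    pool.close()
    pool.join()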
Example #35
class KubernetesManager(ClusterManager):

    TAG = "KubernetesManager"

    pool = Pool(5)

    @memoized_property
    def kubernetes_home(self):
        try:
            cmd = ["which", "kubectl.sh"]
            output = subprocess.check_output(cmd)
            return output.split("/cluster/kubectl.sh")[0]
        except subprocess.CalledProcessError as e:
            error_log(self.TAG, "Could not get Kubernetes home: {}".format(e))
            return None

    def _generate_auth_token(self):
        return str(hash(time.time()))

    def _create(self, filename, namespace=None):
        success = True
        try:
            cmd = ["kubectl.sh", "create", "-f", filename]
            if namespace:
                cmd.append('--namespace={0}'.format(namespace))
            subprocess.check_call(cmd)
        except subprocess.CalledProcessError as e:
            msg = "Could not deploy specification: {0} on Kubernetes cluster: {1}".format(
                filename, e)
            error_log(self.TAG, msg)
            success = False
        return success

    def __get_service_url(self, service_name):
        try:
            cmd = ["kubectl.sh", "describe", "service", service_name]
            output = subprocess.check_output(cmd)
            ip_re = re.compile("LoadBalancer Ingress:(?P<ip>.*)\n")
            m = ip_re.search(output)
            if not m:
                error_log(self.TAG,
                          "Could not extract IP from service description")
                return None
            return m.group("ip").strip()
        except subprocess.CalledProcessError as e:
            return None

    def _get_proxy_url(self):
        return self.__get_service_url("proxy-registration")

    def _get_registry_url(self):
        return self.__get_service_url("registry")

    def _get_lookup_url(self):
        #return self.__get_service_url("proxy-lookup")
        return ClusterManager.CLUSTER_HOST

    def _get_pod_ip(self, app_id):
        try:
            cmd = [
                "kubectl.sh", "describe", "pod", "notebook-server",
                "--namespace={}".format(app_id)
            ]
            output = subprocess.check_output(cmd)
            ip_re = re.compile("IP:(?P<ip>.*)\n")
            ready_re = re.compile("State:\s+(?P<ready>.*)")
            m = ip_re.search(output)
            if not m:
                info_log(self.TAG, "Could not extract IP from pod description")
                return None
            return m.group("ip").strip()

            # TODO the following code makes the above check safer (will prevent proxy errors) but is too slow
            ready = ready_re.search(output)
            if not ready:
                warning_log(
                    self.TAG,
                    "Extracted the pod IP, but the notebook container is not ready"
                )
                return None
            else:
                status = ready.group("ready").lower().strip()
                debug_log(self.TAG, "status: {}".format(status))
                if status != "running":
                    info_log(
                        self.TAG,
                        "Extracted the pod IP, but the notebook container is not ready"
                    )
                    return None

        except subprocess.CalledProcessError as e:
            return None

    def _launch_registry_server(self):
        registry_path = os.path.join(MainSettings.ROOT, "registry")

        for name in os.listdir(registry_path):
            self._create(os.path.join(registry_path, name))

        info_log(self.TAG,
                 "Sleeping for 10 seconds so registry launch can complete...")
        time.sleep(10)

    def _launch_proxy_server(self, token):

        # TODO the following chunk of code is reused in App.deploy (should be abstracted away)
        proxy_path = os.path.join(MainSettings.ROOT, "proxy")

        # clean up the old deployment
        deploy_path = os.path.join(proxy_path, "deploy")
        if os.path.isdir(deploy_path):
            shutil.rmtree(deploy_path)
        os.mkdir(deploy_path)

        params = {"token": token}

        # load all the template strings
        templates_path = os.path.join(proxy_path, "deployment")
        template_names = os.listdir(templates_path)
        templates = {}
        for name in template_names:
            with open(os.path.join(templates_path, name), 'r') as tf:
                templates[name] = tf.read()

        # insert the notebooks container into the pod.json template
        for name in template_names:
            with open(os.path.join(deploy_path, name), 'w+') as p_file:
                p_string = fill_template_string(templates[name], params)
                p_file.write(p_string)
            # launch each component
            self._create(os.path.join(deploy_path, name))

    def _read_proxy_info(self):
        with open(os.path.join(MainSettings.ROOT, ".proxy_info"),
                  "r") as proxy_file:
            raw_host, raw_token = proxy_file.readlines()
            return "http://" + raw_host.strip(
            ) + "/api/routes", raw_token.strip()

    def _write_proxy_info(self, url, token):
        with open(os.path.join(MainSettings.ROOT, ".proxy_info"),
                  "w+") as proxy_file:
            proxy_file.write("{}\n".format(url))
            proxy_file.write("{}\n".format(token))

    def _read_registry_url(self):
        with open(os.path.join(MainSettings.ROOT, ".registry_info"),
                  "r") as registry_file:
            url = registry_file.readlines()[0]
            return url

    def _write_registry_url(self, url):
        with open(os.path.join(MainSettings.ROOT, ".registry_info"),
                  "w+") as registry_file:
            registry_file.write("{}\n".format(url))

    def _get_inactive_routes(self, min_inactive):
        now = datetime.utcnow()
        threshold = (now - timedelta(minutes=min_inactive)).isoformat()

        base_url, token = self._read_proxy_info()
        h = {"Authorization": "token {}".format(token)}
        proxy_url = base_url + "?inactive_since={}".format(threshold)
        debug_log(self.TAG, "proxy_url: {}".format(proxy_url))
        try:
            r = requests.get(proxy_url, headers=h)
            if r.status_code == 200:
                routes = r.json().keys()
                return map(lambda r: r[1:], routes)
        except requests.exceptions.ConnectionError:
            warning_log(
                self.TAG,
                "Could not get all routes inactive for {} minutes".format(
                    min_inactive))
        return None

    def _remove_proxy_route(self, app_id):
        base_url, token = self._read_proxy_info()
        h = {"Authorization": "token {}".format(token)}
        proxy_url = base_url + "/" + app_id
        try:
            r = requests.delete(proxy_url, headers=h)
            if r.status_code == 204:
                info_log(self.TAG, "Removed proxy route for {}".format(app_id))
                return True
        except requests.exceptions.ConnectionError:
            error_log(self.TAG,
                      "Could not remove proxy route for {}".format(app_id))
        return False

    def _register_proxy_route(self, app_id):
        num_retries = 30
        pause = 1
        for i in range(num_retries):
            # TODO should the notebook port be a parameter?
            ip = self._get_pod_ip(app_id)
            # TODO this is a stopgap solution for a race condition that should be fixed through other means
            time.sleep(1)
            if ip:
                base_url, token = self._read_proxy_info()
                body = {'target': "http://" + ip + ":8888"}
                h = {"Authorization": "token {}".format(token)}
                proxy_url = base_url + "/" + app_id
                debug_log(
                    self.TAG, "body: {}, headers: {}, proxy_url: {}".format(
                        body, h, proxy_url))
                try:
                    r = requests.post(proxy_url,
                                      data=json.dumps(body),
                                      headers=h)
                    if r.status_code == 201:
                        info_log(
                            self.TAG,
                            "Proxying {} to {}".format(proxy_url,
                                                       ip + ":8888"))
                        return True
                    else:
                        raise Exception(
                            "could not register route with proxy server")
                except requests.exceptions.ConnectionError:
                    error_log(self.TAG, "could not connect to proxy server")
                    pass
            info_log(
                self.TAG,
                "App not yet assigned an IP address. Waiting for {} seconds..."
                .format(pause))
            time.sleep(pause)

        return False

    def get_running_apps(self):
        try:
            proxy_loc = MainSettings.KUBE_PROXY_HOST + ':' + MainSettings.KUBE_PROXY_PORT
            url = urljoin(proxy_loc, "/api/v1/pods")
            r = requests.get(url)
            if r.status_code != 200:
                error_log(self.TAG, "could not get list of running pods")
                return None
            json = r.json()
            if 'items' not in json:
                error_log(self.TAG,
                          "pods api endpoint returning malformed JSON")
                return None
            pod_specs = json['items']
            pods = []
            for pod_spec in pod_specs:
                meta = pod_spec['metadata']
                if meta['namespace'] == 'kube-system' or meta[
                        'namespace'] == 'default':
                    continue
                if meta['name'] == 'notebook-server':
                    full_image = pod_spec['spec']['containers'][0]['image']
                    image_name = full_image.split('/')[-1]
                    pods.append((meta['namespace'], image_name))
            return pods
        except ConnectionError as e:
            error_log(self.TAG, e)
            return None

    def _nodes_command(self, func, shell=False):
        provider = os.environ["KUBERNETES_PROVIDER"]

        if isinstance(func, str):
            func_str = func

            def _func(node, zone):
                split = node.split()
                if len(split) > 0:
                    node_name = split[0]
                    if node_name != "kubernetes-master":
                        info_log(
                            self.TAG,
                            "Running {0} on {1}...".format(func, node_name))
                        cmd = [
                            "gcloud", "compute", "ssh", node_name, "--zone",
                            zone, "--command", "{}".format(func_str)
                        ]
                        return subprocess.Popen(cmd, shell=shell)
                return None

            func = _func

        if provider == 'gce':

            # get zone info
            zone = os.environ.get("KUBE_GCE_ZONE")
            if not zone:
                zone_re = re.compile(
                    "ZONE\=\$\{KUBE_GCE_ZONE:\-(?P<zone>.*)\}")
                with open(
                        os.path.join(self.kubernetes_home,
                                     "cluster/gce/config-default.sh"),
                        'r') as f:
                    m = zone_re.search(f.read())
                    if m:
                        zone = m.group("zone")
                    else:
                        error_log(self.TAG, "zone could not be determined")
            if not zone:
                return False

            nodes_cmd = ["kubectl.sh", "get", "nodes"]
            output = subprocess.check_output(nodes_cmd)
            nodes = output.split("\n")[1:]

            return [func(node, zone) for node in nodes]

        elif provider == 'aws':
            # TODO support aws
            return []

        else:
            warning_log(self.TAG,
                        "Only aws and gce providers are currently supported")
            return []

    def get_total_capacity(self):
        def _get_capacity(node, zone):
            pod_re = re.compile(".*pods:\s+(?P<pods>\d+)")
            split = node.split()
            if len(split) > 0:
                node_name = split[0]
                cmd = ['kubectl.sh', 'describe', 'node', node_name]
                output_lines = subprocess.check_output(cmd).split('\n')
                match_lines = [
                    pod_re.search(l) for l in output_lines if pod_re.search(l)
                ]
                if match_lines:
                    return int(match_lines[0].group('pods'))
                return 0
            return 0

        caps = self._nodes_command(_get_capacity)
        return sum(caps)

    def preload_image(self, image_name):
        def _preload(node, zone):
            split = node.split()
            if len(split) > 0:
                node_name = split[0]
                if node_name != "kubernetes-master":
                    info_log(
                        self.TAG, "Preloading {0} onto {1}...".format(
                            image_name, node_name))
                    docker_cmd = "sudo gcloud docker pull {0}/{1}".format(
                        MainSettings.REGISTRY_NAME, image_name)
                    cmd = [
                        "gcloud", "compute", "ssh", node_name, "--zone", zone,
                        "--command", "{}".format(docker_cmd)
                    ]
                    return subprocess.Popen(cmd)
            return None

        procs = self._nodes_command(_preload)
        info_log(self.TAG, "Waiting for preloading to finish...")
        for proc in procs:
            if proc:
                proc.wait()
        info_log(self.TAG,
                 "Preloaded image {} onto all nodes".format(image_name))
        return True

    def _start_proxy_server(self):
        token = self._generate_auth_token()
        self._launch_proxy_server(token)
        num_retries = 5
        for i in range(num_retries):
            debug_log(self.TAG, "Sleeping for 20s before getting proxy URL")
            time.sleep(20)
            proxy_url = self._get_proxy_url()
            if proxy_url:
                debug_log(self.TAG, "proxy_url: {}".format(proxy_url))
                # record the proxy url and auth token
                self._write_proxy_info(proxy_url, token)
                break
        if not proxy_url:
            error_log(
                self.TAG,
                "Could not obtain the proxy server's URL. Cluster launch unsuccessful"
            )
            return False

    def _start_registry_server(self):
        # TODO remove duplicated code here
        self._launch_registry_server()
        num_retries = 5
        for i in range(num_retries):
            debug_log(self.TAG, "Sleeping for 20s before getting registry URL")
            time.sleep(20)
            registry_url = self._get_registry_url()
            if registry_url:
                debug_log(self.TAG, "registry_url: {}".format(registry_url))
                # record the registry url
                self._write_registry_url(registry_url)
                break
        if not registry_url:
            error_log(
                self.TAG,
                "Could not obtain the registry server's URL. Cluster launch unsuccessful"
            )
            return False

    def _preload_registry_server(self):
        try:
            subprocess.check_call([
                "docker", "pull",
                "{}/binder-base".format(MainSettings.DOCKER_HUB_USER)
            ])
            subprocess.check_call([
                "docker", "tag",
                "{}/binder-base".format(MainSettings.DOCKER_HUB_USER),
                "{}/binder-base".format(MainSettings.REGISTRY_NAME)
            ])
            subprocess.check_call([
                "docker", "push",
                "{}/binder-base".format(MainSettings.REGISTRY_NAME)
            ])
            return True
        except subprocess.CalledProcessError as e:
            error_log(
                self.TAG,
                "Could not preload registry server with binder-base image: {}".
                format(e))
            return False

    def start(self, num_minions=3, provider="gce"):
        success = True
        try:
            # start the cluster
            os.environ["NUM_MINIONS"] = str(num_minions)
            os.environ["KUBERNETES_PROVIDER"] = provider
            subprocess.check_call(['kube-up.sh'])

            # launch binderd
            binderd_proc = subprocess.Popen(["binderd"])
            # sleep just for good measure (modules starting up)
            time.sleep(5)

            # generate an auth token and launch the proxy server
            info_log(self.TAG, "Launching proxy server...")
            self._start_proxy_server()

            # launch the private Docker registry
            info_log(self.TAG, "Launching private Docker registry...")
            self._start_registry_server()
            info_log(self.TAG,
                     "Preloading registry server with binder-base image...")
            self._preload_registry_server()

            # preload the generic base image onto all the workers
            info_log(self.TAG,
                     "Preloading binder-base image onto all nodes...")
            success = success and self.preload_image("binder-base")

            # start the inactive app removal cron job
            cron = CronTab()
            cmd = " ".join([
                get_env_string(),
                os.path.join(MainSettings.ROOT, "util", "stop-inactive-apps"),
                "&>/tmp/binder-cron"
            ])
            job = cron.new(cmd, comment="binder-stop")
            job.minute.every(MonitoringSettings.APP_CRON_PERIOD)
            job.enable(True)
            cron.write_to_user(user=True)

        except subprocess.CalledProcessError as e:
            success = False

        if success:
            info_log(self.TAG, "Started Kubernetes cluster successfully")
        else:
            error_log(self.TAG, "Could not launch the Kubernetes cluster")
        return success

    def stop(self, provider="gce"):
        try:
            os.environ["KUBERNETES_PROVIDER"] = provider
            subprocess.check_call(['kube-down.sh'])

            # remove the inactive app removal cron job
            cron = CronTab()
            jobs = cron.find_comment("binder-stop")
            for job in jobs:
                job.enable(False)
                cron.remove(job)
            cron.write_to_user(user=True)

        except subprocess.CalledProcessError as e:
            error_log(self.TAG, "Could not destroy the Kubernetes cluster")

    def destroy_app(self, app_id):
        pass

    def list_apps(self):
        pass

    def deploy_app(self, app_id, app_dir):
        success = True

        # first create a namespace for the app
        success = self._create(os.path.join(app_dir, "namespace.json"))

        # now launch all other components in the new namespace
        for f in os.listdir(app_dir):
            if f != "namespace.json":
                path = os.path.join(app_dir, f)
                success = success and self._create(path, namespace=app_id)
                if not success:
                    error_log(
                        self.TAG,
                        "Could not deploy {0} on Kubernetes cluster".format(
                            path))

        # create a route in the proxy
        success = success and self._register_proxy_route(app_id)
        if not success:
            error_log(self.TAG,
                      "Could not deploy {} on Kubernetes cluster".format(app_id))
            return None

        lookup_url = self._get_lookup_url()
        app_url = urljoin("https://" + lookup_url, app_id)
        info_log(self.TAG, "Access app at: \n   {}".format(app_url))
        return app_url

    def stop_app(self, app_id):
        if app_id == "kube-system":
            return
        try:
            self._remove_proxy_route(app_id)
            stop_cmd = [
                "kubectl.sh", "stop", "pods,services,replicationControllers",
                "--all", "--namespace={}".format(app_id)
            ]
            cleanup_cmd = ["kubectl.sh", "delete", "namespace", app_id]
            subprocess.check_call(stop_cmd)
            subprocess.check_call(cleanup_cmd)
            info_log(self.TAG, "Stopped app {}".format(app_id))
        except subprocess.CalledProcessError as e:
            error_log(self.TAG, "Could not stop app {}".format(app_id))

    def _stop_apps(self, app_ids):
        if not app_ids:
            info_log(self.TAG, "No apps to stop")
            return
        for app_id in app_ids:
            self.stop_app(app_id)

    def stop_inactive_apps(self, min_inactive):
        routes = self._get_inactive_routes(min_inactive)
        self._stop_apps(routes)

    def stop_all_apps(self):
        app_ids = [app[0] for app in self.get_running_apps()]
        self._stop_apps(app_ids)
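
# A minimal sketch of one way to remove the duplication noted in the TODO inside
# _start_registry_server above: both _start_proxy_server and _start_registry_server
# poll a getter until it returns a URL. The helper name _wait_for_url and its
# parameters are illustrative, not part of the original class.
import time

def _wait_for_url(get_url, num_retries=5, delay=20, log=print):
    """Poll get_url() up to num_retries times, sleeping `delay` seconds between tries."""
    for _ in range(num_retries):
        log("Sleeping for {}s before polling URL".format(delay))
        time.sleep(delay)
        url = get_url()
        if url:
            return url
    return None

# usage sketch: proxy_url = _wait_for_url(self._get_proxy_url)
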
Exemplo n.º 36
0
    def parse_files(self, workers, data_dicts=None):
        """Parse all files"""
        print("\n\n### Parsing files ###")
        os.chdir(self.workdir)  # questionable

        if data_dicts is None:
            data_dicts = [{
                "filename": fn.name
            } for fn in os.scandir(self.textdir)]
        filequeue = [{
            "name": d["filename"],
            "size": os.path.getsize(self.textdir + d["filename"]),
            "id": n + 1,
            "options": d["options"] if "options" in d else {},
            "newpath": self.textdir + d["filename"],
            "raw": self.workdir + d["filename"] + ".raw",
            "words": self.workdir + d["filename"] + ".words.sorted",
            "toms": self.workdir + d["filename"] + ".toms",
            "sortedtoms": self.workdir + d["filename"] + ".toms.sorted",
            "pages": self.workdir + d["filename"] + ".pages",
            "refs": self.workdir + d["filename"] + ".refs",
            "graphics": self.workdir + d["filename"] + ".graphics",
            "lines": self.workdir + d["filename"] + ".lines",
            "results": self.workdir + d["filename"] + ".results",
        } for n, d in enumerate(data_dicts)]

        self.raw_files = [f["raw"] + ".lz4" for f in filequeue]

        self.metadata_hierarchy.append([])
        # Adding in doc level metadata
        for d in data_dicts:
            for k in list(d.keys()):
                if k not in self.metadata_fields:
                    self.metadata_fields.append(k)
                    self.metadata_hierarchy[0].append(k)
                if k not in self.metadata_types:
                    self.metadata_types[k] = "doc"
                    # don't need to check for conflicts, since doc is first.

        # Adding non-doc level metadata
        for element_type in self.parser_config["metadata_to_parse"]:
            if element_type != "page" and element_type != "ref" and element_type != "line":
                self.metadata_hierarchy.append([])
                for param in self.parser_config["metadata_to_parse"][
                        element_type]:
                    if param not in self.metadata_fields:
                        self.metadata_fields.append(param)
                        self.metadata_hierarchy[-1].append(param)
                    if param not in self.metadata_types:
                        self.metadata_types[param] = element_type
                    else:  # we have a serious error here!  Should raise going forward.
                        pass

        print("%s: parsing %d files." % (time.ctime(), len(filequeue)))
        with tqdm(total=len(filequeue), leave=False) as pbar:
            with Pool(workers) as pool:
                for results in pool.imap_unordered(self.__parse_file,
                                                   zip(filequeue, data_dicts)):
                    with open(results, "rb") as proc_fh:
                        vec = pickle.load(
                            proc_fh
                        )  # load in the results from the child's parsework() function.
                    self.omax = [max(x, y) for x, y in zip(vec, self.omax)]
                    pbar.update()
        print("%s: done parsing" % time.ctime())
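
# A minimal, self-contained sketch of the merge pattern parse_files relies on:
# each worker persists a picklable result (here, a vector of counts) and returns
# its path; the parent reduces the results with an elementwise max, as done with
# self.omax above. The names worker and merge_maxima are illustrative.
import os
import pickle
import tempfile
from multiprocessing import Pool

def worker(n):
    # pretend each file yields a vector of object counts
    vec = [n, n * 2, n * 3]
    path = os.path.join(tempfile.gettempdir(), "demo_{}.results".format(n))
    with open(path, "wb") as fh:
        pickle.dump(vec, fh)
    return path

def merge_maxima(paths):
    omax = [0, 0, 0]
    for p in paths:
        with open(p, "rb") as fh:
            vec = pickle.load(fh)
        omax = [max(x, y) for x, y in zip(vec, omax)]
    return omax

if __name__ == "__main__":
    with Pool(2) as pool:
        paths = list(pool.imap_unordered(worker, range(4)))
    print(merge_maxima(paths))  # -> [3, 6, 9]
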
def qsd_solve(
    H,
    psi0,
    tspan,
    Ls,
    sdeint_method,
    obsq=None,
    normalized_equation=True,
    normalize_state=True,
    multiprocessing=False,
    ntraj=1,
    processes=8,
    seed=1,
    implicit_type=None,
):
    '''
    Args:
        H: NxN csr matrix, dtype = complex128
            Hamiltonian.
        psi0: Nx1 csr matrix, dtype = complex128
            input state.
        tspan: numpy array, dtype = float
            Time series of some length T.
        Ls: list of NxN csr matrices, dtype = complex128
            System-environment interaction terms (Lindblad terms).
        sdeint_method: SDE solver method
            Which SDE solver to use (e.g. sdeint.itoSRI2).
        obsq (optional): list of NxN csr matrices, dtype = complex128
            Observables for which to generate trajectory information.
            Default value is None (no observables).
        normalized_equation (optional): Boolean
            Use the normalized quantum state diffusion equations. (TODO: case False)
        normalize_state (optional): Boolean
            Whether to numerically normalize the equation at each step.
        multiprocessing (optional): Boolean
            Whether or not to use multiprocessing
        ntraj (optional): int
            number of trajectories.
        processes (optional): int
            number of processes. If processes == 1, don't use multiprocessing.
        seed (optional): int
            Seed for random noise.
        implicit_type (optional): string
            Type of implicit solver to use if the solver is implicit.

    Returns:
        A dictionary with the following keys and values:
            ['psis'] -> np.array with shape = (ntraj,T,N) and dtype = complex128
            ['obsq_expects'] -> np.array with shape = (ntraj,T,len(obsq)) and dtype = complex128

    '''

    ## Check dimensions of inputs. These should be consistent with qutip Qobj.data.
    N = psi0.shape[0]
    if psi0.shape[1] != 1:
        raise ValueError("psi0 should have dimensions Nx1.")
    a, b = H.shape
    if a != N or b != N:
        raise ValueError("H should have dimensions NxN (same size as psi0).")
    for L in Ls:
        a, b = L.shape
        if a != N or b != N:
            raise ValueError(
                "Every L should have dimensions NxN (same size as psi0).")
    ## Determine seeds for the SDEs
    if type(seed) is list or type(seed) is tuple:
        assert len(seed) == ntraj
        seeds = seed
    elif type(seed) is int or seed is None:
        np.random.seed(seed)
        seeds = [np.random.randint(1000000) for _ in range(ntraj)]
    else:
        raise ValueError("Unknown seed type.")

    T_init = time()
    psi0_arr = np.asarray(psi0.todense()).T[0]
    x0 = np.concatenate([psi0_arr.real, psi0_arr.imag])
    drift_diffusion = drift_diffusion_holder(H, Ls, tspan)

    f = complex_to_real_vector(drift_diffusion.f)
    G = complex_to_real_matrix(drift_diffusion.G)

    def SDE_helper(args, s):
        '''Let's make different wiener increments for each trajectory'''
        m = 2 * len(Ls)
        N = len(tspan) - 1
        h = (tspan[N - 1] - tspan[0]) / (N - 1)
        np.random.seed(s)
        dW = np.random.normal(0.0, np.sqrt(h), (N, m)) / np.sqrt(2.)
        if implicit_type is None:
            out = sdeint_method(*args, dW=dW, normalized=normalize_state)
            return out
        try:
            out = sdeint_method(*args,
                                dW=dW,
                                normalized=normalize_state,
                                implicit_type=implicit_type)
        except TypeError:
            print("Not an implicit method. implicit_type argument ignored.")
            out = sdeint_method(*args, dW=dW, normalized=normalize_state)
        return out

    ## simulation parameters
    params = [[f, G, x0, tspan]] * ntraj

    if multiprocessing:
        pool = Pool(processes=processes, )
        outputs = pool.map(lambda z: SDE_helper(z[0], z[1]),
                           zip(params, seeds))
    else:
        outputs = [SDE_helper(p, s) for p, s in zip(params, seeds)]
    try:
        xs = np.array([o["trajectory"] for o in outputs])
    except KeyError:
        print("Warning: trajectory not returned by SDE method!")
        raise
    try:
        norms = np.array([o["norms"] for o in outputs])
    except KeyError:
        print("Warning: norms not returned by SDE method!")
        norms = None

    print("done running simulation!")

    psis = xs[:, :, :int(len(x0) / 2)] + 1j * xs[:, :, int(len(x0) / 2):]

    # Obtaining expectations of observables
    obsq_expects = (np.asarray([[
        np.asarray([ob.dot(psi).dot(psi.conj()) for ob in obsq])
        for psi in psis[i]
    ] for i in range(ntraj)]) if obsq is not None else None)

    T_fin = time()
    print("Run time:  ", T_fin - T_init, " seconds.")
    return {
        "psis": psis,
        "obsq_expects": obsq_expects,
        "seeds": seeds,
        "norms": norms
    }
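
# A short worked example of the real/complex packing used above (illustrative and
# standalone): the complex state psi is stored as x = [Re(psi), Im(psi)] for the
# real-valued SDE integrator, and unpacked afterwards with psi = x[:N] + 1j*x[N:],
# which is exactly what the psis line above does per trajectory and time step.
import numpy as np

psi = np.array([1 + 2j, 3 - 1j, 0.5j])          # toy complex state, N = 3
x = np.concatenate([psi.real, psi.imag])        # packed real vector, length 2N
N = len(x) // 2
psi_back = x[:N] + 1j * x[N:]                   # unpack
assert np.allclose(psi, psi_back)
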
Exemplo n.º 38
0
def dump_messages(dmp, **kwargs):
    """Messages

    dmp: Dumper object
    """
    global users

    folder = os.path.join('dump', 'dialogs')
    os.makedirs(folder, exist_ok=True)

    print('[получение диалогов...]')
    print('\x1b[2K  0/???', end='\r')

    conversations = dmp._vk_tools.get_all(
        method='messages.getConversations',
        max_count=200,
        values={
            'extended': 1,
            'fields': 'first_name, last_name, name'
        })

    print('\x1b[2K  {}/{}'.format(len(conversations['items']), conversations['count']))
    if dmp._DUMP_DIALOGS_ONLY:
        print('[будет сохранено диалогов: {}]'.format(len(dmp._DUMP_DIALOGS_ONLY)), end='\n\n')
    else:
        print('[будет исключено диалогов: {}]'.format(len(dmp._EXCLUDED_DIALOGS)), end='\n\n')

    print('Сохранение диалогов:')
    for con in conversations['items']:
        did = con['conversation']['peer']['id']

        pass_dialog = False
        if dmp._DUMP_DIALOGS_ONLY:
            if did not in dmp._DUMP_DIALOGS_ONLY:
                if dmp._settings['HIDE_EXCLUDED_DIALOGS']:
                    continue
                else:
                    pass_dialog = True
        elif did in dmp._EXCLUDED_DIALOGS:
            if dmp._settings['HIDE_EXCLUDED_DIALOGS']:
                continue
            else:
                pass_dialog = True

        if con['conversation']['peer']['type'] == 'user':
            if did not in users:
                users_add(dmp._vk, did)
            dialog_name = users.get(did)['name']
        elif con['conversation']['peer']['type'] == 'group':
            if did not in users:
                users_add(dmp._vk, did)
            dialog_name = users.get(did)['name']
        elif con['conversation']['peer']['type'] == 'chat':
            dialog_name = con['conversation']['chat_settings']['title']
        else:
            dialog_name = r'{unknown}'

        for c in dmp._INVALID_CHARS:
            if c in dialog_name:
                dialog_name = dialog_name.replace(c, dmp._settings['REPLACE_CHAR'])

        fn = '{}_{id}'.format('_'.join(dialog_name.split(' ')), id=did)
        for n in os.listdir(folder):
            if str(did) == n.split('.txt')[0].split('_')[-1]:
                if dmp._settings['KEEP_DIALOG_NAMES']:
                    fn = n.split('.txt')[0]
                else:
                    shutil.move(os.path.join(folder, n),
                                os.path.join(folder, '{}_{id}'.format('_'.join(dialog_name.split(' ')), id=did) + ('.txt' if '.txt' in n else '')))

        print('  Диалог: {}{nfn}'.format(dialog_name, nfn=(' (as {})'.format(fn) if ' '.join(fn.split('_')[:-1]) != dialog_name else '')))
        if pass_dialog is True:
            print('    [исключён]\n')
            continue

        values = {
            'peer_id': con['conversation']['peer']['id'],
            'extended': 1,
            'fields': 'first_name, last_name'
        }

        append = {'use': dmp._settings['DIALOG_APPEND_MESSAGES'] and
                  os.path.exists(os.path.join(folder, f'{fn}.txt'))}
        try:
            if append['use']:
                with open(os.path.join(folder, f'{fn}.txt'), 'rb') as t:
                    t.seek(-2, 2)
                    while t.read(1) != b'\n':
                        t.seek(-2, 1)
                    last = t.readline().decode()

                    r = re.match(r'^\[last:[0-9]+\]$', last)
                    if r:
                        start_message_id = int(re.search(r'\d+', r.group(0)).group(0))
                        values['start_message_id'] = start_message_id

                        t.seek(-len(last.encode('utf-8'))-2, 1)
                        while True:
                            while t.read(1) != b'\n':
                                t.seek(-2, 1)
                            tmp = t.readline().decode()
                            r = re.match(r'^ {8}\[\d+ [а-я a-z]+ \d+\]$', tmp)
                            # TODO: derive last_id from the last message (???)
                            if r:
                                append['prev_date'] = re.search(r'\d+ [а-я a-z]+ \d+', r.group(0)).group(0)
                                break
                            else:
                                t.seek(-len(tmp.encode('utf-8'))-2, 1)
                    else:
                        values['rev'] = 1
                        append['use'] = False
            else:
                values['rev'] = 1
        except OSError:
            values['rev'] = 1
            append['use'] = False

        print('    [кэширование]')
        print('\x1b[2K      0/???', end='\r')

        try:
            history = dmp._vk_tools.get_all(
                method='messages.getHistory',
                max_count=200,
                values=values,
                negative_offset=append['use'])
            print('\x1b[2K      {}/{}'.format(len(history['items']),
                                              history['count']))
            if len(history['items']) == 0:
                print()
                continue
        except VkToolsException:
            print('\x1b[2K      0/0\n')
            continue

        if append['use']:
            def sortById(msg):
                return msg['id']
            history['items'].sort(key=sortById)

        attachments = {
            'photos': [],
            'video_ids': [],
            'docs': [],
            'audio_messages': []
        }

        if append['use']:
            tmp = ''
        else:
            f = open(os.path.join(folder, f'{fn}.txt'), 'w', encoding='utf-8')

        count = len(history['items'])
        print('    [сохранение сообщений]')
        print('      {}/{}'.format(0, count), end='\r')
        prev = None
        prev_date = None

        if append['use']:
            prev_date = append['prev_date']

        for i in range(count):
            m = history['items'][i]

            if m['from_id'] not in users:
                users_add(dmp._vk, m['from_id'])

            res = message_handler(dmp._vk, m)

            date = time_handler(m['date'])
            hold = ' ' * (users.get(m['from_id'])['length'] + 2)

            msg = res['date'] + ' '
            msg += hold if (prev and date and prev == m['from_id'] and prev_date == date) \
                        else users.get(m['from_id'])['name'] + ': '

            if res['messages']:
                msg += res['messages'][0] + '\n'
                for r in res['messages'][1:]:
                    msg += hold + ' '*8 + r + '\n'
            else:
                msg += '\n'

            for a in res['attachments']['audio_messages']:
                if a not in attachments['audio_messages']:
                    attachments['audio_messages'].append(a)

            if dmp._settings['SAVE_DIALOG_ATTACHMENTS']:
                for tp in res['attachments']:
                    for a in res['attachments'][tp]:
                        if a not in attachments[tp]:
                            attachments[tp].append(a)

            if prev_date != date:
                if prev_date:
                    if append['use']:
                        tmp += '\n'
                    else:
                        f.write('\n')
                if append['use']:
                    tmp += f'        [{date}]\n'
                else:
                    f.write(f'        [{date}]\n')
                prev_date = date

            if append['use']:
                tmp += msg
            else:
                f.write(msg)
            prev = m['from_id']
            print('\x1b[2K      {}/{}'.format(i+1, count), end='\r')

        if append['use']:
            import codecs

            orig_file = os.path.join(folder, f'{fn}.txt')
            tmp_file = os.path.join(folder, f'{fn}.new')

            try:
                with codecs.open(orig_file, 'r', encoding='utf-8') as fi,\
                     codecs.open(tmp_file, 'w', encoding='utf-8') as fo:

                    for line in fi:
                        if re.match(r'^\[last:[0-9]+\]$', line):
                            line = tmp+'[last:{}]\n'.format(history['items'][-1]['id'])
                        fo.write(line)
                os.remove(orig_file)
                os.rename(tmp_file, orig_file)
            except Exception:
                os.remove(tmp_file)
        else:
            f.write('[last:{}]\n'.format(history['items'][-1]['id']))
            f.close()
        print()

        if attachments['audio_messages']:
            at_folder = os.path.join(folder, fn)
            af = os.path.join(at_folder, 'Голосовые')
            os.makedirs(af, exist_ok=True)

            print('    [сохранение голосовых сообщений]')
            print('      .../{}'.format(len(attachments['audio_messages'])), end='\r')

            with Pool(dmp._settings['POOL_PROCESSES']) as pool:
                res = pool.starmap(dmp._download,
                                   zip(itertools.repeat(dmp.__class__),
                                       attachments['audio_messages'],
                                       itertools.repeat(af)))

            print('\x1b[2K      {}/{} (total: {})'.format(sum(filter(None, res)),
                                                          len(attachments['audio_messages']),
                                                          len(next(os.walk(af))[2])))

        if dmp._settings['SAVE_DIALOG_ATTACHMENTS']:
            at_folder = os.path.join(folder, fn)
            os.makedirs(at_folder, exist_ok=True)

            if attachments['photos']:
                af = os.path.join(at_folder, 'Фото')
                os.makedirs(af, exist_ok=True)

                print('    [сохранение фото]')
                print('      .../{}'.format(len(attachments['photos'])), end='\r')

                with Pool(dmp._settings['POOL_PROCESSES']) as pool:
                    res = pool.starmap(dmp._download,
                                       zip(itertools.repeat(dmp.__class__),
                                           attachments['photos'],
                                           itertools.repeat(af)))

                print('\x1b[2K      {}/{} (total: {})'.format(sum(filter(None, res)),
                                                              len(attachments['photos']),
                                                              len(next(os.walk(af))[2])))

            if attachments['video_ids']:
                af = os.path.join(at_folder, 'Видео')
                os.makedirs(af, exist_ok=True)

                videos = dmp._vk_tools.get_all(
                    method='video.get',
                    max_count=200,
                    values={
                        'videos': ','.join(attachments['video_ids']),
                        'extended': 1
                    }
                )

                print('    [сохранение видео]')
                print('      .../{}'.format(len(videos['items'])), end='\r')

                try:
                    with Pool(dmp._AVAILABLE_THREADS if dmp._settings['LIMIT_VIDEO_PROCESSES'] else dmp._settings['POOL_PROCESSES']) as pool:
                        res = pool.starmap(dmp._download_video,
                                           zip(itertools.repeat(dmp.__class__),
                                               videos['items'],
                                               itertools.repeat(af)))
                    print('\x1b[2K      {}/{} (total: {})'.format(sum(filter(None, res)),
                                                                  len(videos['items']),
                                                                  len(next(os.walk(af))[2])))
                except MaybeEncodingError:
                    print('\x1b[2K      ???/{} (total: {})'.format(len(videos['items']), len(next(os.walk(af))[2])))

            if attachments['docs']:
                af = os.path.join(at_folder, 'Документы')
                os.makedirs(af, exist_ok=True)

                print('    [сохранение документов]')
                print('      .../{}'.format(len(attachments['docs'])), end='\r')

                with Pool(dmp._settings['POOL_PROCESSES']) as pool:
                    res = pool.starmap(dmp._download,
                                       zip(itertools.repeat(dmp.__class__),
                                           attachments['docs'],
                                           itertools.repeat(af)))

                print('\x1b[2K      {}/{} (total: {})'.format(sum(filter(None, res)),
                                                              len(attachments['docs']),
                                                              len(next(os.walk(af))[2])))

    with open('users.json', 'w', encoding='utf-8') as f:
        json.dump(users, f, ensure_ascii=False, indent=4)
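
# A minimal sketch of the fan-out pattern used above for attachment downloads:
# Pool.starmap with itertools.repeat broadcasts the constant arguments (the class
# object and the target folder) while iterating over the items. The download
# function below is a stand-in, not the real Dumper._download.
import itertools
from multiprocessing import Pool

def download(cls_name, item, folder):
    # placeholder for dmp._download(dmp.__class__, item, folder)
    return "{}: saved {} to {}".format(cls_name, item, folder)

if __name__ == "__main__":
    items = ["a.jpg", "b.jpg", "c.jpg"]
    with Pool(2) as pool:
        res = pool.starmap(download,
                           zip(itertools.repeat("Dumper"), items,
                               itertools.repeat("dump/photo")))
    print(res)
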
def qsd_solve_two_systems(H1,
                          H2,
                          psi0,
                          tspan,
                          L1s,
                          L2s,
                          R,
                          eps,
                          n,
                          sdeint_method,
                          trans_phase=None,
                          obsq=None,
                          normalize_state=True,
                          downsample=1,
                          ops_on_whole_space=False,
                          multiprocessing=False,
                          ntraj=1,
                          processes=8,
                          seed=1,
                          implicit_type=None):
    '''
    Args:
        H1: N1xN1 csr matrix, dtype = complex128
            Hamiltonian for system 1.
        H2: N2xN2 csr matrix, dtype = complex128
            Hamiltonian for system 2.
        psi0: Nx1 csr matrix, dtype = complex128
            input state.
        tspan: numpy array, dtype = float
            Time series of some length T.
        L1s: list of N1xN1 csr matrices, dtype = complex128
            System-environment interaction terms (Lindblad terms) for system 1.
        L2s: list of N2xN2 csr matrices, dtype = complex128
            System-environment interaction terms (Lindblad terms) for system 2.
        R: float
            reflectivity used to separate the classical versus coherent
            transmission
        eps: float
            The multiplier by which the classical state displaces the coherent
            state
        n: float
            Scalar to multiply the measurement feedback noise
        sdeint_method: SDE solver method
            Which SDE solver to use (e.g. sdeint.itoSRI2).
        obsq (optional): list of NxN csr matrices, dtype = complex128
            Observables for which to generate trajectory information.
            Default value is None (no observables).
        normalize_state (optional): Boolean
            Whether to numerically normalize the equation at each step.
        downsample: optional, integer to indicate how frequently to save values.
        ops_on_whole_space (optional): Boolean
            whether the given L and H operators have been defined on the whole
            space or on individual subspaces.
        multiprocessing (optional): Boolean
            Whether or not to use multiprocessing
        ntraj (optional): int
            number of trajectories.
        processes (optional): int
            number of processes. If processes == 1, don't use multiprocessing.
        seed (optional): int
            Seed for random noise.
        implicit_type (optional): string
            Type of implicit solver to use if the solver is implicit.

    Returns:
        A dictionary with the following keys and values:
            ['psis'] -> np.array with shape = (ntraj,T,N) and dtype = complex128
            ['obsq_expects'] -> np.array with shape = (ntraj,T,len(obsq)) and dtype = complex128

    '''

    ## Check dimensions of inputs. These should be consistent with qutip Qobj.data.
    N = psi0.shape[0]
    if psi0.shape[1] != 1:
        raise ValueError("psi0 should have dimensions Nx1.")

    ## Determine seeds for the SDEs
    if type(seed) is list or type(seed) is tuple:
        assert len(seed) == ntraj
        seeds = seed
    elif type(seed) is int or seed is None:
        np.random.seed(seed)
        seeds = [np.random.randint(1000000) for _ in range(ntraj)]
    else:
        raise ValueError("Unknown seed type.")

    T_init = time()
    psi0_arr = np.asarray(psi0.todense()).T[0]
    x0 = np.concatenate([psi0_arr.real, psi0_arr.imag])
    drift_diffusion = drift_diffusion_two_systems_holder(
        H1,
        H2,
        L1s,
        L2s,
        R,
        eps,
        n,
        tspan,
        trans_phase=trans_phase,
        ops_on_whole_space=ops_on_whole_space)

    f = complex_to_real_vector(drift_diffusion.f_normalized)
    G = complex_to_real_matrix(drift_diffusion.G_normalized)

    def SDE_helper(args, s):
        '''Let's make different wiener increments for each trajectory'''
        m = 2 * (len(L1s) + len(L2s))
        N = len(tspan) - 1
        h = (tspan[N - 1] - tspan[0]) / (N - 1)
        np.random.seed(s)
        dW = np.random.normal(0.0, np.sqrt(h), (N, m)) / np.sqrt(2.)
        dW_with_conj = insert_conj(dW, port=1)
        if sdeint_method is sdeint.itoQuasiImplicitEuler:
            implicit_ports = [1, 2, int(m / 2 + 1), int(m / 2) + 2]
            out = sdeint_method(*args,
                                dW=dW_with_conj,
                                normalized=normalize_state,
                                downsample=downsample,
                                implicit_ports=implicit_ports)
            return out
        if implicit_type is None:
            out = sdeint_method(*args,
                                dW=dW_with_conj,
                                normalized=normalize_state,
                                downsample=downsample)
            return out
        try:
            out = sdeint_method(*args,
                                dW=dW_with_conj,
                                normalized=normalize_state,
                                downsample=downsample,
                                implicit_type=implicit_type)
        except TypeError:
            print("Not an implicit method. implicit_type argument ignored.")
            out = sdeint_method(*args,
                                dW=dW_with_conj,
                                normalized=normalize_state,
                                downsample=downsample)
        return out

    ## simulation parameters
    params = [[f, G, x0, tspan]] * ntraj

    if multiprocessing:
        pool = Pool(processes=processes, )
        outputs = pool.map(lambda z: SDE_helper(z[0], z[1]),
                           zip(params, seeds))
    else:
        outputs = [SDE_helper(p, s) for p, s in zip(params, seeds)]

    try:
        xs = np.array([o["trajectory"] for o in outputs])
    except KeyError:
        print("Warning: trajectory not returned by SDE method!")
        raise
    try:
        norms = np.array([o["norms"] for o in outputs])
    except KeyError:
        print("Warning: norms not returned by SDE method!")
        norms = None

    print("done running simulation!")

    psis = xs[:, :, :int(len(x0) / 2)] + 1j * xs[:, :, int(len(x0) / 2):]

    # Obtaining expectations of observables
    obsq_expects = (np.asarray([[
        np.asarray([ob.dot(psi).dot(psi.conj()) for ob in obsq])
        for psi in psis[i]
    ] for i in range(ntraj)]) if obsq is not None else None)

    T_fin = time()
    print("Run time:  ", T_fin - T_init, " seconds.")
    return {
        "psis": psis,
        "obsq_expects": obsq_expects,
        "seeds": seeds,
        "norms": norms
    }
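
# A small illustration of the seeding logic shared by qsd_solve and
# qsd_solve_two_systems: a single integer seed (or None) is expanded into one
# reproducible seed per trajectory, while a list/tuple of seeds is used as-is.
# The helper name expand_seeds is illustrative.
import numpy as np

def expand_seeds(seed, ntraj):
    if isinstance(seed, (list, tuple)):
        assert len(seed) == ntraj
        return list(seed)
    if seed is None or isinstance(seed, int):
        np.random.seed(seed)
        return [np.random.randint(1000000) for _ in range(ntraj)]
    raise ValueError("Unknown seed type.")

print(expand_seeds(1, 3))       # deterministic: same three seeds every run
print(expand_seeds([4, 5], 2))  # explicit per-trajectory seeds
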
Exemplo n.º 40
0
def dump_fave_posts(dmp):
    """Attachments of liked posts (photos, videos, documents)

    dmp: Dumper object
    """
    folder_photo = os.path.join('dump', 'photo', 'Понравившиеся')
    os.makedirs(folder_photo, exist_ok=True)
    folder_video = os.path.join('dump', 'video', 'Понравившиеся')
    os.makedirs(folder_video, exist_ok=True)
    folder_docs = os.path.join('dump', 'docs', 'Понравившиеся')
    os.makedirs(folder_docs, exist_ok=True)

    print('[получение постов]')

    posts = get_fave(dmp._vk, 'posts')

    # from pprint import pprint
    # print(type(posts))
    # print(type(posts['items'][0]))

    photo = []
    video = []
    docs = []

    for p in posts['items']:
        if 'attachments' in p:
            for at in p['attachments']:
                if at['type'] == 'photo':
                    at['photo']['sizes'].sort(
                        key=itemgetter('width', 'height'))
                    obj = {
                        'url': at['photo']['sizes'][-1]['url'],
                        'prefix': '{}_{}'.format(p['owner_id'], p['id'])
                    }
                    if 'access_key' in at['photo']:
                        obj['access_key'] = at['photo']['access_key']
                    photo.append(obj)
                elif at['type'] == 'video':
                    access_key = at['video'].get('access_key')
                    video.append('{oid}_{id}{access_key}'.format(
                        oid=at['video']['owner_id'],
                        id=at['video']['id'],
                        access_key='_' + access_key if access_key else ''))
                elif at['type'] == 'doc':
                    obj = {
                        'url': at['doc']['url'],
                        'prefix': '{}_{}'.format(p['owner_id'], p['id']),
                        'name': '{}_{}'.format(at['doc']['title'],
                                               at['doc']['id']),
                        'ext': at['doc']['ext']
                    }
                    if 'access_key' in at['doc']:
                        obj['access_key'] = at['doc']['access_key']
                    docs.append(obj)

    if video:
        video = dmp._vk_tools.get_all(method='video.get',
                                      max_count=200,
                                      values={
                                          'videos': ','.join(video),
                                          'extended': 1
                                      })

    print('Сохранение ({} вложений из {} постов):'.format(
        sum([len(photo), len(video), len(docs)]), len(posts['items'])))

    if photo:
        print('  [фото ({})]'.format(len(photo)))
        with Pool(dmp._settings['POOL_PROCESSES']) as pool:
            pool.starmap(
                dmp._download,
                zip(itertools.repeat(dmp.__class__), photo,
                    itertools.repeat(folder_photo)))

    try:
        if video:
            print('  [видео ({})]'.format(len(video['items'])))
            with Pool(dmp._settings['POOL_PROCESSES']
                      if not dmp._settings['LIMIT_VIDEO_PROCESSES'] else dmp.
                      _AVAILABLE_THREADS) as pool:
                pool.starmap(
                    dmp._download_video,
                    zip(itertools.repeat(dmp.__class__), video['items'],
                        itertools.repeat(folder_video)))
    except MaybeEncodingError:
        pass

    if docs:
        print('  [документы ({})]'.format(len(docs)))
        with Pool(dmp._settings['POOL_PROCESSES']) as pool:
            pool.starmap(
                dmp._download,
                zip(itertools.repeat(dmp.__class__), docs,
                    itertools.repeat(folder_docs)))
                }
                for rob in study[result_key]:
                    rob.update(study_info)
                    writer.writerow(rob)


def convert_keys_to_string(dictionary):
    """Recursively converts dictionary keys to strings."""
    if not isinstance(dictionary, dict):
        return dictionary
    return dict(
        (str(k), convert_keys_to_string(v)) for k, v in dictionary.items())
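
# Example of convert_keys_to_string on nested, non-string keys (illustrative):
#   convert_keys_to_string({1: {2.5: 'a', (3, 4): 'b'}})
#   -> {'1': {'2.5': 'a', '(3, 4)': 'b'}}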


if __name__ == '__main__':
    pool = Pool(processes=8)
    reviews = []
    for subdir, dirs, files in os.walk(REVIEWS_DIR):
        for file in files:
            filepath = os.path.join(subdir, file)
            if filepath.endswith('.rm5'):
                reviews.append(filepath)

    reviews = [filepath for filepath in reviews if 'publication' in filepath]
    all_studies = pool.map(extract_review_info, reviews, chunksize=8)

    rob_headers = [
        'file', 'id', 'modified', 'result', 'result_description', 'rob_name',
        'rob_id', 'rob_description', 'group_id', 'group_name'
    ]
    write_results_to_csv('robs.csv', rob_headers, 'robs')
                                                       setting=actual_setting)
        create_parent_dir_if_not_exists(viz_fp)
        try:
            if add_label_text:
                label_text = str(structure)
            else:
                label_text = None

            viz = scoremap_overlay_on(bg='original',
                                      stack=stack,
                                      sec=sec,
                                      structure=structure,
                                      downscale=downscale,
                                      label_text=label_text,
                                      setting=actual_setting)
            imsave(viz_fp, img_as_ubyte(viz))
            upload_from_ec2_to_s3(viz_fp)
        except Exception as e:
            sys.stderr.write('%s\n' % e)
            return

    pool = Pool(NUM_CORES)
    pool.map(save_scoremap, all_known_structures)
    pool.close()
    pool.join()

    sys.stderr.write('Visualize scoremaps: %.2f seconds.\n' %
                     (time.time() - t))
    # 7s for one structure, one section, single process
    # 20s for all structures, one section, 8 processes
Exemplo n.º 43
0
    def fit(self, inputData, outputData, transientTime=0, verbose=0):
        rank = len(inputData.shape) - 1

        if rank != self.n_inputDimensions and rank != self.n_inputDimensions + 1:
            raise ValueError(
                "The `inputData` does not have a suitable shape. It has to have {0} spatial dimensions and 1 temporal dimension.".format(
                    self.n_inputDimensions))

        # reshape the input so that it has the shape (timeseries, time, input_dimension^n)
        if rank == self.n_inputDimensions:
            inputData = inputData.reshape(1, *inputData.shape)
            outputData = outputData.reshape(1, *outputData.shape)
        else:
            # modify rank again
            rank -= 1

        partialLength = (inputData.shape[1] - transientTime)
        totalLength = inputData.shape[0] * partialLength
        timeseriesCount = inputData.shape[0]

        manager = Manager()
        fitQueue = manager.Queue()

        modifiedInputData = self._embedInputData(inputData)

        self.sharedNamespace.transientTime = transientTime

        self.sharedNamespace.partialLength = partialLength
        self.sharedNamespace.totalLength = totalLength
        self.sharedNamespace.timeseriesCount = timeseriesCount

        jobs = np.stack(np.meshgrid(*[np.arange(x) + self._filterWidth for x in inputData.shape[2:]]),
                        axis=rank).reshape(-1, rank).tolist()

        nJobs = len(jobs)

        self.resetState()

        iterator = FittingArrayIterator(modifiedInputData, outputData, jobs, self._filterWidth, self._stride, self)

        pool = Pool(processes=self._nWorkers, initializer=SpatioTemporalESN._init_fitProcess, initargs=[fitQueue, self])
        pool.map_async(self._fitProcess, iterator, chunksize=16)

        def _processPoolWorkerResults():
            nJobsDone = 0

            if verbose > 0:
                bar = progressbar.ProgressBar(max_value=nJobs, redirect_stdout=True, poll_interval=0.0001)
                bar.update(0)

            while nJobsDone < nJobs:
                data = fitQueue.get()

                # result of fitting
                indices, x, WOut = data
                id = self._uniqueIDFromIndices(indices)

                if WOut is None:
                    import sys
                    print("WARNING: Fit process for pixel {0} did not succeed".format(indices), file=sys.stderr)

                # store WOut
                if self._averageOutputWeights:
                    if WOut is not None:
                        self._WOut += WOut / np.prod(self.inputShape)
                else:
                    self._WOuts[id] = WOut

                    # store x
                self._xs[id] = x

                nJobsDone += 1
                if verbose > 0:
                    bar.update(nJobsDone)
                    if verbose > 1:
                        print(nJobsDone)

            if verbose > 0:
                bar.finish()

        _processPoolWorkerResults()

        pool.close()
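
# A minimal, self-contained sketch of the queue-based result collection used in
# fit() above: workers receive a Manager queue via the Pool initializer, push
# their results onto it, and the parent drains the queue while map_async runs.
# All names here are illustrative.
from multiprocessing import Manager, Pool

_queue = None

def _init_worker(q):
    global _queue
    _queue = q

def _work(i):
    _queue.put((i, i * i))  # push (job id, result) back to the parent

if __name__ == "__main__":
    manager = Manager()
    q = manager.Queue()
    jobs = list(range(8))
    pool = Pool(processes=2, initializer=_init_worker, initargs=[q])
    pool.map_async(_work, jobs, chunksize=2)
    done = 0
    results = {}
    while done < len(jobs):
        job_id, value = q.get()
        results[job_id] = value
        done += 1
    pool.close()
    pool.join()
    print(results)
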
            continue

        new_mask = odm.dense_mask_to_sparse_mask(binary_mask,category_ids,default_label=255)
        base_name = wmlu.base_name(full_path)+".png"
        save_path = os.path.join(save_dir,base_name)
        new_mask = new_mask.astype(np.uint8)
        if os.path.exists(save_path):
            print(f"WARNING: File {save_path} exists.")
        cv2.imwrite(save_path,new_mask)
        sys.stdout.write(f"\r{i}")


if __name__ == "__main__":
    data_dir ="/home/wj/ai/mldata/mapillary_vistas/"
    save_dir = os.path.join(data_dir,'boe_labels_validation')
    name_to_id_dict = update_name_to_id(name_to_id_dict,data_dir)
    idxs = list(range(0,18049,50))
    r_idxs = []
    for i in range(len(idxs)-1):
        r_idxs.append([idxs[i],idxs[i+1]])
    wmlu.show_list(r_idxs)
    pool = Pool(10)
    def fun(d):
        trans_data(data_dir,save_dir,d[0],d[1])
    res = list(pool.map(fun,r_idxs))
    pool.close()
    pool.join()
    print(res)
    #list(map(fun,r_idxs))
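
# Note on the local `fun` defined above: with the standard library's
# multiprocessing.Pool, a function defined inside the __main__ block can only be
# resolved by the workers under the fork start method. A sketch of a more
# portable variant, assuming trans_data is a module-level function as it appears
# to be here:
#
#   from functools import partial
#   res = list(pool.starmap(partial(trans_data, data_dir, save_dir), r_idxs))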

    def integrate(self,
                  func,
                  min_iter=10,
                  max_iter=20,
                  var_thresh=0.0,
                  max_err=10,
                  neff=float('inf'),
                  nmax=None,
                  progress=False,
                  epoch=None,
                  verbose=True):
        '''
        Evaluate the integral

        Parameters
        ----------
        func : function
            Integrand function
        min_iter : int
            Minimum number of integrator iterations
        max_iter : int
            Maximum number of integrator iterations
        var_thresh : float
            Variance threshold for terminating integration
        max_err : int
            Maximum number of errors to catch before terminating integration
        neff : float
            Effective samples threshold for terminating integration
        nmax : int
            Maximum number of samples to draw
        progress : bool
            Print GMM parameters each iteration
        epoch : int
            If given, reset the integrator state every `epoch` iterations
        verbose : bool
            Print a convergence monitoring line after each iteration
        '''
        err_count = 0
        cumulative_eval_time = 0
        if nmax is None:
            nmax = max_iter * self.n
        while self.iterations < max_iter and self.ntotal < nmax and self.eff_samp < neff:
            #            print('Iteration:', self.iterations)
            if err_count >= max_err:
                print('Exiting due to errors...')
                break
            try:
                self._sample()
            except KeyboardInterrupt:
                print('KeyboardInterrupt, exiting...')
                break
            except Exception as e:
                print(traceback.format_exc())
                print('Error sampling, resetting...')
                err_count += 1
                self._reset()
                continue
            t1 = time.time()
            if self.proc_count is None:
                self.value_array = func(np.copy(self.sample_array))
            else:
                split_samples = np.array_split(self.sample_array,
                                               self.proc_count)
                p = Pool(self.proc_count)
                self.value_array = np.concatenate(p.map(func, split_samples),
                                                  axis=0)
                p.close()
            cumulative_eval_time += time.time() - t1
            self._calculate_prior()
            self._calculate_results()
            self.iterations += 1
            self.ntotal += self.n
            if self.iterations >= min_iter and self.var < var_thresh:
                break
            try:
                self._train()
            except KeyboardInterrupt:
                print('KeyboardInterrupt, exiting...')
                break
            except Exception as e:
                print(traceback.format_exc())
                print('Error training, resetting...')
                err_count += 1
                self._reset()
            if self.user_func is not None:
                self.user_func(self)
            if progress:
                for k in self.gmm_dict:
                    if self.gmm_dict[k] is not None:
                        self.gmm_dict[k].print_params()
            if epoch is not None and self.iterations % epoch == 0:
                self._reset()
            if verbose:
                # Standard mcsampler message, to monitor convergence
                print(" : {} {} {} {} {} ".format(
                    (self.iterations - 1) * self.n, self.eff_samp,
                    np.sqrt(2 * np.max(self.cumulative_values)),
                    np.sqrt(2 * np.log(self.integral)), "-"))
        print('cumulative eval time: ', cumulative_eval_time)
        print('integrator iterations: ', self.iterations)
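
# A minimal sketch of the chunked evaluation used in integrate() above: the
# sample array is split across processes, each chunk is evaluated independently,
# and the pieces are concatenated back in order. func here is a stand-in for the
# integrand passed to integrate().
import numpy as np
from multiprocessing import Pool

def func(samples):
    return np.sum(samples ** 2, axis=1)  # toy integrand, one value per sample

if __name__ == "__main__":
    sample_array = np.random.rand(1000, 3)
    proc_count = 4
    split_samples = np.array_split(sample_array, proc_count)
    with Pool(proc_count) as p:
        value_array = np.concatenate(p.map(func, split_samples), axis=0)
    assert value_array.shape == (1000,)
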
        big_labelmap[y0:y0+5000, x0:x0+5000][labelmap != 0] = labelmap[labelmap != 0] + n
        n += labelmap.max()

    labelmap_fp = os.path.splitext(input_img_fp)[0] + '_labelmap_%(alg)s.bp' % dict(alg=alg)

    bp.pack_ndarray_file(big_labelmap, labelmap_fp)

#     for tile_i in range(12):
#         execute_command('rm %(DETECTED_CELLS_DIR)s/%(stack)s/%(img_fn)s/%(img_fn)s_image_inverted_%(tile_i)02d.tif' % \
#                         dict(DETECTED_CELLS_DIR=DETECTED_CELLS_DIR, stack=stack, img_fn=img_fn, tile_i=tile_i))
#         execute_command('rm %(DETECTED_CELLS_DIR)s/%(stack)s/%(img_fn)s/%(img_fn)s_image_inverted_%(tile_i)02d_labelmap_cellprofiler.bp' % \
#                         dict(DETECTED_CELLS_DIR=DETECTED_CELLS_DIR, stack=stack, img_fn=img_fn, tile_i=tile_i))

    # Generate labelmap viz
    t = time.time()

    viz = img_as_ubyte(label2rgb(big_labelmap, bg_label=0, bg_color=(0, 0, 0)))
    cv2.imwrite(os.path.splitext(input_img_fp)[0] + '_labelmap_%(alg)s.png' % dict(alg=alg), viz);

    sys.stderr.write('Generate labelmap viz: %.2f seconds.\n' % (time.time()-t)) # 60s


t = time.time()

pool = Pool(12)
pool.map(detect_cells, range(first_sec, last_sec+1))
pool.close()
pool.join()

sys.stderr.write('Overall time: %.2f seconds.\n' % (time.time()-t))
Exemplo n.º 47
0
                    "--image_dir",
                    required=True,
                    help="path to the input dir")
    ap.add_argument("-p",
                    "--plot",
                    type=bool,
                    default=False,
                    required=False,
                    help="plot results")
    ap.add_argument("-ps",
                    "--pool_size",
                    type=int,
                    default=1,
                    required=False,
                    help="pool size for multiprocessing")
    args = vars(ap.parse_args())

    images = glob.glob(args["image_dir"] + '*')
    plot = bool(args["plot"])
    pool_size = int(args["pool_size"])
    print(args["image_dir"], plot)

    pool = Pool(pool_size)
    pool_outputs = pool.map(partial(get_boxes, config=config, plot=plot),
                            images[:])
    pool.close()
    pool.join()
    pool.terminate()
    for output in pool_outputs:
        cv2.imwrite(output[2].replace('\\in', '\\out'), output[3])
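
# Note: in the snippet above, argparse's type=bool treats any non-empty string
# (including "False") as True. A common workaround, sketched here, is an explicit
# string-to-bool converter; the name str2bool is illustrative.
import argparse

def str2bool(value):
    if isinstance(value, bool):
        return value
    if value.lower() in ("yes", "true", "t", "1"):
        return True
    if value.lower() in ("no", "false", "f", "0"):
        return False
    raise argparse.ArgumentTypeError("Boolean value expected.")

# usage sketch:
#   ap.add_argument("-p", "--plot", type=str2bool, default=False, required=False,
#                   help="plot results")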
Exemplo n.º 48
0
def balance(cool_uri, nproc=1, chunksize=int(1e7), mad_max=5, min_nnz=10,
            min_count=0, ignore_diags=1, tol=1e-5, max_iters=200):
    """
    Cooler contact matrix balancing.
    
    Parameters
    ----------
    cool_uri : str
        URI of cooler group.
    nproc : int
        Number of processes. (Default: 1)
        
    """
    cool_path, group_path = parse_cooler_uri(cool_uri)
    # pre-check the weight column
    with h5py.File(cool_path, 'r+') as h5:
        grp = h5[group_path]
        if 'weight' in grp['bins']:
            del grp['bins']['weight'] # Overwrite the weight column
    
    log.info('Balancing {0}'.format(cool_uri))
    
    clr = Cooler(cool_uri)
    
    try:
        if nproc > 1:
            pool = Pool(nproc)
            map_ = pool.imap_unordered
        else:
            map_ = map
        
        if clr.info['metadata']['onlyIntra']=='True':
            onlyIntra = True
        else:
            onlyIntra = False
        
        bias, stats = ice.iterative_correction(
                clr,
                chunksize=chunksize,
                cis_only=onlyIntra,
                trans_only=False,
                tol=tol,
                min_nnz=min_nnz,
                min_count=min_count,
                blacklist=None,
                mad_max=mad_max,
                max_iters=max_iters,
                ignore_diags=ignore_diags,
                rescale_marginals=True,
                use_lock=False,
                map=map_)
    finally:
        if nproc > 1:
            pool.close()
    
    if not stats['converged']:
        log.error('Iteration limit reached without convergence')
        log.error('Storing final result. Check log to assess convergence.')
    
    with h5py.File(cool_path, 'r+') as h5:
        grp = h5[group_path]
        # add the bias column to the file
        h5opts = dict(compression='gzip', compression_opts=6)
        grp['bins'].create_dataset('weight', data=bias, **h5opts)
        grp['bins']['weight'].attrs.update(stats)
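
# A hypothetical usage sketch for balance() above. The URI format follows the
# usual cooler convention (file path, optionally followed by '::' and a group
# path inside the file); the file names here are made up.
#
#   balance('GM12878.cool', nproc=4)
#   balance('GM12878.mcool::resolutions/10000', nproc=8, max_iters=300)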
Exemplo n.º 49
0
    benchmark = Benchmark()
    
    if args.benchmark == 'CEC13':
        func_num = 28
        funcs = [benchmark.cec13[_] for _ in range(func_num)]
    elif args.benchmark == 'CEC17':
        func_num = 30
        funcs = [benchmark.cec17[_] for _ in range(func_num)]

    res = np.empty((func_num, args.repetition))
    cst = np.empty((func_num, args.repetition))

    for i in range(func_num):

        if args.multiprocess:
            p = Pool(5)
            results = p.map(single_opt, [(funcs, i, args)]*args.repetition)
            p.close()
            p.join()

            for idx, tmp in enumerate(results):
                res[i, idx] = tmp[0]
                cst[i, idx] = tmp[1]
        else:
            for j in range(args.repetition):
                res[i, j], cst[i, j] = single_opt((funcs, i, args))

    with open("logs/{}_{}_{}D.pkl".format(args.alg_name, args.benchmark, args.dim), "wb") as f:
        pkl.dump({"res": res, "cst": cst}, f)
Exemplo n.º 50
0
def resample(settings, partial=False, full_cvs=False):
    """
    Resample each shooting point in each thread with different CV definitions to produce new output files with extant
    aimless shooting data.

    This function also assesses decorrelation times and produces one or more decorrelated output files. If and only if
    settings.information_error_checking == True, decorrelated files are produced at each settings.information_error_freq
    increment. In this case, if partial == True, decorrelation will only be assessed for data lengths absent from the
    info_err.out file in the working directory.

    Parameters
    ----------
    settings : argparse.Namespace
        Settings namespace object
    partial : bool
        If True, reads the info_err.out file and only builds new decorrelated output files where the corresponding lines
        are missing from that file. If partial == False, decorrelation is assessed for every valid data length. Has no
        effect if not settings.information_error_checking.
    full_cvs : bool
        If True, also resamples as_full_cvs.out using every prod trajectory in the working directory.

    Returns
    -------
    None

    """

    # todo: test this more thoroughly using a dummy thread and a manual decorrelation time calculation using different software

    # This function is sometimes called from outside the working directory, so make sure we're there
    os.chdir(settings.working_directory)

    # Remove pre-existing output files if any, initialize new one
    open(settings.working_directory + '/as_raw_resample.out', 'w').close()
    if settings.information_error_checking:
        open(settings.working_directory + '/as_raw_timestamped.out',
             'w').close()

    # Load in allthreads from restart.pkl
    try:
        allthreads = pickle.load(open('restart.pkl', 'rb'))
    except FileNotFoundError:
        raise FileNotFoundError(
            'resample = True requires restart.pkl, but could not find one in working directory: '
            + settings.working_directory)

    # Open files for writing outside loop (much faster than opening/closing for each write)
    f1 = open(settings.working_directory + '/as_raw_resample.out', 'a')
    if settings.information_error_checking:
        f2 = open(settings.working_directory + '/as_raw_timestamped.out', 'a')

    # Iterate through each thread's history.init_coords list and obtain CV values as needed
    for thread in allthreads:
        thread.this_cvs_list = [
        ]  # initialize full nested list of CV values for this thread
        thread.cvs_for_later = [
        ]  # need this one with empty lists for failed moves, for indexing reasons
        for step_index in range(len(thread.history.prod_results)):
            if thread.history.prod_results[step_index][0] in ['fwd', 'bwd']:
                if thread.history.prod_results[step_index][0] == 'fwd':
                    this_basin = 'B'
                else:  # 'bwd'
                    this_basin = 'A'

                # Get CVs for this shooting point   # todo: a bit sloppy... can I clean this up?
                try:
                    if not os.path.exists(
                            thread.history.init_coords[step_index][0]):
                        warnings.warn(
                            'attempted to resample ' +
                            thread.history.init_coords[step_index][0] +
                            ' but no such '
                            'file exists in the working directory\nSkipping and continuing',
                            RuntimeWarning)
                        thread.cvs_for_later.append([])
                        continue  # skip to next step_index
                except IndexError:  # getting cv's failed (maybe corrupt coordinate file) so consider this step failed
                    thread.cvs_for_later.append([])
                    continue  # skip to next step_index
                try:
                    this_cvs = get_cvs(
                        thread.history.init_coords[step_index][0], settings)
                except IndexError:  # getting cv's failed (maybe corrupt coordinate file) so consider this step failed
                    thread.cvs_for_later.append([])
                    continue  # skip to next step_index

                # Write CVs to as_raw_resample.out
                f1.write(this_basin + ' <- ' + this_cvs + '\n')
                f1.flush()
                if settings.information_error_checking:
                    f2.write(
                        str(thread.history.timestamps[step_index]) + ' ' +
                        this_basin + ' <- ' + this_cvs + '\n')
                    f2.flush()

                # Append this_cvs to running list for evaluating decorrelation time
                thread.this_cvs_list.append(
                    [[float(item) for item in this_cvs.split(' ')],
                     thread.history.timestamps[step_index]])
                thread.cvs_for_later.append(
                    [float(item) for item in this_cvs.split(' ')])
            else:
                thread.cvs_for_later.append([])

    # Close files just to be sure
    f1.close()
    if settings.information_error_checking:
        f2.close()

    if settings.information_error_checking:  # sort timestamped output file
        shutil.copy(
            settings.working_directory + '/as_raw_timestamped.out',
            settings.working_directory + '/as_raw_timestamped_copy.out')
        open(settings.working_directory + '/as_raw_timestamped.out',
             'w').close()
        with open(settings.working_directory + '/as_raw_timestamped_copy.out',
                  'r') as f, \
             open(settings.working_directory + '/as_raw_timestamped.out',
                  'a') as f_sorted:
            for line in sorted(f):
                f_sorted.write(line)
        os.remove(settings.working_directory + '/as_raw_timestamped_copy.out')

    # Construct list of data lengths to perform decorrelation for
    if settings.information_error_checking:
        if not partial:
            lengths = [
                leng for leng in range(
                    settings.information_error_freq,
                    len(
                        open(
                            settings.working_directory +
                            '/as_raw_timestamped.out', 'r').readlines()) +
                    1, settings.information_error_freq)
            ]
        else:  # if partial
            lengths = [
                leng for leng in range(
                    settings.information_error_freq,
                    len(
                        open(
                            settings.working_directory +
                            '/as_raw_timestamped.out', 'r').readlines()) +
                    1, settings.information_error_freq) if not leng in [
                        int(line.split(' ')[0]) for line in open(
                            settings.working_directory +
                            '/info_err.out', 'r').readlines()
                    ]
            ]
        pattern = re.compile(
            '[0-9]+')  # pattern for reading out timestamp from string
    else:
        lengths = [
            len(
                open(settings.working_directory + '/as_raw_resample.out',
                     'r').readlines())
        ]
        pattern = None

    # Assess decorrelation and write as_decorr.out
    for length in lengths:
        if settings.information_error_checking:
            suffix = '_' + str(
                length
            )  # only use-case with multiple lengths, so this keeps them from stepping on one another's toes
            cutoff_timestamp = int(
                pattern.findall(
                    open(
                        settings.working_directory + '/as_raw_timestamped.out',
                        'r').readlines()[length - 1])[0])
        else:
            cutoff_timestamp = math.inf
            suffix = ''
        open(settings.working_directory + '/as_decorr' + suffix + '.out',
             'w').close()
        f3 = open(settings.working_directory + '/as_decorr' + suffix + '.out',
                  'a')
        for thread in allthreads:
            if thread.this_cvs_list:  # if there were any 'fwd' or 'bwd' results in this thread
                mapped = list(
                    map(
                        list,
                        zip(*[
                            item[0] for item in thread.this_cvs_list
                            if item[1] <= cutoff_timestamp
                        ])))  # list of lists of values of each CV

                slowest_lag = -1  # initialize running tally of slowest autocorrelation time among CVs in this thread
                if settings.include_qdot:
                    ndims = len(thread.this_cvs_list[0][0]) / 2  # number of non-rate-of-change CVs
                    if not ndims % 1 == 0:
                        raise ValueError(
                            'include_qdot = True, but an odd number of dimensions were found in the '
                            'threads in restart.pkl, so they can\'t contain inertial terms.'
                        )
                    ndims = int(ndims)
                else:
                    ndims = len(thread.this_cvs_list[0][0])

                for dim_index in range(ndims):
                    if not mapped:
                        break  # no CV data below the cutoff timestamp for this thread
                    this_cv = mapped[dim_index]
                    if len(this_cv) > 1:
                        this_autocorr = stattools.acf(this_cv,
                                                      nlags=len(this_cv) - 1,
                                                      fft=True)
                        for lag in range(len(this_cv) - 1):
                            corr = this_autocorr[lag]
                            if abs(corr) <= 1.96 / numpy.sqrt(len(this_cv)):
                                # keep the largest (slowest) decorrelation lag across CVs
                                slowest_lag = max(slowest_lag, lag + 1)
                                break

                if slowest_lag > 0:  # only proceed to writing decorrelated output file if a slowest_lag was found
                    # Write the same way as to as_raw_resample.out above, but starting the range at slowest_lag
                    for step_index in range(slowest_lag,
                                            len(thread.history.prod_results)):
                        if thread.history.prod_results[step_index][0] in [
                                'fwd', 'bwd'
                        ] and thread.history.timestamps[
                                step_index] <= cutoff_timestamp:
                            if thread.history.prod_results[step_index][
                                    0] == 'fwd':
                                this_basin = 'B'
                            else:  # 'bwd'
                                this_basin = 'A'

                            # Get CVs for this shooting point and write them to the decorrelated output file
                            if thread.cvs_for_later[step_index]:
                                this_cvs = thread.cvs_for_later[
                                    step_index]  # retrieve CVs from last evaluation
                                f3.write(
                                    this_basin + ' <- ' +
                                    ' '.join([str(item)
                                              for item in this_cvs]) + '\n')

        f3.close()

    # Move resample raw output file to take its place as the only raw output file
    shutil.move(settings.working_directory + '/as_raw_resample.out',
                settings.working_directory + '/as_raw.out')

    # Implement full_cvs
    if full_cvs:
        open(settings.working_directory + '/as_full_cvs.out', 'w').close()
        temp_settings = copy.deepcopy(settings)
        temp_settings.include_qdot = False  # never want to include_qdot in this upcoming call to get_cvs
        try:
            affinity = len(os.sched_getaffinity(0))
        except AttributeError:  # os.sched_getaffinity raises AttributeError on non-UNIX systems.
            affinity = 1
        if affinity == 1:
            with open(settings.working_directory + '/as_full_cvs.out',
                      'a') as f:
                for thread in allthreads:
                    for step_index in range(
                            min([
                                len(thread.history.prod_results),
                                len(thread.history.prod_trajs)
                            ])
                    ):  # just in case one got an extra write in over the other
                        if thread.history.prod_results[step_index] in [[
                                'fwd', 'bwd'
                        ], ['bwd', 'fwd']]:  # if step accepted
                            for job_index in range(2):
                                if os.path.exists(
                                        thread.history.prod_trajs[step_index]
                                    [job_index]):
                                    f.write(
                                        get_cvs(
                                            thread.history.
                                            prod_trajs[step_index][job_index],
                                            temp_settings, False, 'all') +
                                        '\n')
        else:  # affinity > 1
            # Map partial_full_cvs calls to available processes
            with Pool(affinity) as p:
                p.starmap(
                    partial_full_cvs,
                    zip(allthreads, [
                        'partial_full_cvs_' + str(thread_index) + '.out'
                        for thread_index in range(len(allthreads))
                    ], itertools.repeat(temp_settings)))
            # Finally, combine the partial files into the full file
            with open(settings.working_directory + '/as_full_cvs.out',
                      'w') as outfile:
                for fname in [
                        'partial_full_cvs_' + str(thread_index) + '.out'
                        for thread_index in range(len(allthreads))
                ]:
                    with open(fname) as infile:
                        for line in infile:
                            if line:  # skip blank lines
                                outfile.write(line)
                    os.remove(fname)
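The decorrelation test in the loop above (statsmodels' acf plus the 1.96/sqrt(N) cutoff) can be read in isolation. A minimal sketch for a single CV time series, assuming only that numpy and statsmodels are installed:

import numpy
from statsmodels.tsa import stattools

def decorrelation_lag(series):
    # First lag at which the autocorrelation is statistically indistinguishable
    # from zero at the ~95% level (the 1.96/sqrt(N) cutoff used above);
    # returns -1 if the series never decorrelates.
    series = numpy.asarray(series, dtype=float)
    if len(series) < 2:
        return -1
    acf = stattools.acf(series, nlags=len(series) - 1, fft=True)
    cutoff = 1.96 / numpy.sqrt(len(series))
    for lag in range(len(series) - 1):
        if abs(acf[lag]) <= cutoff:
            return lag + 1
    return -1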
Exemplo n.º 51
0
            raise

#             input_dir = DataManager.get_image_dir_v2(stack=stack, prep_id=5, version=version, resol='raw')
        out_dir = DataManager.get_image_dir_v2(stack=stack,
                                               prep_id=2,
                                               resol=resol,
                                               version=version)
        print('out_dir:', out_dir)
        #             script = os.path.join(REPO_DIR, 'preprocess', 'warp_crop_IM_v3.py')

        #         ! rm -rf {out_dir}
        create_if_not_exists(out_dir)

        t = time.time()

        pool = Pool(8)
        _ = pool.map(
            lambda img_name: crop(stack=stack,
                                  img_name=img_name,
                                  version=version,
                                  resol=resol,
                                  x=x,
                                  y=y,
                                  w=w,
                                  h=h),
            metadata_cache['valid_filenames'][stack])
        pool.close()
        pool.join()

        #             for img_name in metadata_cache['valid_filenames'][stack]:
        #                 f(stack=stack, img_name=img_name, version=version, resol=resol,
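The snippet above maps a lambda over the pool, which only works because the multiprocess package serializes callables with dill (the standard pickle module cannot serialize lambdas). A minimal, hypothetical sketch of the same pattern with explicit cleanup:

from multiprocess import Pool  # dill-based fork of multiprocessing

def scale_all(values, factor, workers=4):
    pool = Pool(workers)
    try:
        # the lambda closes over `factor`; dill ships it to the workers
        return pool.map(lambda v: v * factor, values)
    finally:
        pool.close()
        pool.join()

# e.g. scale_all(range(8), 3) -> [0, 3, 6, 9, 12, 15, 18, 21]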
Exemplo n.º 52
0
    def evaluate(self, split_name: str, ds: torch.utils.data.DataLoader):

        # Prepare data saving:
        flag_filepath_format = os.path.join(self.eval_dirpath, split_name,
                                            "{}.flag")

        # Loading model
        self.load_checkpoint()
        self.model.eval()

        # Create pool for multiprocessing
        pool = None
        if not self.config["eval_params"]["patch_size"]:
            # If single image is not being split up, then a pool to process each sample in the batch makes sense
            pool = Pool(processes=self.config["num_workers"])

        compute_polygonization = self.config["eval_params"]["save_individual_outputs"]["poly_shapefile"] or \
                                 self.config["eval_params"]["save_individual_outputs"]["poly_geojson"] or \
                                 self.config["eval_params"]["save_individual_outputs"]["poly_viz"] or \
                                 self.config["eval_params"]["save_aggregated_outputs"]["poly_coco"]

        # Saving individual outputs to disk:
        save_individual_outputs = True in self.config["eval_params"][
            "save_individual_outputs"].values()
        saver_async = None
        if save_individual_outputs:
            save_outputs_partial = partial(
                save_utils.save_outputs,
                config=self.config,
                eval_dirpath=self.eval_dirpath,
                split_name=split_name,
                flag_filepath_format=flag_filepath_format)
            saver_async = async_utils.Async(save_outputs_partial)
            saver_async.start()

        # Saving aggregated outputs
        save_aggregated_outputs = True in self.config["eval_params"][
            "save_aggregated_outputs"].values()

        tile_data_list = []

        if self.gpu == 0:
            tile_iterator = tqdm(ds,
                                 desc="Eval {}: ".format(split_name),
                                 leave=True)
        else:
            tile_iterator = ds
        for tile_i, tile_data in enumerate(tile_iterator):
            # --- Inference, add result to tile_data_list
            if self.config["eval_params"]["patch_size"] is not None:
                # Cut image into patches for inference
                inference.inference_with_patching(self.config, self.model,
                                                  tile_data)
            else:
                # Feed images as-is to the model
                inference.inference_no_patching(self.config, self.model,
                                                tile_data)

            tile_data_list.append(tile_data)

            # --- Accumulate batches into tile_data_list until capacity is reached (or this is the last batch)
            if self.config["eval_params"]["batch_size_mult"] <= len(tile_data_list)\
                    or tile_i == len(tile_iterator) - 1:
                # Concat tensors of tile_data_list
                accumulated_tile_data = {}
                for key in tile_data_list[0].keys():
                    if isinstance(tile_data_list[0][key], list):
                        accumulated_tile_data[key] = [
                            item for _tile_data in tile_data_list
                            for item in _tile_data[key]
                        ]
                    elif isinstance(tile_data_list[0][key], torch.Tensor):
                        accumulated_tile_data[key] = torch.cat(
                            [_tile_data[key] for _tile_data in tile_data_list],
                            dim=0)
                    else:
                        raise TypeError(
                            f"Type {type(tile_data_list[0][key])} is not handled!"
                        )
                tile_data_list = []  # Empty tile_data_list
            else:
                # tile_data_list is not full yet, continue running inference...
                continue

            # --- Polygonize
            if compute_polygonization:
                crossfield = accumulated_tile_data[
                    "crossfield"] if "crossfield" in accumulated_tile_data else None
                accumulated_tile_data["polygons"], accumulated_tile_data[
                    "polygon_probs"] = polygonize.polygonize(
                        self.config["polygonize_params"],
                        accumulated_tile_data["seg"],
                        crossfield_batch=crossfield,
                        pool=pool)

            # --- Save output
            if self.config["eval_params"]["save_individual_outputs"]["seg_mask"] or \
                    self.config["eval_params"]["save_aggregated_outputs"]["seg_coco"]:
                # Take seg_interior:
                seg_pred_mask = self.config["eval_params"][
                    "seg_threshold"] < accumulated_tile_data["seg"][:, 0, ...]
                accumulated_tile_data["seg_mask"] = seg_pred_mask

            accumulated_tile_data = local_utils.batch_to_cpu(
                accumulated_tile_data)
            sample_list = local_utils.split_batch(accumulated_tile_data)

            # Save individual outputs:
            if save_individual_outputs:
                for sample in sample_list:
                    saver_async.add_work(sample)

            # Store aggregated outputs:
            if save_aggregated_outputs:
                self.shared_dict["name_list"].extend(
                    accumulated_tile_data["name"])
                if self.config["eval_params"]["save_aggregated_outputs"][
                        "stats"]:
                    y_pred = accumulated_tile_data["seg"][:, 0, ...].cpu()
                    if "gt_mask" in accumulated_tile_data:
                        y_true = accumulated_tile_data["gt_mask"][:, 0, ...]
                    elif "gt_polygons_image" in accumulated_tile_data:
                        y_true = accumulated_tile_data[
                            "gt_polygons_image"][:, 0, ...]
                    else:
                        raise ValueError(
                            "Either gt_mask or gt_polygons_image should be in accumulated_tile_data"
                        )
                    iou = measures.iou(
                        y_pred.reshape(y_pred.shape[0], -1),
                        y_true.reshape(y_true.shape[0], -1),
                        threshold=self.config["eval_params"]["seg_threshold"])
                    self.shared_dict["iou_list"].extend(iou.cpu().numpy())
                if self.config["eval_params"]["save_aggregated_outputs"][
                        "seg_coco"]:
                    for sample in sample_list:
                        annotations = save_utils.seg_coco(sample)
                        self.shared_dict["seg_coco_list"].extend(annotations)
                if self.config["eval_params"]["save_aggregated_outputs"][
                        "poly_coco"]:
                    for sample in sample_list:
                        annotations = save_utils.poly_coco(
                            sample["polygons"], sample["polygon_probs"],
                            sample["image_id"].item())
                        self.shared_dict["poly_coco_list"].append(
                            annotations
                        )  # annotations could be a dict, or a list
        # END of loop over samples

        # Save aggregated results
        if save_aggregated_outputs:
            self.barrier.wait(
            )  # Wait on all processes so that shared_dict is synchronized.
            if self.gpu == 0:
                if self.config["eval_params"]["save_aggregated_outputs"][
                        "stats"]:
                    print("Start saving stats:")
                    # Save sample_stats in CSV:
                    t1 = time.time()
                    stats_filepath = os.path.join(
                        self.eval_dirpath, "{}.stats.csv".format(split_name))
                    stats_file = open(stats_filepath, "w")
                    fnames = ["name", "iou"]
                    writer = csv.DictWriter(stats_file, fieldnames=fnames)
                    writer.writeheader()
                    for name, iou in sorted(zip(self.shared_dict["name_list"],
                                                self.shared_dict["iou_list"]),
                                            key=lambda pair: pair[0]):
                        writer.writerow({"name": name, "iou": iou})
                    stats_file.close()
                    print(f"Finished in {time.time() - t1:02}s")

                if self.config["eval_params"]["save_aggregated_outputs"][
                        "seg_coco"]:
                    print("Start saving seg_coco:")
                    t1 = time.time()
                    seg_coco_filepath = os.path.join(
                        self.eval_dirpath,
                        "{}.annotation.seg.json".format(split_name))
                    python_utils.save_json(
                        seg_coco_filepath,
                        list(self.shared_dict["seg_coco_list"]))
                    print(f"Finished in {time.time() - t1:02}s")

                if self.config["eval_params"]["save_aggregated_outputs"][
                        "poly_coco"]:
                    print("Start saving poly_coco:")
                    poly_coco_base_filepath = os.path.join(
                        self.eval_dirpath, f"{split_name}.annotation.poly")
                    t1 = time.time()
                    save_utils.save_poly_coco(
                        self.shared_dict["poly_coco_list"],
                        poly_coco_base_filepath)
                    print(f"Finished in {time.time() - t1:02}s")

        # Sync point of individual outputs
        if save_individual_outputs:
            print_utils.print_info(
                f"GPU {self.gpu} -> INFO: Finishing saving individual outputs."
            )
            saver_async.join()
            self.barrier.wait(
            )  # Wait on all processes so that all saver_asyncs are finished
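The batch-accumulation step above (concatenate tensor entries, flatten list entries, reject anything else) could be factored into a standalone helper. A hypothetical sketch with illustrative names only:

import torch

def merge_batches(batch_list):
    # Merge a list of per-batch dicts the same way as above: torch.cat along
    # dim 0 for tensor values, flattening for list values.
    merged = {}
    for key in batch_list[0].keys():
        first = batch_list[0][key]
        if isinstance(first, list):
            merged[key] = [item for batch in batch_list for item in batch[key]]
        elif isinstance(first, torch.Tensor):
            merged[key] = torch.cat([batch[key] for batch in batch_list], dim=0)
        else:
            raise TypeError(f"Type {type(first)} is not handled!")
    return merged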
Exemplo n.º 53
0
    for iy, y0 in enumerate(np.arange(0, img_h, 5000)):
        for ix, x0 in enumerate(np.arange(0, img_w, 5000)):
            origins.append((x0, y0))

    alg = 'cellprofiler'

    big_labelmap = np.zeros((img_h, img_w), dtype=np.int64)
    n = 0
    for i, input_fp in enumerate(input_fps):
        prefix = os.path.splitext(input_fp)[0]
        labelmap = labelmap_alltiles[i].astype(np.int64) # astype(np.int64) is important, otherwise results in negative label values.
        x0, y0 = origins[i]
        big_labelmap[y0:y0+5000, x0:x0+5000][labelmap != 0] = labelmap[labelmap != 0] + n
        n += labelmap.max()

    labelmap_fp = os.path.splitext(input_img_fp)[0] + '_labelmap_%(alg)s.bp' % dict(alg=alg)
    bp.pack_ndarray_file(big_labelmap, labelmap_fp)
    upload_to_s3(labelmap_fp)
    
    for fp in input_fps:
        execute_command('rm ' + fp)        

t = time.time()

pool = Pool(NUM_CORES // 2)
pool.map(detect_cells, filenames)
pool.close()
pool.join()

sys.stderr.write('Overall time: %.2f seconds.\n' % (time.time()-t))
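The stitching loop above keeps labels unique across tiles by adding a running offset before pasting each tile into the full-size map. A minimal sketch of just that step (function and parameter names are illustrative):

import numpy as np

def stitch_labelmaps(tile_labelmaps, origins, full_shape, tile_size=5000):
    big = np.zeros(full_shape, dtype=np.int64)
    offset = 0
    for labelmap, (x0, y0) in zip(tile_labelmaps, origins):
        labelmap = labelmap.astype(np.int64)  # avoid negative label values
        region = big[y0:y0 + tile_size, x0:x0 + tile_size]
        region[labelmap != 0] = labelmap[labelmap != 0] + offset
        offset += labelmap.max()
    return big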
Exemplo n.º 54
0
    def aggregate(self, feature_files):
        """
        This aggregator is a front-end to the pymir3 stats module. The statistics that must be computed
        are found in the simple_aggregation key in the experiment file.

        :param feature_files: a list of FeatureTrack filenames
        :type feature_files: list[str]
        :return:
        :rtype: None

        .. note::
            These keys are expected to be set in the experiment file:
                * ['simple_aggregation']['mean']
                * ['simple_aggregation']['delta']
                * ['simple_aggregation']['variance']
                * ['simple_aggregation']['acceleration']
                * ['simple_aggregation']['slope']
                * ['simple_aggregation']['limits']
                * ['simple_aggregation']['csv']
                * ['simple_aggregation']['normalize']
                * ['general']['scratch_directory']
                * ['feature_aggregation']['aggregated_output']

        """

        features = load_feature_files(feature_files)

        if self.params['simple_aggregation']['texture_windows']:

            #for i in range(len(feature_files)):
            #    feature_files[i] = feature_files[i] + "_tw"

            jobs = []
            out_idx = 0
            for f in features:
                jobs.append((
                    f,
                    self.params['simple_aggregation']['texture_window_length'],
                    feature_files[out_idx]))
                out_idx += 1

            num_files = len(jobs)
            output_buffer_size = self.params['simple_aggregation'][
                'tw_buffer_size']

            pool = Pool(
                processes=self.params['simple_aggregation']['tw_workers'])

            pool.map(calc_textures, jobs)

            # out_idx = 0

            # for i in range(0, num_files, output_buffer_size):
            #     print "Calculating texture windows %d through %d of %d" % (i + 1, min(i + output_buffer_size, num_files), num_files)

            #     result = pool.map(calc_textures, jobs[i:min(i + output_buffer_size, num_files)])

            #     for track in result:
            #         filename = feature_files[out_idx]
            #         print "writing features to file %s..." % (filename)
            #         feature_file = open(filename, "w")
            #         track.save(feature_file)
            #         feature_file.close()
            #         del track
            #         out_idx+=1

            #     del result
            #     gc.collect()

            pool.close()
            pool.join()
            features = None

        if features is None:
            features = load_feature_files(feature_files)

        stats = feat_stats.Stats()
        m = stats.stats(
            features,
            mean=self.params['simple_aggregation']['mean'],
            delta=self.params['simple_aggregation']['delta'],
            variance=self.params['simple_aggregation']['variance'],
            acceleration=self.params['simple_aggregation']['acceleration'],
            slope=self.params['simple_aggregation']['slope'],
            limits=self.params['simple_aggregation']['limits'],
            csv=self.params['simple_aggregation']['csv'],
            normalize=self.params['simple_aggregation']['normalize'])

        out = open(
            self.params['general']['scratch_directory'] + "/" +
            self.params['feature_aggregation']['aggregated_output'], "w")

        m.save(out)

        out.close()
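For orientation, a hypothetical minimal fragment of the experiment parameters this aggregator reads; the keys are taken from the docstring and code above, the values are placeholders rather than recommendations:

params = {
    'simple_aggregation': {
        'mean': True,
        'delta': False,
        'variance': True,
        'acceleration': False,
        'slope': False,
        'limits': False,
        'csv': False,
        'normalize': True,
        'texture_windows': False,
        'texture_window_length': 40,
        'tw_buffer_size': 10,
        'tw_workers': 4,
    },
    'general': {'scratch_directory': '/tmp/scratch'},
    'feature_aggregation': {'aggregated_output': 'features.agg'},
}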
Exemplo n.º 55
0
    rescale_factor = args.rescale_factor
else:
    w = args.width
    h = args.height

n_jobs = args.jobs


def worker(img_name):

    input_fp = input_fp_map[img_name]
    output_fp = output_fp_map[img_name]
    create_parent_dir_if_not_exists(output_fp)

    img = imread(input_fp)
    step = int(round(1 / rescale_factor))  # slice step must be an integer
    save_data(img[::step, ::step], output_fp)


pool = Pool(n_jobs)
_ = pool.map(worker, in_image_names)
pool.close()
pool.join()

# run_distributed('convert \"%%(input_fp)s\" -crop %(w)dx%(h)d+%(x)d+%(y)d  \"%%(output_fp)s\"' % \
#                 {'w':w_raw, 'h':h_raw, 'x':x_raw, 'y':y_raw},
#                 kwargs_list=[{'input_fp': ,
#                               'output_fp': output_fp_map[img_name]}
#                              for img_name in metadata_cache['valid_filenames'][stack]],
#                 argument_type='single',
#                jobs_per_node=1,
#                local_only=True)
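Strided slicing like the line above only downsamples by whole factors, so rescale_factor is assumed to be the reciprocal of an integer. A minimal sketch:

import numpy as np

def downsample(img, rescale_factor):
    # e.g. rescale_factor = 0.25 keeps every 4th row and column
    step = int(round(1 / rescale_factor))
    return img[::step, ::step]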
Exemplo n.º 56
0
    sim_likelihood.increment_likelihood_legacy(simobs.dist_matrix,
                                               simobs.rts_matrix, **sim_params)

    fp_value = fp_likelihood.likelihood
    sim_value = sim_likelihood.likelihood
    return (fp_value, sim_value, i, j)


fp_likelihoods = np.zeros((100, 100))
sim_likelihoods = np.zeros((100, 100))
sigmas = np.linspace(0.2, 3, 100)
rewards = np.linspace(0.5, 1.5, 100)

arglists = it.product(zip(sigmas, range(100)), zip(rewards, range(100)))

pool = Pool(processes=22)
output_likelihoods = pool.map(comparisons, arglists)

for output in output_likelihoods:
    i, j = output[2:]
    fp_likelihoods[i, j] = output[0]
    sim_likelihoods[i, j] = output[1]

fw = open('/home/berk/Documents/fp_likelihoods_test.p', 'wb')
outdict = {
    'fp_likelihoods': fp_likelihoods,
    'sim_likelihoods': sim_likelihoods
}
pickle.dump(outdict, fw)
fw.close()
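Since Pool.map returns results in the order of its inputs, the (i, j) bookkeeping above is one of several options. A hedged alternative sketch, assuming a worker that takes a (sigma, reward) pair and returns only (fp_value, sim_value):

import itertools as it
import numpy as np
from multiprocess import Pool

def sweep(worker, sigmas, rewards, processes=4):
    args = list(it.product(sigmas, rewards))
    with Pool(processes=processes) as pool:
        flat = pool.map(worker, args)  # output order matches `args`
    fp, sim = zip(*flat)
    shape = (len(sigmas), len(rewards))
    return np.reshape(fp, shape), np.reshape(sim, shape)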
Exemplo n.º 57
0
def main(lensname, dataname, work_dir='./'):
    main_path = os.getcwd()
    sys.path.append(work_dir + "config/")
    config = importlib.import_module("config_" + lensname + "_" + dataname)
    base_lcs = pycs.gen.util.readpickle(config.data)
    f = open(
        os.path.join(config.report_directory,
                     'report_optimisation_%s.txt' % config.simoptfctkw), 'w')

    if config.mltype == "splml":
        if config.forcen:
            ml_param = config.nmlspl
            string_ML = "nmlspl"
        else:
            ml_param = config.mlknotsteps
            string_ML = "knml"
    elif config.mltype == "polyml":
        ml_param = config.degree
        string_ML = "deg"
    else:
        raise RuntimeError(
            'I don\'t know your microlensing type. Choose "polyml" or "splml".')

    for a, kn in enumerate(config.knotstep):
        for b, ml in enumerate(ml_param):
            lcs = copy.deepcopy(base_lcs)
            destpath = os.path.join(
                main_path, config.lens_directory + config.combkw[a, b] + '/')
            print(destpath)
            ##### We start by shifting our curves "by eye", to get close to the result and help the optimisers to do a good job
            applyshifts(
                lcs, config.timeshifts,
                config.magshifts)  # be careful, this removes ml as well...

            # We also give them a microlensing model (here, similar to Courbin 2011)
            config.attachml(
                lcs,
                ml)  # this is because they were saved as raw lcs, without ml

            if config.max_core is None:
                nworkers = cpu_count()
            else:
                nworkers = config.max_core

            for c, opts in enumerate(config.optset):
                if config.simoptfctkw == "spl1":
                    kwargs = {'kn': kn, 'name': 'spl1'}
                elif config.simoptfctkw == "regdiff":
                    kwargs = config.kwargs_optimiser_simoptfct[c]
                else:
                    print("Error : simoptfctkw must be spl1 or regdiff")

                if config.run_on_copies:
                    print("I will run the optimiser on the copies with the parameters :", kwargs)
                    p = Pool(nworkers)
                    if config.simoptfctkw == "spl1":
                        job_args = [
                            (j, config.simset_copy, lcs, config.simoptfct,
                             kwargs, opts, config.tsrand, destpath)
                            for j in range(nworkers)
                        ]
                        success_list_copies = p.map(exec_worker_copie_aux,
                                                    job_args)
                        # success_list_copies = [exec_worker_copie_aux(job_args[0])]# DEBUG

                    elif config.simoptfctkw == "regdiff":
                        if a == 0 and b == 0:  # for copies, run on only 1 (knstp,mlknstp) as it the same for others
                            job_args = (0, config.simset_copy, lcs,
                                        config.simoptfct, kwargs, opts,
                                        config.tsrand, destpath)
                            success_list_copies = exec_worker_copie_aux(
                                job_args)
                            success_list_copies = [
                                success_list_copies
                            ]  # we hace to turn it into a list to match spl format
                            dir_link = os.path.join(
                                destpath,
                                "sims_%s_opt_%s" % (config.simset_copy, opts))
                            print("Dir link :", dir_link)
                            pkl.dump(
                                dir_link,
                                open(
                                    os.path.join(
                                        config.lens_directory,
                                        'regdiff_copies_link_%s.pkl' %
                                        kwargs['name']), 'wb'))
                        # p.map(exec_worker_copie_aux, job_args)# because for some reason, regdiff does not like multiproc.
                    f.write('COPIES, kn%i, %s%i, optimiseur %s : \n' %
                            (kn, string_ML, ml, kwargs['name']))
                    write_report_optimisation(f, success_list_copies)
                    f.write('################### \n')

                if config.run_on_sims:
                    print("I will run the optimiser on the simulated lcs with the parameters :", kwargs)
                    p = Pool(nworkers)
                    if config.simoptfctkw == "spl1":
                        job_args = [
                            (j, config.simset_mock, lcs, config.simoptfct,
                             kwargs, opts, config.tsrand, destpath)
                            for j in range(nworkers)
                        ]
                        success_list_simu = p.map(exec_worker_mocks_aux,
                                                  job_args)
                        # success_list_simu = [exec_worker_mocks_aux(job_args[0])] #DEBUG
                    elif config.simoptfctkw == "regdiff":
                        job_args = (0, config.simset_mock, lcs,
                                    config.simoptfct, kwargs, opts,
                                    config.tsrand, destpath)
                        success_list_simu = exec_worker_mocks_aux(
                            job_args
                        )  # because for some reason, regdiff does not like multiproc.
                        success_list_simu = [success_list_simu]
                        # p.map(exec_worker_copie_aux, job_args)
                    f.write('SIMULATIONS, kn%i, %s%i, optimiseur %s : \n' %
                            (kn, string_ML, ml, kwargs['name']))
                    write_report_optimisation(f, success_list_simu)
                    f.write('################### \n')

    print("OPTIMISATION DONE : report written in %s" % (os.path.join(
        config.report_directory,
        'report_optimisation_%s.txt' % config.simoptfctkw)))
    f.close()
Exemplo n.º 58
0
from multiprocess import Pool


def f(x):
    return x * x


if __name__ == '__main__':

    p = Pool(4)
    result = p.map_async(f, range(10))
    print(result.get(timeout=1))
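As a hedged follow-up to the example above: get(timeout=...) raises a TimeoutError if the workers are not done in time, so long-running jobs usually guard it. A sketch, assuming multiprocess re-exports TimeoutError the way the stdlib multiprocessing package does:

from multiprocess import Pool, TimeoutError  # TimeoutError assumed to mirror the stdlib export

def g(x):
    return x + 1

if __name__ == '__main__':
    with Pool(2) as p:
        res = p.map_async(g, range(100))
        try:
            print(res.get(timeout=5))
        except TimeoutError:
            print('workers did not finish within 5 seconds')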
Exemplo n.º 59
0
    def error_rate(self, data, labels, other_bc_predict_args,
                   other_bc_predict_kwargs, process_count):
        """ Outputs the error rate of multiclass classifier for the given data and labels.

        Parameters
        ----------
        data : ndarray
            An ndarray where each row is a input vector consisting of the
            state of the visible units.
        labels : ndarray
            An ndarray where each element is the label/classification of a
            input vector in data for binary classification.
            Valid label values are -1 and 1.
        other_bc_predict_args : tuple
            Positional arguments to BinaryClassifier.predict not including input_vector.
        other_bc_predict_kwargs : dict
            Keyword arguments to BinaryClassifier.predict not including input_vector.
        process_count : int
            The number of worker processes to use when generating the predictions.

        Note the elements in data must correspond in sequence to the
        elements in labels.

        Returns
        -------
        float
            The error rate of the multiclass classifier for the given data
            and labels.
        """
        def binary_classifier_predict(binary_classifier, data,
                                      other_bc_predict_args,
                                      other_bc_predict_kwargs):
            """ Generate predictions for a specific binary classifier.

            Parameters
            ----------
            binary_classifier : binary classifier
                The binary classifier to generate predictions.
            data : ndarray
                Data to generate predictions for.
            other_bc_predict_args : tuple
                Positional arguments to BinaryClassifier.predict not including input_vector.
            other_bc_predict_kwargs : dict
                Keyword arguments to BinaryClassifier.predict not including input_vector.

            Returns
            -------
            list
                The predictions of the given binary classifier.
            """
            bc_data_scores = [
                binary_classifier.predict(input_vector, *other_bc_predict_args,
                                          **other_bc_predict_kwargs)
                for input_vector in data
            ]

            return bc_data_scores

        def error_callback(exc):
            """ Callback used by pool.apply_async when an error occurs.

            Parameters
            ----------
            exc : Exception
                Exception thrown by the process pool.apply_async was running in.
            """
            print(exc.__cause__)

        if process_count is not None and process_count > 1:
            # Unfortunately we cannot just use self.predict directly
            # (e.g. predictions = pool.map(self.predict, data)).
            # Instead must partially repeat what self.predict does here.
            binary_classifier_results = {}
            binary_classifier_scores = {}
            with Pool(processes=process_count) as pool:
                # Use the process pool to compute predictions of the binary classifiers.
                for (label_1, label_2
                     ), binary_classifier in self.binary_classifiers.items():
                    binary_classifier_results[(label_1, label_2)] = (
                        pool.apply_async(func=binary_classifier_predict,
                                         args=(binary_classifier, data,
                                               other_bc_predict_args,
                                               other_bc_predict_kwargs),
                                         error_callback=error_callback))

                # Retrieve the binary classifier scores from the process pool.
                for (label_1, label_2) in self.binary_classifiers.keys():
                    binary_classifier_scores[(label_1, label_2)] = (
                        binary_classifier_results[(label_1, label_2)].get())

            # Generate list of predictions for each data element based on the predictions of
            # the underlying binary classifiers.
            predictions = []
            if self.strategy in ('OVA', 'OVO'):
                # Compute a confidence score for each label and set the predicted label to be
                # the one with the highest score.
                # Same technique as in self.predict with the difference here being
                # binary_classifier_scores values are not a single value and instead are an
                # iterable of scores for each data element.
                for i in range(len(data)):
                    label_scores = {
                        label:
                        sum(scores[i] if label == label_1 else -scores[i]
                            for (label_1, label_2
                                 ), scores in binary_classifier_scores.items()
                            if label in (label_1, label_2))
                        for label in self.possible_labels
                    }
                    predicted_label = max(label_scores, key=label_scores.get)
                    predictions.append(predicted_label)
            else:
                raise NotImplementedError(self.strategy)

            predictions = np.asarray(predictions, dtype=labels.dtype)

        else:
            # Generate list of predictions for each data element using self.predict.
            predictions = np.asarray([
                self.predict(input_vector, other_bc_predict_args,
                             other_bc_predict_kwargs) for input_vector in data
            ],
                                     dtype=labels.dtype)

        # Gather the results of the predictions; prediction_results is an ndarray corresponding
        # to the predictions and the labels for the data with True meaning the prediction matched
        # the label and False meaning it did not.
        prediction_results = (predictions == labels)
        # Note the number of incorrect prediction results
        # (i.e. the number of False entries in prediction_results).
        num_incorrect_prediction_results = np.sum(~prediction_results)
        # Note the number of results.
        num_prediction_results = prediction_results.shape[0]
        # Compute the error rate.
        error_rate = num_incorrect_prediction_results / num_prediction_results

        return error_rate
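The per-classifier dispatch above follows a common fan-out pattern: one apply_async call per keyed task, then collect every result with .get(). A minimal sketch with illustrative names, assuming any serializable task function:

from multiprocess import Pool

def fan_out(task_fn, task_args, process_count=4):
    # task_args maps a key to a tuple of positional arguments for task_fn
    with Pool(processes=process_count) as pool:
        pending = {key: pool.apply_async(task_fn, args=args)
                   for key, args in task_args.items()}
        return {key: result.get() for key, result in pending.items()}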
Exemplo n.º 60
0
def all_countries(base_path,
                  multiprocess=True,
                  overwrite=True,
                  savefig=False,
                  report=False):
    """
    Main function to estimate the length of all the roads for each country we are interested in.

    Args:
        *base_path* : Base path to the location of all files and directories in this project.
        
        *multiprocess* : Set to True by default. Set to False in the case of limited processing power.
        
        *overwrite* : Set to True by default. This relates to all input data (i.e. .poly files, .osm.pbf files and shapefiles).

        *savefig* : Set to False by default. When set to True, it will return a figure with the roads of a country.

    Returns:
        An Excel file with the length of all **Primary**, **Secondary**, **Tertiary**, **Track** and **Other** roads for each country.
    
    """

    print('The calculation of road lengths has started!')
    start = time.time()

    # =============================================================================
    #     """ Set path to dirs"""
    # =============================================================================
    dir_out = os.path.join(base_path, 'output_data')
    poly_dir = os.path.join(base_path, 'poly_files')
    osm_path_in = os.path.join(base_path, 'osm_continent')
    fig_dir = os.path.join(base_path, 'Figures')

    # =============================================================================
    #     """ create directories if they are not created yet """
    # =============================================================================
    if not os.path.exists(dir_out):
        os.makedirs(dir_out)

    if not os.path.exists(poly_dir):
        os.makedirs(poly_dir)

    if not os.path.exists(osm_path_in):
        os.makedirs(osm_path_in)

    if savefig and not os.path.exists(fig_dir):
        os.makedirs(fig_dir)
# =============================================================================
#     """Set path to files we use """
# =============================================================================
    wb_country_in = os.path.join(base_path, 'input_data', 'wbccodes2014.csv')
    global_shape = os.path.join(base_path, 'input_data',
                                '2015_GAUL_Dataset_Mod.gdb')

    # =============================================================================
    #     """Load country shapes and list and only save the required countries"""
    # =============================================================================
    wb_country = pd.read_csv(wb_country_in, header=0, index_col=0)

    #filter high income countries from country file
    country_list = wb_country[['country', 'continent'
                               ]].loc[wb_country['wbregion'] != 'YHI']

    # add column to country list so we can easily look up the required continental
    # osm file for that continent
    map_continent = {
        'MA': 'central-america',
        'SA': 'south-america',
        'EU': 'europe',
        'AS': 'asia',
        'AU': 'australia-oceania',
        'AF': 'africa',
        'AM': 'north-america'
    }

    country_list['osm-cont'] = country_list['continent'].map(
        lambda x: (map_continent[x]))

    # =============================================================================
    #     """ create .poly files to clip countries from osm.pbf files """
    # =============================================================================
    if not os.listdir(poly_dir):
        create_poly_files(base_path, global_shape, save_shapefile=overwrite)
# =============================================================================
# """ check if we have actually downloaded the openstreetmap input files. If not,
# lets download them. Note: this will take a while! """
# =============================================================================
    continent_list = [
        'central-america', 'south-america', 'europe', 'asia',
        'australia-oceania', 'africa', 'north-america'
    ]

    for continent in continent_list:
        url = 'http://download.geofabrik.de/%s-latest.osm.pbf' % continent
        if '%s-latest.osm.pbf' % (continent) not in os.listdir(osm_path_in):
            urllib.request.urlretrieve(
                url, os.path.join(osm_path_in, '%s-latest.osm.pbf' % continent))

# =============================================================================
#     """ create extracted osm files for each country per continent """
# =============================================================================
    out = []
    countries = []
    continent_osms = []
    base_paths = []
    overwrites = []
    savefigs = []
    reporting = []
    for country in country_list.iterrows():
        country = country[1]
        continent_osm = os.path.join(
            osm_path_in, '%s-latest.osm.pbf' % (country['osm-cont']))
        countries.append(country['country'])
        continent_osms.append(continent_osm)
        base_paths.append(base_path)
        overwrites.append(overwrite)
        savefigs.append(savefig)
        reporting.append(report)

    # multiprocessing will start if set to True. Set to False with limited processing capacities
    if multiprocess:
        pool = Pool(cpu_count() - 1)
        out = pool.starmap(
            single_country,
            zip(countries, continent_osms, base_paths, overwrites, savefigs,
                reporting))

    # when multiprocessing set to False, we will just loop over the countries.
    else:
        out = []
        i = 0
        for country in country_list.iterrows():
            country = country[1]
            continent_osm = os.path.join(
                osm_path_in, '%s-latest.osm.pbf' % (country['osm-cont']))
            out.append(
                single_country(country['country'], continent_osm, base_path,
                               overwrites[i], savefigs[i], reporting[i]))
            i += 1

    df = pd.concat(out, axis=1).T

    map_country = dict(zip(wb_country['country'], wb_country['country_name']))
    df['Country'] = df.index.to_series().map(map_country)

    df.set_index('Country', inplace=True, drop=True)

    writer = pd.ExcelWriter(os.path.join(dir_out, 'dist_roads.xlsx'))
    df.to_excel(writer, 'output')
    writer.save()

    end = time.time()

    print('It took ' + str(np.float16((end - start))) + " seconds to finish!")
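A hypothetical invocation, for reference; the base path is a placeholder:

if __name__ == '__main__':
    all_countries(base_path='/data/global_roads',
                  multiprocess=True,
                  overwrite=False,
                  savefig=False,
                  report=False)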