def split_4d_for_all_pat(files_paths, split_folder):
    p = pool.Pool(8)
    p.map(
        split_4d_parallel,
        zip(files_paths, [split_folder] * len(files_paths),
            [None] * len(files_paths)))
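# A minimal sketch (added for illustration, not part of the original snippet)
# of the kind of worker split_4d_for_all_pat expects: Pool.map passes each
# zipped triple as a single argument, so the (hypothetical) worker unpacks it.
def split_4d_parallel_sketch(task):
    file_path, split_folder, override_name = task  # one tuple per task
    print("splitting", file_path, "into", split_folder, override_name)

if __name__ == '__main__':
    from multiprocessing import pool
    p = pool.Pool(2)
    p.map(split_4d_parallel_sketch,
          zip(['a.nii', 'b.nii'], ['out'] * 2, [None] * 2))
    p.close()
    p.join()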
if __name__ == '__main__':
    import glob
    # file = glob.glob("C:/Users/lily/Downloads/aliwood_product_dataset/aliwood_product_dataset/*")[1:]
    file = glob.glob('/home/liuchang/TaobaoItem/results/*/')
    import os.path
    import pickle
    import sys
    import time
    import math

    worker = 32
    pool = pool.Pool(worker)
    le_dic = {}
    lfile = open('/home/liuchang/PA/length', 'rb')
    lfile = pickle.load(lfile)
    for i in range(len(lfile) // 2):
        le_dic[lfile[2 * i]] = lfile[2 * i + 1]

    for material_dir in file:
        results = []
        dynamic_list = []
        count = 0
        id = material_dir.split('/')[-2]
        print(id)
        S = le_dic[material_dir]
        print(S)
        file_list = get_all_image(material_dir)
def create_vm_tpu_pair(vm_name, tpu_name, reuse_if_exists=True, skip_confirmation=False): """Create a VM and paired TPU instance. Args: vm_name: str, name for VM. tpu_name: str, name for TPU instance. reuse_if_exists: bool, if True, this will act as a get or create. If False and vm_name or tpu_name already exists, will error. skip_confirmation: bool, whether to skip launch confirmations. Returns: tuple: (vm_ip, tpu_ip) Raises: ValueError: if instance exists but reuse_if_exists=False. """ vm_info = list_vm_names_and_ips() tpu_info = list_tpu_names_and_ips() vm_names = list(zip(*vm_info))[0] if vm_info else [] tpu_names = list(zip(*tpu_info))[0] if tpu_info else [] make_vm = False vm_ip = None if vm_name in vm_names: if not reuse_if_exists: raise ValueError("VM %s already exists and reuse_if_exists=False" % vm_name) tf.logging.info("VM %s already exists, reusing.", vm_name) vm_ip = vm_info[vm_names.index(vm_name)][1] else: print("Creating VM %s" % vm_name) if not skip_confirmation: assert confirm() make_vm = True make_tpu = False tpu_ip = None if tpu_name in tpu_names: if not reuse_if_exists: raise ValueError( "TPU instance %s already exists and reuse_if_exists=False" % tpu_name) tf.logging.info("TPU %s already exists, reusing.", tpu_name) tpu_ip = tpu_info[tpu_names.index(tpu_name)][1] else: print("Creating TPU instance %s" % tpu_name) if not skip_confirmation: assert confirm() make_tpu = True # Create VM and TPU in parallel pool = mp.Pool(2) vm_res = None tpu_res = None if make_vm: vm_res = pool.apply_async(create_vm, (vm_name, )) if make_tpu: tpu_res = pool.apply_async(create_tpu, (tpu_name, tpu_info)) if vm_res is not None: vm_ip = vm_res.get() if tpu_res is not None: tpu_ip = tpu_res.get() tf.logging.info("VM (Name, IP): %s, %s", vm_name, vm_ip) tf.logging.info("TPU (Name, IP): %s, %s", tpu_name, tpu_ip) tf.logging.info("To delete the VM, run: %s", Gcloud.DELETE_VM.format(name=vm_name)) tf.logging.info("To delete the TPU instance, run: %s", Gcloud.DELETE_TPU.format(name=tpu_name)) return vm_ip, tpu_ip
"depth": depth_map, 'deg_reduce': deg_reduce }, do_compression=False, ) parser = argparse.ArgumentParser() parser.add_argument("--datadir", dest='datadir', default=osp.join(DATA_ROOT, 'cityscapes')) parser.add_argument("--outname", default='offset_gt/dt_offset') parser.add_argument('--split', nargs='+', default=['val', 'train']) parser.add_argument("--ksize", type=int, default=5) parser.add_argument('--metric', default='euc', choices=['euc', 'taxicab']) args = parser.parse_args() ksize = args.ksize sobel_x, sobel_y = (sobel_kernel((ksize, ksize), i) for i in (0, 1)) sobel_ker = torch.cat([sobel_y, sobel_x], dim=0).view(2, 1, ksize, ksize).float() for dataset in args.split: indir = osp.join(args.datadir, dataset, 'label') outdir = osp.join(args.datadir, dataset, args.outname) os.makedirs(outdir, exist_ok=True) args_to_apply = [(indir, outdir, osp.basename(basename)) for basename in glob(osp.join(indir, "*.png"))] mpp.Pool(processes=mp.cpu_count() // 2).map(process, args_to_apply)
# t = Thread(target=loop)
# # t = Process(target=loop)
# print "the index is: ", index, time.time()
# t.start()
# while True:
#     pass

import time

# start_time = time.time()
# my_list = range(100000000)
# result = [f(x) for x in my_list]
# end_time = time.time()
# print "before: ", end_time - start_time

from multiprocessing import pool

p = pool.Pool(4)
# lst = range(100000000)
# p.map(f, lst)
# print "after: ", time.time() - end_time

my_parameter_list = [(index, index + 1) for index in range(100)]
start_time = time.time()
result = p.map(my_add, my_parameter_list)
# result = p.starmap(local_add, my_parameter_list)
print(result)
print(time.time() - start_time)

# print "*****************************************************"
# p = Pool(4)
# result = p.starmap(local_add, my_parameter_list)  # starmap is new in Python 3.3
# print result
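# A minimal sketch (added for illustration) of the difference the comments
# above point at: Pool.map passes each tuple as one argument, while
# Pool.starmap (Python 3.3+) unpacks the tuple into separate arguments.
# The bodies of my_add/local_add are assumptions, not from the original file.
from multiprocessing import pool

def my_add(pair):      # map-style worker: receives the whole tuple
    a, b = pair
    return a + b

def local_add(a, b):   # starmap-style worker: receives unpacked arguments
    return a + b

if __name__ == '__main__':
    pairs = [(i, i + 1) for i in range(5)]
    with pool.Pool(2) as p:
        print(p.map(my_add, pairs))         # [1, 3, 5, 7, 9]
        print(p.starmap(local_add, pairs))  # same result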
) + '/stuffthingmaps/train2017/'

# Create a folder under the current directory to store the intermediate map results
if not os.path.exists(stuffmap_file_path):
    os.makedirs(stuffmap_file_path)  # creates nested directories

IMAGE_DIR = Path("./map/")  # directory of map images to process
im_files = [f for f in IMAGE_DIR.iterdir()]

# Progress bar
w = progressbar.widgets
widgets = [
    'Progress: ', w.Percentage(), ' ', w.Bar('#'), ' ', w.Timer(), ' ',
    w.ETA(), ' ', w.FileTransferSpeed()
]
progress = progressbar.ProgressBar(widgets=widgets)


def write_image(im_file):
    img_cv = cv2.imread(str(im_file), 0)
    cv2.imwrite(stuffmap_file_path + str(im_file.stem) + ".png", img_cv)


myPool = pool.Pool(processes=4)  # process the images in parallel
for im_file in progress(im_files):
    myPool.apply_async(func=write_image, args=(im_file, ))
myPool.close()
myPool.join()
def main():
    parser = argparse.ArgumentParser(
        description="Evaluate algorithm based on features")
    parser.add_argument('--model_name', required=True, nargs='+',
                        help='Model Name')
    parser.add_argument('--root', default="../data/ALISC",
                        help="Root folder of all data. default in ../data/")
    parser.add_argument('--weights', nargs='+',
                        help="Weights for each model in multiple-model case")
    c = vars(parser.parse_args())
    model_names = c['model_name']

    # read groundtruth
    DATA_ROOT = c['root']
    print("1. Reading groundtruth...")
    valid_fn = os.path.join(DATA_ROOT, 'eval_tags/valid_image.txt')
    valid_ids, gt = read_alisc_groundtruth(valid_fn)

    dis_mats = []
    for model_name in model_names:
        print("**********************Model {}********************".format(
            model_name))
        print("2. Reading features for query images for model [{}]...".format(
            model_name))
        valid_features = np.load(os.path.join(
            DATA_ROOT, 'query_features', model_name, 'feature.npy'))
        print("3. Reading features for eval images for model [{}]...".format(
            model_name))
        eval_ids, eval_features = read_feature(
            os.path.join(DATA_ROOT, 'eval_features', model_name))
        print("4. Calculating Distances...")
        distance_type = 'cosine'
        dis_mat_ = calculate_distance_mat(
            valid_features, eval_features, distance_type=distance_type)
        dis_mats.append(dis_mat_)

    if c['weights'] is not None:
        weights = [float(x) for x in c['weights']]
    else:
        # take average weights
        weights = np.ones(len(model_names)) / len(model_names)
    print("Weights", weights)

    # merge distance mats
    dis_mat = np.zeros(dis_mats[0].shape)
    for i in range(len(weights)):
        dis_mat += weights[i] * dis_mats[i]

    return_lists = {}
    knn = []
    aps = []
    print("5. Evaluate each image...")
    valid_MAP = 0
    pool = mp.Pool(16)
    results = [
        pool.apply_async(search_k_smallest,
                         args=(dis_mat[i, ...].ravel(), 20))
        for i in range(len(valid_ids))
    ]
    for r in results:
        knn.append([x[0] for x in r.get()])
    for i in range(len(knn)):
        valid_id = valid_ids[i]
        top_k = [eval_ids[x] for x in knn[i]]
        return_lists[valid_id] = top_k
        ap = eval_ap(top_k, gt[i])
        print("ImageID: {} / AP = {}".format(valid_id, ap))
        valid_MAP += ap
        aps.append(ap)
    valid_MAP /= len(valid_ids)

    # save results
    with open('./{}_MAP.txt'.format("_".join(model_names)), 'w') as fp:
        fp.write('\n'.join([str(x) for x in aps]))
        fp.write('\n')
        fp.write('MAP: ')
        fp.write(str(valid_MAP))
    print("MAP= ", valid_MAP)
    with open('./{}_list.txt'.format("_".join(model_names)), 'w') as fp:
        # pickle.dump(return_lists, fp)
        for key, value in return_lists.items():
            fp.write('{},{}\n'.format(key, ';'.join(value)))
def get_pool(processes=None, initializer=None, initargs=(),
             maxtasksperchild=None):
    return pool.Pool(processes, initializer, initargs, maxtasksperchild)
def get_pool():
    return pool.Pool(1, init_pool)
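# A minimal, self-contained sketch (added for illustration, not from the
# original sources) of how a get_pool-style factory like the two above is
# typically used: build the pool once, hand work to it, then close and join.
from multiprocessing import pool

def get_pool(processes=None, initializer=None, initargs=(),
             maxtasksperchild=None):
    return pool.Pool(processes, initializer, initargs, maxtasksperchild)

def square(x):  # hypothetical worker function
    return x * x

if __name__ == '__main__':
    p = get_pool(processes=2, maxtasksperchild=10)
    try:
        print(p.map(square, range(8)))  # [0, 1, 4, 9, 16, 25, 36, 49]
    finally:
        p.close()
        p.join()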
if __name__ == '__main__':
    # uncomment next lines to set the random seed static instead of different on every run
    # seed = 7857863  # some integer
    # random.seed(a=seed)
    # np.random.seed(seed=seed)

    ns = [10, 20, 50, 100, 200, 400, 1000, 2000, 4000]
    m = 100
    for n in ns:
        w = np.random.pareto(a=1.5, size=n)
        w.sort()
        func = partial(worker, w=w)
        p = pool.Pool(processes=10)
        degrees = np.array(p.map(func, range(m)))

        avg = np.zeros((n, ), dtype=np.float)
        var = np.zeros((n, ), dtype=np.float)
        for i in range(n):
            dis = degrees[:, i]
            avg[i] = np.average(dis)
            var[i] = np.var(dis)

        plt.figure()
        for i in range(n):
            dis = degrees[:, i]
            c = Counter(dis)
            maxval = max(c.values())
            counts = np.array([(k, v) for (k, v) in c.items()])
def cesareans_output():
    # pull the review text from the input field and store it
    review_text = request.args.get('text_review')
    city_state_dict = [dict(city=city) for city in city_state_list]
    city, state = request.args.get('sel_city').split(",")
    # SearchBusinesses(review)
    top_n = 5  # Number of topics to choose for top of list.
    rev_topic = np.array(vectorsearch.GetDocTopic(review_text))
    # Get the top few topics for this review.
    top_n_topics = rev_topic.argsort()[-top_n:][::-1]
    # print(rev_topic)  # Print the topic vector.
    with open("query_history.txt", "a") as myfile:
        myfile.write("\n!@# " + review_text)
    bus_ids_in_city_state = get_bus_ids_city_state(city.strip(), state.strip())
    topic_listings = [" ".join(vectorsearch.GetTopicWords(topic, ))
                      for topic in top_n_topics]
    start = time.time()
    # top_bus_id, top_bus_sim = vectorsearch.FindBusinessSimilarityLDA(rev_topic, business_ids=bus_ids_in_city_state, method='Hel', top_n=30)
    top_bus_id, top_bus_sim = vectorsearch.FindBusinessSimilaritydoc2vec(
        review_text, bus_ids_in_city_state, top_n=50)
    print("Similarity took", time.time() - start, "seconds")
    # print(topic_listings)

    # Check that the names are not already included.
    names, valid_biz, biz_sims = [], [], []
    for i_bus, bus_id in enumerate(top_bus_id):
        name = df_businesses.name[df_businesses.business_id == bus_id].values[0]
        if name not in names:
            names.append(name)
            valid_biz.append(bus_id)
            biz_sims.append(top_bus_sim[i_bus])
            print(name, top_bus_sim[i_bus])
    top_bus_id = valid_biz
    top_bus_sim = biz_sims

    # Visualize the search query.....
    img_path_query = '/images/insight/query_' + str(uuid.uuid4()) + '.png'
    # vectorsearch.visualize_topic(rev_topic, num_topics=top_n, save_path='/home/carlson/web/'+img_path_query)

    # Find the top businesses.
    top_businesses = []
    words_paths = []
    for i, bus_id in enumerate(top_bus_id[:20]):
        # This is the full topic array for the business.
        bus_topic_vec = vectorsearch.bus_lda_topics[
            vectorsearch.bus_lda_topics.business_id == bus_id].topic_vector.values[0]
        img_path = '/images/insight/' + bus_id + '.png'
        # print('Generating image ', img_path)
        lat = df_businesses.latitude[df_businesses.business_id == bus_id].values[0]
        lon = df_businesses.longitude[df_businesses.business_id == bus_id].values[0]
        URL = df_businesses.URL[df_businesses.business_id == bus_id].values[0]
        image_URL = df_businesses.image_URL[df_businesses.business_id == bus_id].values[0]
        words = bus_reviews[bus_id]
        words_paths.append((words, img_path))
        # vectorsearch.visualize_topic(bus_topic_vec, num_topics=top_n, save_path='/home/carlson/web/'+img_path, top_topics=top_n_topics)
        # Append to list that gets passed to web page...
        top_businesses.append(dict(
            bus_id=bus_id, similarity=top_bus_sim[i],
            image_path='http://planck.ucsc.edu/' + img_path,
            bus_name="%i. " % (i + 1) +
                     df_businesses.name[df_businesses.business_id == bus_id].values[0],
            lat=lat, lon=lon, URL=URL, image_URL=image_URL))

    # Generate word clouds
    p = pool.Pool(12)
    p.map(gen_word_cloud, words_paths)
    p.close()
    p.join()

    centroid_lat = np.average([biz['lat'] for biz in top_businesses])
    centroid_lon = np.average([biz['lon'] for biz in top_businesses])

    # Generate map....
    map_path = img_path[:-4] + '.html'
    print("\nPATH TO MAP, lat, lon", map_path, '\n', centroid_lat, centroid_lon)
    map_osm = folium.Map(
        location=[centroid_lat, centroid_lon], zoom_start=13,
        detect_retina=True, tiles='stamentoner',
        attr='Map tiles by <a href="http://stamen.com">Stamen Design</a>, '
             'under <a href="http://creativecommons.org/licenses/by/3.0">CC BY 3.0</a>. '
             'Data by <a href="http://openstreetmap.org">OpenStreetMap</a>, '
             'under <a href="http://creativecommons.org/licenses/by-sa/3.0">CC BY SA</a>.')
    # map_osm = folium.Map(location=[centroid_lat, centroid_lon], zoom_start=13, detect_retina=True,
    #                      tiles='http://{s}.basemaps.cartocdn.com/dark_all/{z}/{x}/{y}.png',
    #                      attr='© <a href="http://www.openstreetmap.org/copyright">OpenStreetMap</a> © <a href="http://cartodb.com/attributions">CartoDB</a>')
    # map_osm.add_tile_layer(tile_url='http://tile.stamen.com/toner-labels/{z}/{x}/{y}.png', attr='labels',
    #                        active=True, overlay=True)
    for business in top_businesses[:]:
        html = r'''<div align="center">
        <font size="4"><a href="''' + business['URL'] + '''">
        <b>''' + business['bus_name'] + '''</b></a></font>
        <br><img src="''' + business['image_path'] + '''" alt="NOPE" style="width:250px;height:125px;"></div>'''
        iframe = folium.element.IFrame(html=html, width=300, height=175)
        popup = folium.Popup(html=iframe)
        icon = folium.Icon(color="blue", icon="ok")
        marker = folium.Marker(location=[business['lat'], business['lon']],
                               popup=popup, icon=icon)
        map_osm.add_children(marker)

    heatmap_events = [
        (df_businesses.latitude[df_businesses.business_id == bus_id].values[0],
         df_businesses.longitude[df_businesses.business_id == bus_id].values[0],
         -top_bus_sim[i] + top_bus_sim[0]) for i, bus_id in enumerate(top_bus_id)]
    lats = np.array(heatmap_events)[:, 0]
    lons = np.array(heatmap_events)[:, 1]
    sims_array = np.array(heatmap_events)[:, 2]
    scale = top_bus_sim[6] - top_bus_sim[0]
    sims_array = ((1 - 1 / (np.exp(sims_array / scale) + 1)) * 50).astype(np.int32)
    heatmap = []
    for i, sim in enumerate(sims_array):
        for j in range(sim):
            heatmap += [[lats[i] + .00001 * j, lons[i]]]
    map_osm.add_children(plugins.HeatMap(heatmap, max_zoom=18, radius=25, max_val=20))
    map_osm.save('/home/carlson/web/' + map_path)
    append_mousemove_js('/home/carlson/web/' + map_path)

    return render_template("output.html", review_text=review_text,
                           topic_listings=topic_listings,
                           top_businesses=top_businesses,
                           image_path_query='http://planck.ucsc.edu/' + img_path_query,
                           map_path='http://planck.ucsc.edu/' + map_path,
                           city_state_list=city_state_dict)
from .bookkeeping import PointIds, InMemorySessionInterface as IMSI
from .schemas import oms as schema

app = flask.Flask(__name__)
# For production deployment: generate a different one via Python's `os.urandom`
# and store it in a safe place.
# See: http://flask.pocoo.org/docs/0.11/quickstart/#sessions
app.secret_key = b"DON'T USE THIS IN PRODUCTION! " + b'\xdb\xcd\xb4\x8cp'
app.session_interface = IMSI()

# Set up a pool of workers to which jobs can be submitted and a dictionary
# which stores the asynchronous result objects (a usage sketch follows this
# snippet).
app.workers = mpp.Pool(4)
app.results = {}


##### Utility Functions #######################################################
#
# Some functions used throughout this module (and maybe even elsewhere.)
#
# This should probably go into its own module but I'm putting it all here for
# now, as some parts need to stay in this module while some parts can be
# factored out later. The 'factoring out' part can be considered an open TODO.
#
###############################################################################

def xml_response(template):
    response = flask.make_response(template)
    response.headers['Content-Type'] = 'text/xml'
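# A minimal sketch (added for illustration; the route names and job function
# are hypothetical, not from the original module) of the pattern described
# above: a request submits work to app.workers and stashes the AsyncResult in
# app.results so a later request can poll it.
import uuid

def slow_job(n):  # hypothetical long-running task
    return sum(i * i for i in range(n))

@app.route('/submit/<int:n>')
def submit(n):
    job_id = str(uuid.uuid4())
    app.results[job_id] = app.workers.apply_async(slow_job, (n,))
    return job_id

@app.route('/status/<job_id>')
def status(job_id):
    result = app.results[job_id]
    return str(result.get()) if result.ready() else 'still running'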
        simple_shp_2 = New_shp(boundbox[0], boundbox[1], boundbox[2],
                               boundbox[3], boundbox[4], boundbox[5],
                               boundbox[6], boundbox[7], boundbox[8])
        shp_2 = BRepAlgoAPI_Cut(shp, simple_shp_2).Shape()
        converter.export_shape_to_svg(shape=shp_2,
                                      filename=MotherDic + item + ".svg",
                                      proj_ax=converter.DIRS["2"], scale=sc)
        return 1
    except Exception as re:
        shutil.rmtree(MotherDic)
        print(MotherDic + " has been removed")
        print(fname + ' failed, due to: {}'.format(re))
        return 0


p = pool.Pool(processes=args.n_cores)
f = partial(Generate_task)
t0 = time.time()
mask = p.map(f, fnames)
Mask = np.asarray(mask)

label_valid = np.delete(label, np.where(Mask == 0))
label_valid = [int(i) for i in label_valid]
dirs_valid = np.delete(np.array(dirs), np.where(Mask == 0))
Answer = dict(zip(dirs_valid, label_valid))
fname_answer = os.path.join(pathwrite, 'answer.json')
def optimize_population(N, sigmas, fixed_args, gamma_init=1. / 250., parallel=True): """ Function for optimizing the population :param N: int Number of neurons. :param sigmas: np.array White noise amplitude values (will not be optimized). :param fixed_args: list All fixed parameters of the optimization procedure. :param gamma_init: float Initial value of process jump rate (kHz). Default=1. / 250. :param parallel: bool Whether one wants to parallelize the procedure (approximate, but much faster) or do updates sequentially (slow, but less approximative). Default=True :return: list Optimal model variables. """ proc_params = gamma_init, 0., 1. pop_params = np.zeros(N), -6.5 * np.ones(N), sigmas model_variables = proc_params, pop_params print('Optimize Mu') if parallel: #num_cpu = 10 num_cpu = np.amin([N, int(np.floor(.9*cpu_count()))]) p = pool.Pool(num_cpu) mu_results = p.map(partial(parallel_simplex_mu_fit, model_variables=model_variables, fixed_args=fixed_args), range(N)) p.close() proc_params, pop_params = model_variables Cs, mus, sigmas = pop_params mus = np.array(mu_results) pop_params = Cs, mus, sigmas model_variables = proc_params, pop_params else: for ineuron in range(N): print('Neuron %d' %ineuron) opt_res = minimize_scalar(simplex_mu_fit_wrapper_mllk, bracket=[-6.5,-4.], bounds=[-8.,-3.], method='brent', args=(ineuron, model_variables, fixed_args), options={'xtol': 1e-3}) opt_mu = opt_res.x proc_params, pop_params = model_variables Cs, mus, sigmas = pop_params mus[ineuron] = opt_mu pop_params = Cs, mus, sigmas model_variables = proc_params, pop_params print('Optimize C/gamma shared') converged = False mllk_cur = -np.inf opt_shared_C = .5 opt_gamma = 1./500. while not converged: mllk_old = mllk_cur print('Optimize C') if parallel: #num_cpu = 10 num_cpu = np.amin([N, int(np.floor(.9 * cpu_count()))]) p = pool.Pool(num_cpu) Cs_fit = p.map(partial(parallel_simplex_C_fit, C_init=opt_shared_C, model_variables=model_variables, fixed_args=fixed_args), range(N)) p.close() else: Cs_fit = np.empty(N) for ineuron in range(N): print('Neuron %d' % ineuron) opt_res = minimize_scalar(simplex_C_fit_wrapper_mllk, bracket=[.5 * opt_shared_C, 2. 
* opt_shared_C], bounds=[0., 2.], method='brent', args=(ineuron, model_variables, fixed_args), options={'xtol': 1e-3}) opt_C = opt_res.x Cs_fit[ineuron] = opt_C proc_params, pop_params = model_variables Cs, mus, sigmas = pop_params pop_params = np.array(Cs_fit), mus, sigmas model_variables = proc_params, pop_params print('Optimize tau') opt_res = minimize_scalar(simplex_gamma_fit_wrapper_mllk, bracket=[.5*opt_gamma, 2.*opt_gamma], bounds=[1./2e3, 1./50.], method='brent', args=(model_variables, fixed_args), options={'xtol': 1e-3}) opt_gamma = opt_res.x mllk_cur = -opt_res.fun proc_params, pop_params = model_variables gamma_jump, x_bar_jump, zeta_jump = proc_params proc_params = opt_gamma, x_bar_jump, zeta_jump model_variables = proc_params, pop_params convergence = -(mllk_cur - mllk_old)/mllk_cur converged = convergence < 1e-3 print('Optimize mu and C') if parallel: #num_cpu = 10 num_cpu = np.amin([N, int(np.floor(.9 * cpu_count()))]) p = pool.Pool(num_cpu) mus_Cs_fit = p.map(partial(parallel_simplex_mu_C_fit, model_variables=model_variables, fixed_args=fixed_args), range(N)) p.close() mus_fit, Cs_fit = np.empty(N), np.empty(N) for ineuron in range(N): mus_fit[ineuron] = mus_Cs_fit[ineuron][0] Cs_fit[ineuron] = mus_Cs_fit[ineuron][1] else: mus_fit, Cs_fit = np.empty(N), np.empty(N) for ineuron in range(N): print('Neuron %d' % ineuron) init_variables = np.array([-4, .5]) initial_simplex = np.array([[-3., .1], [-7., .5], [-5., .7]]) opt_res = minimize(simplex_mu_C_fit_wrapper_mllk, x0=init_variables, method='Nelder-Mead', args=(ineuron, model_variables, fixed_args), tol=1e-3, options={ 'initial_simplex': initial_simplex}) opt_mu, opt_C = opt_res.x[0], opt_res.x[1] mus_fit[ineuron] = opt_mu Cs_fit[ineuron] = opt_C proc_params, pop_params = model_variables Cs, mus, sigmas = pop_params pop_params = np.array(Cs_fit), np.array(mus_fit), sigmas model_variables = proc_params, pop_params return model_variables
def main(): coco_output = { "info": INFO, "licenses": LICENSES, "categories": CATEGORIES, "images": [], "annotations": [] } image_id = 1 annotation_id = 1 im_files = [f for f in IMAGE_DIR.iterdir()] im_files.sort(key=lambda f: f.stem, reverse=True) an_files = [f for f in ANNOTATION_DIR.iterdir()] an_files.sort(key=lambda f: f.stem, reverse=True) assert len(an_files) == len(im_files), \ "#images does not equal to #labels, please run diff_two_folder.py,and delete the mis-match file." for im_file, an_file in zip(im_files, an_files): image = Image.open(im_file) im_info = pycococreatortools.create_image_info(image_id, im_file.name, image.size) coco_output['images'].append(im_info) myPool = pool.Pool(processes=16) annotation_info_list = [] with open(an_file, 'r') as f: datas = json.load(f) for i in range(len(datas)): data = datas[i] # print(data) bounding_box = get_info(data)[0] segmentation = get_info(data)[1] class_id = 1 print(bounding_box, segmentation) area = bounding_box[-1] * bounding_box[-2] an_infos = pycococreatortools.mask_create_annotation_info( annotation_id=annotation_id, image_id=image_id, category_id=class_id, area=area, image_size=image.size, bounding_box=bounding_box, segmentation=segmentation) annotation_info_list.append(an_infos) annotation_id += 1 myPool.close() myPool.join() for annotation_info in annotation_info_list: if annotation_info is not None: coco_output['annotations'].append(annotation_info) image_id += 1 print("[INFO]: Saving annotations") output_json = Path(RESULT_JSON_DIR) with output_json.open('w', encoding='utf-8') as f: json.dump(coco_output, f) print("[INFO]: Annotations JSON file saved in:", str(output_json))
def ga(): # STUDENT Feel free to play with this parameter pop_limit = 240 # Code to parallelize some computations batches = os.cpu_count() if pop_limit % batches != 0: print("It's ideal if pop_limit divides evenly into " + str(batches) + " batches.") batch_size = int(math.ceil(pop_limit / batches)) with mpool.Pool(processes=os.cpu_count()) as pool: init_time = time.time() # STUDENT (Optional) change population initialization population = [ Individual.random_individual() if random.random() < 0.9 else Individual.empty_individual() for _g in range(pop_limit) ] # But leave this line alone; we have to reassign to population because we get a new population that has more cached stuff in it. population = pool.map(Individual.calculate_fitness, population, batch_size) init_done = time.time() print("Created and calculated initial population statistics in:", init_done - init_time, "seconds") generation = 0 start = time.time() now = start print("Use ctrl-c to terminate this loop manually.") try: while True: now = time.time() # Print out statistics if generation > 0: best = max(population, key=Individual.fitness) print("Generation:", str(generation)) print("Max fitness:", str(best.fitness())) print("Average generation time:", (now - start) / generation) print("Net time:", now - start) print() with open("levels/last.txt", 'w') as f: for row in best.to_level(): f.write("".join(row) + "\n") generation += 1 # STUDENT Determine stopping condition stop_condition = False if stop_condition: break # STUDENT Also consider using FI-2POP as in the Sorenson & Pasquier paper gentime = time.time() next_population = generate_successors(population) gendone = time.time() print("Generated successors in:", gendone - gentime, "seconds") # Calculate fitness in batches in parallel next_population = pool.map(Individual.calculate_fitness, next_population, batch_size) popdone = time.time() print("Calculated fitnesses in:", popdone - gendone, "seconds") population = next_population except KeyboardInterrupt: pass return population
    def lookup(self, words, batch=1000, epochs=10000, skip=True, threads=None):
        """
        Look up the word vectors for the given words; words that are not found
        are assigned a random vector.
        :param words: word collection, must be a list
        :param batch: number of file lines read per epoch
        :param epochs: number of lookup epochs
        :param skip: whether to skip the first word (it may be a placeholder)
        :param threads: number of parallel processes, only effective in
            fast_mode; if None, it equals the number of CPU threads
        :return:
        """
        self.skip = skip
        if not isinstance(words, list):
            raise TypeError("words must be list type.")
        self.words_dict = {word: index for index, word in enumerate(words)}
        if not self.fast_mode:
            words = set(words)
            vectors = [[] for i in range(len(words))]
            self._build()
            left_num = 0
            flag = 0
            if skip:
                left_num = 1
            for epoch in range(epochs):
                lines = [[] for i in range(batch)]
                for bat in range(batch):
                    line = self.filehead.readline()
                    if line:
                        lines[bat] = line
                    else:
                        lines = lines[:bat]
                        flag = 1
                        print("Epoch: {}, The End of File, So Break.".format(
                            epoch))
                        break
                vector_ = self._process(lines)
                """for word in words:
                    try:
                        vectors[self.words[word]] = vector_[word]
                    except KeyError:
                        left_words.append(word)"""
                words_int = words.intersection(vector_.keys())
                words.difference_update(words_int)
                for word in words_int:
                    vectors[self.words_dict[word]] = vector_[word]
                if len(words) <= left_num:
                    print("Epoch: {}, All Words Are Mapped, So Break.".format(
                        epoch))
                    print("All words are mapped.")
                    return vectors
                if flag == 1:
                    vectors = self._free_padding(vectors)
                    return vectors
                if ((epoch + 1) * batch) % 100000 == 0:
                    print("Epoch: {}/{}, Complete.".format(epoch + 1, epochs))
            print("Epoch is Enough, So Return.")
            vectors = self._free_padding(vectors)
            return vectors
        else:
            # fast_mode: look up in parallel across the split files.
            # First, split the query words into per-initial word dictionaries
            # keyed by the symbols in split_label; each value is itself a dict
            # mapping a word to its index in words.
            prepro = PreprocessVector(need_pro=self.need_pro)
            prepro.save_path(self.preprocessed_vector_path)
            prepro.get_path(filename=self.filename, path=self.path)
            prepro.subfile_name(self.split_label)
            prepro.process()
            vectors = [[] for i in range(len(words))]
            words = self._split()
            self._build()
            if threads is None:
                threads = cpu_count()
            p = pool.Pool(threads)
            labels = [label for label in self.split_label.keys()]
            words_dicts = [words[label] for label in labels]
            batch_bag = [batch for i in range(len(labels))]
            epochs_bag = [epochs for i in range(len(labels))]
            skip_bag = [skip for i in range(len(labels))]
            vectors_dicts = p.map(
                self._swap,
                zip(labels, words_dicts, batch_bag, epochs_bag, skip_bag))
            p.close()
            for vectors_dict in vectors_dicts:
                for key in vectors_dict.keys():
                    vectors[key] = vectors_dict[key]
            return self._free_padding(vectors)
def main():
    # Information stored in the COCO label file (e.g. training2017.json)
    coco_output = {
        "info": INFO,
        "licenses": LICENSES,
        "categories": CATEGORIES,
        "images": [],
        "annotations": []
    }

    # Initialize the ids (incremented as we go)
    image_id = 1
    annotation_id = 1

    # Load the image list
    im_files = [f for f in IMAGE_DIR.iterdir()]
    # Sort to keep images and labels aligned
    im_files.sort(key=lambda f: f.stem, reverse=True)
    # print("im-length:", len(im_files), "\n im_files:", im_files)

    myPool = pool.Pool(processes=4)  # process annotations in parallel

    for im_file in im_files:
        # Record the image info (id, file name, size); ids start at 1
        image = Image.open(im_file)
        im_info = pycococreatortools.create_image_info(image_id, im_file.name,
                                                       image.size)
        coco_output['images'].append(im_info)

        annotation_info_list = []  # holds the annotation info

        # Used to build the stuff-thing map
        img_cv = cv2.imread(str(im_file))  # read with OpenCV so we can draw the mask and save the result
        rectangle = np.zeros(img_cv.shape[0:3], dtype="uint8")  # blank image
        # Fill the image area; 0 is black, 255 is white
        rectangle.fill(125)  # 125 = gray

        # Parse the label info: top-left, bottom-right and the four corner points (used for segmentation)
        bounding_box, segmentation = get_info(im_file)
        class_id = 1  # numeric class id, e.g. 1 (fixed to 1 because "license plate" is the only class)

        # Log progress
        print(bounding_box, segmentation)

        # Build the stuff-thing map
        color = random_color(class_id)  # class colour, kept identical across images
        make_seg_mask(rectangle, segmentation, color)

        # area = bounding_box[-1] * bounding_box[-2]  # bounding-box area, width * height
        area = compute_polygon_area(
            segmentation)  # segmentation area (more precise than the bounding box)

        myPool.apply_async(func=pycococreatortools.mask_create_annotation_info,
                           args=(annotation_id, image_id, class_id, area,
                                 image.size, bounding_box, segmentation),
                           callback=annotation_info_list.append)
        # an_infos = pycococreatortools.mask_create_annotation_info(annotation_id=annotation_id, image_id=image_id,
        #                                                           category_id=class_id, area=area,
        #                                                           image_size=image.size, bounding_box=bounding_box,
        #                                                           segmentation=segmentation)
        # annotation_info_list.append(an_infos)

        cv2.imwrite(file_path + str(im_file.stem) + ".png", rectangle)

        # All annotations for this image are collected above; store them once per image
        for annotation_info in annotation_info_list:
            if annotation_info is not None:
                coco_output['annotations'].append(annotation_info)

        image_id += 1

    myPool.close()
    myPool.join()

    # Save as JSON
    print("[INFO] Storing annotations json file...")
    output_json = Path(f'ccpd_annotations.json')
    with output_json.open('w', encoding='utf-8') as f:
        json.dump(coco_output, f)
    print("[INFO] Annotations JSON file saved in:", str(output_json))
    def process(self, batch=10000, encoding='utf-8', sorted=False,
                threads=None, split_label='&cut&', end_label='000000000',
                remove=True):
        """
        Split the file.
        :param batch: number of lines read per pass; a suitable value maximises
            CPU utilisation (default 1000). Too large leaves the CPU idle, too
            small wastes disk throughput. A small batch also roughly preserves
            the original line order, at the cost of more memory.
        :param encoding: file encoding
        :param sorted: whether to re-sort the split files so that lines keep the
            same order as in the source file. Not recommended: it is slow, and
            the split data is already close to the original order.
        :param threads: number of processes; must not exceed the number of CPU
            threads, otherwise it slows things down. Defaults to cpu_count().
        :param split_label: separator used for splitting when sorted is True
        :param end_label: marker placed at the head and tail of a line, used to
            mark correctly processed lines
        :param remove: whether to delete temporary files (only when sorted is True)
        Notes:
            1. If sorted is True, each line of every split file is first prefixed
               with its index in the source file, then sort() re-orders it.
            2. Processing speed is bounded by disk throughput, so it is better to
               first write the split files to an SSD via save_path and then copy()
               them to the target folder.
            3. The parallel writes occasionally fail: a line is randomly split
               across two lines, which breaks later processing, so the output
               cannot be used directly and the broken lines must be removed. The
               only way to avoid this is pool.Pool().apply, which is also the
               slowest option.
            4. end_label marks every line; only lines carrying end_label at both
               the head and the tail are treated as well-formed and processed.
        """
        self.batch = batch
        self.sorted = sorted
        self.encoding = encoding
        self.split_label = split_label
        self.end_label = end_label
        if threads is None:
            threads = cpu_count()
        start_time = time.time()
        print(
            "Start Preprocessing Vectors: This line only shows when 'process' in class PreprocessVector is called."
        )
        if self.need_pro or not self._search():
            if not os.path.exists(self.subfile_path):
                os.makedirs(self.subfile_path)
            if self.need_pro:
                dirs = os.listdir(self.subfile_path)
                for i in dirs:
                    os.remove(self.subfile_path + '/' + i)
            file = open(self.path + '/' + self.filename, 'r',
                        encoding=encoding)
            p = pool.Pool(threads)
            # The first line may be a comment for the whole file, so handle it here.
            tof = file.readline()
            if len(tof.split()) > 10:  # 10 is arbitrary; a normal line has more than 10 fields
                file.seek(0)
            tem = [[file.readline() for x in range(self.batch)]
                   for y in range(threads)]
            subfiles = [[line.strip() for line in lines if line]
                        for lines in tem if lines[0]]
            start = 0
            while len(subfiles) > 0:
                starts = []
                for lines in subfiles:
                    starts.append(start)
                    start = start + len(lines)
                for f, s in zip(subfiles, starts):
                    p.apply(self._process, args=(f, s))
                # p.map_async(self._wrap_p, zip(subfiles, starts))
                tem = [[file.readline() for x in range(self.batch)]
                       for y in range(threads)]
                subfiles = [[line.strip() for line in lines if line]
                            for lines in tem if lines[0]]
            p.close()
            # p.join()
            file.close()
        print("Preprocessing Operation is Completed. Cost Time is {:.2f}s".
              format(time.time() - start_time))
        if sorted is True:
            self.sort(encoding=encoding, remove=remove)
    return fn(*args)


def run_star(args):
    return run_download(*args)


if __name__ == '__main__':
    kaggle_int = 'kaggle.ini'
    if not os.path.exists(kaggle_int):
        print("Please create kaggle.ini first. See kaggle.ini.sample.")
        exit()
    competition, destination = read_args()
    username, password = read_config(kaggle_int)
    if username == "*****@*****.**" or password == "KAGGLE_PASSWORD":
        print(
            "Please setup kaggle.ini using your kaggle username and password.")
    else:
        session = login(username, password)
        data_url_list = get_data_url_by_name(competition)
        pool = pool.Pool()
        tasks = [(download, (url, session, destination))
                 for url in data_url_list]
        results = pool.map_async(run_star, tasks)
        results.wait()
def fit(self, model: SupervisedHeterogeneousNodeClassificationModel, dataset: Dataset) -> None: args = self.args self.device = "cpu" if not torch.cuda.is_available( ) or args.cpu else args.device_id[0] self.data = preprocess_dataset(dataset) global graph_pool graph_pool = self.data self.target_type = "def" self.train_target_nodes = self.data.train_target_nodes self.valid_target_nodes = self.data.valid_target_nodes self.test_target_nodes = self.data.test_target_nodes self.types = self.data.get_types() self.criterion = torch.nn.NLLLoss() self.stats = [] self.res = [] self.best_val = 0 self.train_step = 0 self.pool = mp.Pool(args.n_pool) self.st = time.time() self.jobs = prepare_data( args, self.data, self.target_type, self.train_target_nodes, self.valid_target_nodes, self.pool, ) """ Initialize GNN (model is specified by conv_name) and Classifier """ self.gnn = GNN( conv_name=args.conv_name, in_dim=len( self.data.node_feature[self.target_type]["emb"].values[0]), n_hid=args.n_hid, n_heads=args.n_heads, n_layers=args.n_layers, dropout=args.dropout, num_types=len(self.types), num_relations=len(self.data.get_meta_graph()) + 1, prev_norm=args.prev_norm, last_norm=args.last_norm, use_RTE=False, ) if args.use_pretrain: self.gnn.load_state_dict(load_gnn( torch.load(args.pretrain_model_dir)), strict=False) print("Load Pre-trained Model from (%s)" % args.pretrain_model_dir) self.classifier = Classifier(args.n_hid, self.data.y.max().item() + 1) self.model = torch.nn.Sequential(self.gnn, self.classifier).to(self.device) self.optimizer = torch.optim.AdamW(self.model.parameters(), lr=5e-4) if args.scheduler == "cycle": self.scheduler = torch.optim.lr_scheduler.OneCycleLR( self.optimizer, pct_start=0.02, anneal_strategy="linear", final_div_factor=100, max_lr=args.max_lr, total_steps=args.n_batch * args.n_epoch + 1, ) elif args.scheduler == "cosine": self.scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( self.optimizer, 500, eta_min=1e-6) else: assert False self.train_data = [job.get() for job in self.jobs[:-1]] self.valid_data = self.jobs[-1].get() self.pool.close() self.pool.join() self.et = time.time() print("Data Preparation: %.1fs" % (self.et - self.st)) for epoch in np.arange(self.args.n_epoch) + 1: """ Prepare Training and Validation Data """ train_data = [job.get() for job in self.jobs[:-1]] valid_data = self.jobs[-1].get() self.pool.close() self.pool.join() """ After the data is collected, close the pool and then reopen it. 
""" self.pool = mp.Pool(self.args.n_pool) self.jobs = prepare_data( self.args, self.data, self.target_type, self.train_target_nodes, self.valid_target_nodes, self.pool, ) self.et = time.time() print("Data Preparation: %.1fs" % (self.et - self.st)) """ Train """ self.model.train() train_losses = [] for ( node_feature, node_type, edge_time, edge_index, edge_type, x_ids, ylabel, ) in train_data: node_rep = self.gnn.forward( node_feature.to(self.device), node_type.to(self.device), edge_time.to(self.device), edge_index.to(self.device), edge_type.to(self.device), ) res = self.classifier.forward(node_rep[x_ids]) loss = self.criterion(res, ylabel.to(self.device)) self.optimizer.zero_grad() torch.cuda.empty_cache() loss.backward() torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.args.clip) self.optimizer.step() train_losses += [loss.cpu().detach().tolist()] self.train_step += 1 self.scheduler.step(self.train_step) del res, loss """ Valid """ self.model.eval() with torch.no_grad(): ( node_feature, node_type, edge_time, edge_index, edge_type, x_ids, ylabel, ) = valid_data node_rep = self.gnn.forward( node_feature.to(self.device), node_type.to(self.device), edge_time.to(self.device), edge_index.to(self.device), edge_type.to(self.device), ) res = self.classifier.forward(node_rep[x_ids]) loss = self.criterion(res, ylabel.to(self.device)) """ Calculate Valid F1. Update the best model based on highest F1 score. """ valid_f1 = f1_score(ylabel.tolist(), res.argmax(dim=1).cpu().tolist(), average="micro") if valid_f1 > self.best_val: self.best_val = valid_f1 # torch.save( # self.model, # os.path.join( # self.args.model_dir, # self.args.task_name + "_" + self.args.conv_name, # ), # ) self.best_model_dict = deepcopy(self.model.state_dict()) print("UPDATE!!!") self.st = time.time() print(( "Epoch: %d (%.1fs) LR: %.5f Train Loss: %.2f Valid Loss: %.2f Valid F1: %.4f" ) % ( epoch, (self.st - self.et), self.optimizer.param_groups[0]["lr"], np.average(train_losses), loss.cpu().detach().tolist(), valid_f1, )) self.stats += [[ np.average(train_losses), loss.cpu().detach().tolist() ]] del res, loss del train_data, valid_data self.model.load_state_dict(self.best_model_dict) best_model = self.model.to(self.device) # best_model = torch.load( # os.path.join( # self.args.model_dir, self.args.task_name + "_" + self.args.conv_name # ) # ).to(self.device) best_model.eval() gnn, classifier = best_model with torch.no_grad(): test_res = [] for _ in range(10): ( node_feature, node_type, edge_time, edge_index, edge_type, x_ids, ylabel, ) = node_classification_sample( self.args, self.target_type, randint(), self.test_target_nodes, {1: True}, ) paper_rep = gnn.forward( node_feature.to(self.device), node_type.to(self.device), edge_time.to(self.device), edge_index.to(self.device), edge_type.to(self.device), )[x_ids] res = classifier.forward(paper_rep) test_acc = accuracy_score(ylabel.tolist(), res.argmax(dim=1).cpu().tolist()) test_res += [test_acc] return dict(Acc=np.average(test_res))
import pickle

from multiprocessing import pool

data = pd.read_csv(
    'all-prediction-matrix.csv'
).values  # global matrix accessed by all threads. bad, bad, bad coding practice, but research


def compute_disagreement_row_in_upper_triangular(i):
    # i is the reference column to compute disagreement with
    right_results = [
        np.logical_xor(data[:, i], data[:, j]).sum()
        for j in range(i + 1, data.shape[1])
    ]  # only compute for columns on the right of i
    results = np.zeros(data.shape[1])
    results[i + 1:] = right_results[:]  # pad with zeros on the left side
    return i, results


if __name__ == "__main__":
    poo = pool.Pool()
    res = poo.map(compute_disagreement_row_in_upper_triangular,
                  range(data.shape[1]))
    results = np.vstack([j for i, j in sorted(res, key=lambda x: x[0])
                         ])  # sort and combine rows
    results += results.T  # copy upper triangular to lower triangular
    results = results / float(data.shape[0])  # the fraction of disagreements
    results = 1. - results  # the fraction of agreements
    pickle.dump(results, open("results.p", "wb"))
    print(this_file, 'download finish')


def download_file_given_file_name(file_name):
    url = get_video_url(file_name)
    ret = requests.get(url)
    contents = ret.content
    if ret.status_code == 404:
        raise ValueError('Stream file missing %s' % (file_name))
    file_path = os.path.join(DATA_DIR, file_name)
    with open(file_path, 'wb+') as f:
        f.write(contents)


def download_file(url, filename):
    '''
    downloads the contents of the provided url to a local file
    '''
    contents = requests.get(url).content
    with open(filename, 'wb+') as f:
        f.write(contents)


if __name__ == "__main__":
    from multiprocessing import pool
    p = pool.Pool(20)
    p.map(download_all_videos, [None] * 20)
    # download_all_videos()
def main():
    cli.setup_logging()
    parser = argparse.ArgumentParser(
        description='Plot query response time histogram from answers stored '
        'in LMDB')
    parser.add_argument(
        '-o', '--output', type=str, default='histogram',
        help='output directory for image files (default: histogram)')
    parser.add_argument('-f', '--format', type=str, default='png',
                        help='output image format (default: png)')
    parser.add_argument('-c', '--config', default='respdiff.cfg',
                        dest='cfgpath',
                        help='config file (default: respdiff.cfg)')
    parser.add_argument('envdir', type=str,
                        help='LMDB environment to read answers from')
    args = parser.parse_args()

    config = cfg.read_cfg(args.cfgpath)
    servers = config['servers']['names']
    dnsreplies_factory = DNSRepliesFactory(servers)

    with LMDB(args.envdir, readonly=True) as lmdb_:
        adb = lmdb_.open_db(LMDB.ANSWERS)
        try:
            MetaDatabase(lmdb_, servers, create=False)  # check version and servers
        except NotImplementedError as exc:
            logging.critical(exc)
            sys.exit(1)

        with lmdb_.env.begin(adb) as txn:
            data = load_data(txn, dnsreplies_factory)

    def get_filepath(filename) -> str:
        return os.path.join(args.output, filename + '.' + args.format)

    if not os.path.exists(args.output):
        os.makedirs(args.output)

    create_histogram({k: [tup[0] for tup in d] for (k, d) in data.items()},
                     get_filepath('all'), 'all', config)

    # rcode-specific queries
    with pool.Pool() as p:
        fargs = []
        for rcode in range(HISTOGRAM_RCODE_MAX + 1):
            rcode_text = dns.rcode.to_text(rcode)
            filepath = get_filepath(rcode_text)
            fargs.append((data, filepath, rcode_text, config, rcode))
        p.starmap(histogram_by_rcode, fargs)
    filepath = get_filepath('unparsed')
    histogram_by_rcode(data, filepath, 'unparsed queries', config, None)
# Asynchronous example
from multiprocessing import pool
import time
import os


def test():
    print("---worker process in the pool---pid=%d,ppid=%d" % (os.getpid(), os.getppid()))
    for i in range(3):
        print("---%d---" % i)
        time.sleep(1)
    return "haha"


# args receives the return value of test()
def test2(args):
    print("---callback func--pid=%d" % os.getpid())
    print("---callback func--args=%s" % args)


po = pool.Pool(3)
'''
When the child process finishes test(), the main process is woken up to run
the callback test2(), which makes the call asynchronous.
'''
po.apply_async(func=test, callback=test2)  # the callback makes this asynchronous

while True:
    time.sleep(1)
    print("---main process---pid=%d" % os.getpid())
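# A variant sketch (added for illustration, with hypothetical worker and
# callback names): instead of the busy while-loop above, the main process can
# wait for the pool with close() and join(); the callback still fires
# asynchronously when each task returns.
from multiprocessing import pool
import os
import time


def work(n):
    time.sleep(1)
    return "result-%d from pid %d" % (n, os.getpid())


def on_done(result):  # runs in the main process when a task finishes
    print("callback got:", result)


if __name__ == '__main__':
    po = pool.Pool(3)
    for n in range(3):
        po.apply_async(func=work, args=(n,), callback=on_done)
    po.close()   # no more tasks will be submitted
    po.join()    # block until all submitted tasks (and their callbacks) are done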