Example No. 1
    def crawl(self):
        n = MIN_NUMBER_OF_DOCS
        startingURL = START_PAGES
        os.makedirs(AFTER_CRAWL_BASE_DIR, exist_ok=True)
        self.n = n
        for sURL in startingURL:
            try:
                self.queue.extend(self.parseProfilePage(sURL))
            except Exception:
                print('cannot parse profile page')
                with open(os.path.join(AFTER_CRAWL_BASE_DIR, ERRORS_FILE_NAME), "a") as ErrorFile:
                    ErrorFile.write('cannot parse profile page ' + sURL + '\n')

        from progress.bar import IncrementalBar
        progress_bar = IncrementalBar('Crawling', max=MIN_NUMBER_OF_DOCS, suffix='%(percent)d%% %(remaining)s remaining - eta %(eta_td)s')
        threads = [CrawlThread(self, progress_bar) for t in range(NUMBER_OF_THREADS)]

        for t in threads:
            t.start()

        for t in threads:
            t.join()

        with open(os.path.join(AFTER_CRAWL_BASE_DIR, MAP_FILE_NAME), 'w') as outfile:
            json.dump(self.URLIDMap, outfile)

        progress_bar.finish()
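
Example No. 1 above references a CrawlThread worker that is not included in the snippet. A rough sketch of what such a worker might look like, assuming each thread drains the shared queue, records an ID per crawled profile in URLIDMap, and advances the shared bar (the real class may well differ):

import threading

class CrawlThread(threading.Thread):
    def __init__(self, crawler, progress_bar):
        super(CrawlThread, self).__init__()
        self.crawler = crawler
        self.progress_bar = progress_bar

    def run(self):
        # Keep taking URLs from the shared queue until enough documents exist.
        while self.crawler.queue and len(self.crawler.URLIDMap) < self.crawler.n:
            url = self.crawler.queue.pop(0)
            if url in self.crawler.URLIDMap:
                continue
            try:
                new_links = self.crawler.parseProfilePage(url)
            except Exception:
                continue
            self.crawler.URLIDMap[url] = len(self.crawler.URLIDMap)
            self.crawler.queue.extend(new_links)
            self.progress_bar.next()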
Example No. 2
 def load(self, fp):
     self.size = 0
     file_size = os.fstat(fp.fileno()).st_size
     nblocks = 1 + (file_size - 1) // self.blocksize
     bar = IncrementalBar('Computing', max=nblocks)
     bar.suffix = '%(percent).1f%% - %(eta)ds'
     for block in bar.iter(file_read_iterator(fp, self.blocksize)):
         self.append(self._hash_block(block))
         self.size += len(block)
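
The snippet above relies on a file_read_iterator helper that is not shown. A minimal sketch of such a generator, assuming it only needs to yield fixed-size blocks until end of file:

def file_read_iterator(fp, blocksize):
    # Yield successive blocks of at most `blocksize` bytes until EOF.
    while True:
        block = fp.read(blocksize)
        if not block:
            break
        yield block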
Example No. 3
class ProgressBar(RemoteProgress): # pragma: no cover
    '''Nice looking progress bar for long running commands'''
    def setup(self, repo_name):
        self.bar = Bar(message='Pulling from {}'.format(repo_name), suffix='')

    def update(self, op_code, cur_count, max_count=100, message=''):
        #log.info("{}, {}, {}, {}".format(op_code, cur_count, max_count, message))
        max_count = int(max_count or 100)
        if max_count != self.bar.max:
            self.bar.max = max_count
        self.bar.goto(int(cur_count))
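
The ProgressBar class above hooks into GitPython's RemoteProgress callbacks. A hedged usage sketch (not part of the original source), assuming a repository already cloned at a hypothetical local path:

from git import Repo

repo_path = "/path/to/local/clone"       # hypothetical path
repo = Repo(repo_path)
pbar = ProgressBar()
pbar.setup(repo_name='origin')
repo.remotes.origin.pull(progress=pbar)  # GitPython invokes update() during the pull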
Example No. 4
def save_frames(source, vertices, images_dir):
    print('Saving frames...')
    if not os.path.isdir(images_dir):
        os.makedirs(images_dir)
    bar = IncrementalBar(max=len(vertices))
    angle_change = 360 // len(vertices)
    for i, v in enumerate(vertices):
        update(source, v, angle_change=angle_change)
        mlab.savefig(filename=os.path.join(images_dir, frame_fn(i)))
        bar.next()
    bar.finish()
    mlab.close()
Example No. 5
    def parseHeader(self, msg):
        self.reset()
        if len(msg) != 21:
            print "Size mismatch, is", len(msg)
            return HandshakeMessage.NAK(packetnumber=self.lastpacket)

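        # SysEx data bytes are 7-bit, so multi-byte header fields arrive packed
        # as 7-bit groups, least significant byte first.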
        speriod = int(msg[9]  << 14 | msg[8]  << 7 | msg[7])
        srate   = 1./(speriod *1e-9)
        self.header = {
            "target_id"        : msg[2],
            "sample_number"    : msg[5] << 7 | msg[4],
            "sample_format"    : msg[6],
            "sample_period"    : speriod,
            "sample_rate"      : srate,
            "sample_length"    : msg[12] << 14 | msg[11] << 7 | msg[10],
            "sample_loop_start": msg[15] << 14 | msg[14] << 7 | msg[13],
            "sample_loop_end"  : msg[18] << 14 | msg[17] << 7 | msg[16],
            "loop_type"        : msg[19],
            }

        if self.debug:
            print "Sample Dump Header"
            print "  Data:"
            for k,v in self.header.iteritems():
                print "    %s:" % k, v

        self.raw += msg
        format = int(self.header["sample_format"])
        length = int(self.header["sample_length"])
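        # Each sample word takes ceil(format/7) data bytes in the 7-bit
        # encoding, and a dump data packet carries 120 data bytes.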
        self.exppacket = (format+6)/7*length/120+1
        self.starttime = time.time()
        self.bar = IncrementalBar(
            "Receiving sample dump", max=self.exppacket,
            suffix = '%(percent)d%% [%(elapsed_td)s / %(eta_td)s]')
        return HandshakeMessage.ACK(packetnumber=self.lastpacket)
Example No. 6
def mismas_features_distinto_humor(corpus):
    print("Buscando tweets con mismos valores de features pero distinto de humor...")

    humoristicos = [tweet for tweet in corpus if tweet.es_humor]
    no_humoristicos = [tweet for tweet in corpus if not tweet.es_humor]

    res = []

    bar = IncrementalBar("Buscando en tweets\t\t", max=len(humoristicos) * len(no_humoristicos),
                         suffix=SUFIJO_PROGRESS_BAR)
    bar.next(0)
    for tweet_humor in humoristicos:
        for tweet_no_humor in no_humoristicos:
            if tweet_humor.features == tweet_no_humor.features:
                res.append((tweet_humor, tweet_no_humor))
                if tweet_humor.texto_original == tweet_no_humor.texto_original:
                    print("-----MISMO TEXTO ORIGINAL------")
                if tweet_humor.texto == tweet_no_humor.texto:
                    print("----------MISMO TEXTO----------")
                if tweet_humor.id == tweet_no_humor.id:
                    print("-----------MISMO ID------------")
                if tweet_humor.cuenta == tweet_no_humor.cuenta:
                    print("----------MISMA CUENTA---------")
                print('')
                print(tweet_humor.id)
                print(tweet_humor.texto)
                print("------------")
                print(tweet_no_humor.id)
                print(tweet_no_humor.texto)
                print("------------")
                print('')
            bar.next()
    bar.finish()

    return res
Example No. 7
    def render(self, ctx, invert=False, filename=None, pbar=False):
        """ Generate image of layer.

        Parameters
        ----------
        ctx : :class:`GerberContext`
            GerberContext subclass used for rendering the image

        filename : string <optional>
            If provided, save the rendered image to `filename`

        pbar : bool <optional>
            If true, render a progress bar
        """
        ctx.set_bounds(self.bounds)
        ctx._paint_background()

        if invert:
            ctx.invert = True
            ctx._clear_mask()
        for p in self.primitives:
            ctx.render(p)
        if invert:
            ctx.invert = False
            ctx._render_mask()

        _pbar = None
        if pbar:
            try:
                from progress.bar import IncrementalBar
                _pbar = IncrementalBar(
                    self.filename, max=len(self.primitives)
                )
            except ImportError:
                pbar = False

        for p in self.primitives:
            ctx.render(p)
            if pbar:
                _pbar.next()
        if pbar:
            _pbar.finish()

        if filename is not None:
            ctx.dump(filename)
Example No. 8
def _create_unfilled_voxel_data(
        model_id, edge_length_threshold=0.1, voxel_config=None,
        overwrite=False, example_ids=None):
    from template_ffd.data.ids import get_example_ids
    from shapenet.core import cat_desc_to_id
    from template_ffd.model import load_params
    import numpy as np
    from progress.bar import IncrementalBar
    if voxel_config is None:
        voxel_config = _default_config
    cat_id = cat_desc_to_id(load_params(model_id)['cat_desc'])
    if example_ids is None:
        example_ids = get_example_ids(cat_id, 'eval')
    mesh_dataset = get_inferred_mesh_dataset(model_id, edge_length_threshold)
    voxel_dataset = get_voxel_dataset(
        model_id, edge_length_threshold, voxel_config, filled=False,
        auto_save=False)
    if not overwrite:
        example_ids = [i for i in example_ids if i not in voxel_dataset]
    if len(example_ids) == 0:
        return
    print('Creating %d voxels for model %s' % (len(example_ids), model_id))

    kwargs = dict(
        voxel_dim=voxel_config.voxel_dim,
        exact=voxel_config.exact,
        dc=voxel_config.dc,
        aw=voxel_config.aw)

    with mesh_dataset:
        bar = IncrementalBar(max=len(example_ids))
        for example_id in example_ids:
            bar.next()
            mesh = mesh_dataset[example_id]
            vertices, faces = (
                np.array(mesh[k]) for k in ('vertices', 'faces'))
            binvox_path = voxel_dataset.path(example_id)
            # x, z, y = vertices.T
            # vertices = np.stack([x, y, z], axis=1)
            bio.mesh_to_binvox(
                vertices, faces, binvox_path, **kwargs)
        bar.finish()
Example No. 9
 def startDump(self,filename,size):
     if not self.dump_on: return
     self.dump_written = 0
     self.dump_size = size
     self.closeDumpFile()
     self.createDumpFile(filename)
     print "Dumping '%s'" % filename
     showsize = ' 0x%(index)06x' if self.dump_ram else ''
     self.bar = IncrementalBar(
         max=size,
         suffix = '%(percent)d%% [%(elapsed_td)s / %(eta_td)s]' + showsize)
Example No. 10
    def render_deferred(self):

        if not len(self._deferred):
            return

        print("Optimizing deferred elements")
        paths = self._optimize_deferred().paths

        print("Rendering Paths")
        try:
            from progress.bar import IncrementalBar
            _pbar = IncrementalBar(max=len(paths))
        except ImportError:
            _pbar = None

        for path in paths:
            self._render_path(path)
            if _pbar:
                _pbar.next()
        if _pbar:
            _pbar.finish()
Example No. 11
def main():
    path, vidcap = takeVideo()
    trash, image0 = vidcap.read()
    rows, cols, nslice = image0.shape
    frames = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))
    frameRate = (vidcap.get(cv2.CAP_PROP_FPS))
    print("# Original FrameRate: ", frameRate)
    print(
        "# Aborting with ctrl-C may cause some issues. If it happens, just close the terminal. Btw, i suggest not to abort."
    )
    bar = IncrementalBar('Doing stuff...', max=frames)
    path = os.path.split(path)
    path = path[1].split('.')
    path_out = "output_videos/"
    for i in range(len(path) - 1):
        path_out += path[i]
    path_out += "_out."
    path_out += path[len(path) - 1]
    out = cv2.VideoWriter(path_out, cv2.VideoWriter_fourcc(*'DIVX'),
                          frameRate * 2, (cols, rows))
    iterator = 1
    while (iterator < frames - 1):
        pairs = []
        new_frames = []
        u = 0
        for k in range(50):
            success, image1 = vidcap.read()
            if (not success):
                break
            pairs.append((image0, image1))
            new_frames.append(image0)
            iterator += 1
            u += 1
            image0 = image1

        for k in range(u):
            bar.next()

        typed_pairs = List()
        typed_new_frames = List()
        [typed_pairs.append(x) for x in pairs]
        [typed_new_frames.append(x) for x in new_frames]
        new_frames = njitProcess(typed_pairs, typed_new_frames, rows, cols)
        for k in range(u):
            out.write(pairs[k][0])
            out.write(new_frames[k])

    bar.next()
    bar.finish()
    out.release()
Example No. 12
def trainEpochs(dataset, epochs):
    bar = IncrementalBar("Training no GAN", max=epochs)
    for epoch in range(epochs):
        for vector_batch in dataset:
            train_step(vector_batch)
        bar.next()
    bar.finish()
Example No. 13
def batch_ctrv_integration(state_init, imu_accel, imu_gyro):

    my_bar = IncrementalBar('CTRV_model', max=len(imu_accel))
    save_state = []
    state_v_out = []
    dt = compute_timedelta(imu_accel)[1:]
    # dt_list = compute_timedelta(imu_accel)

    # for count, dt in enumerate(dt_list):
    for count in range(len(dt)):
        if count == 0:
            state_v = deepcopy(state_init)
            save_state.append(state_v)

        else:
            state_v = deepcopy(state_v_out)

        state_v_out = ctrv_imu_command_step(
            state_v_in=state_v,
            a_imu=imu_accel[count],
            w_imu=imu_gyro[count],
            dt_imu=dt[count],
        )

        save_state.append(state_v_out)
        my_bar.next()
    my_bar.finish()
    return ctrv2DataFrame(state=save_state, ref_data=imu_accel)
Example No. 14
def makeReviewPageLinks(bs_obj, reviews = 10, nofHotels = 10):
    """ TripAdvisor review pages links for city-specific hotels """
    tripadvisor_url = "https://www.tripadvisor.com"
    hotel_links = []
    hotelsReviews = bs_obj.findAll('a', {'class': 'review_count'})
    # Keep only hotels with at least `reviews` reviews; filtering into a new
    # list avoids removing items from the list while iterating over it.
    hotelsReviews = [link for link in hotelsReviews
                     if StrToInt(link.get_text().split()[0]) >= reviews]

    bar = IncrementalBar('Generating Hotel Links', max = nofHotels)
    for link in hotelsReviews[:nofHotels]:
        # print(type(link))
        # print(review_count, '\n')
        link_suffix = link['href']
        hotelLink = tripadvisor_url + link_suffix
        hotel_links.append(hotelLink)
        for i in range(5,reviews-4,5):
            next_page = hotelLink[:(hotelLink.find('Reviews')+7)]+ f'-or{i}' + hotelLink[(hotelLink.find('Reviews')+7):]
            hotel_links.append(next_page)

        bar.next()
    bar.finish()
    return hotel_links
Example No. 15
def worker_test(alphas, SVs, test, thread_id, q):

    _, cont_matrix = parse_SAX_vector(SVs[0, :np.max(SVs[0, :].nonzero()[1]) +
                                          1].todense())
    num_contin = cont_matrix.shape[0]
    bar = IncrementalBar('Task ' + str(100 + thread_id)[1:] +
                         ': Calculating Decomposed Scores...',
                         max=test.shape[0])

    scores_decomposed = np.zeros((test.shape[0], 1 + num_contin), dtype=float)
    for j in range(test.shape[0]):
        seq2, cont_matrix2 = parse_SAX_vector(
            test[j, :np.max(test[j, :].nonzero()[1]) + 1].todense())
        for i in range(SVs.shape[0]):
            seq1, cont_matrix1 = parse_SAX_vector(
                SVs[i, :np.max(SVs[i, :].nonzero()[1]) + 1].todense())
            scores_decomposed[j, 0] += alphas[i] * SAX.MKAD_kernel_function(
                np.transpose(seq1), np.transpose(seq2))
            for l in range(num_contin):
                scores_decomposed[j, 1 +
                                  l] += alphas[i] * SAX.MKAD_kernel_function(
                                      np.transpose(cont_matrix1[l, :]),
                                      np.transpose(cont_matrix2[l, :]))
        bar.next()
    bar.finish()
    q.put(scores_decomposed)
    return ([])
Example No. 16
def create_population(graph):
    bar = IncrementalBar("Updating Populations", max=len(graph.nodes()))
    with open("data/Washington/population.csv", "w") as handle:
        for node, data in graph.nodes(data=True):
            bar.next()
            handle.write("{},{}\n".format(node, data.get('population')))
    bar.finish()
Example No. 17
def create_initial(graph):
    bar = IncrementalBar("Updating Initial Seed", max=len(graph.nodes()))
    with open("data/Washington/initial_map.csv", "w") as handle:
        for node, data in graph.nodes(data=True):
            bar.next()
            handle.write("{},{}\n".format(node, data.get('district') - 1))
    bar.finish()
Example No. 18
def iterative_embedding_comparison(all_embs_dict, relative_to=None):
    errors = []
    prev = relative_to

    bar = IncrementalBar('Doing Kabsch algorithm...', max=len(all_embs_dict))
    for key in sorted(all_embs_dict.keys()):
        bar.next()
        if prev is None:
            prev = all_embs_dict[key]
            continue

        error = kabsch_rmsd_error(*get_numpy_V(prev, all_embs_dict[key]))
        errors.append(error)

        if relative_to is None:
            prev = all_embs_dict[key]
    bar.finish()

    #TODO: formalize results serialization
    plt.figure()
    plt.plot(errors, 'r.-', label='rotational error over time')
    plt.legend()
    plt.xlabel('iteration')
    plt.ylabel('error{}'.format(' (relative to other)' if relative_to is not None else ''))
    plt.show()
Example No. 19
 def run(self, initial, iter, param_func, out_dir):
     """
     Run the algorithm with certain number of iterations, given an specific parameter function
     :param initial: initial map
     :param iter: number of iterations
     :param param_func: parameter function
     :return: final sample
     """
     curr = initial
     boundary_nodes = set()
     num_nodes = [0 for i in range(self.district_num)]
     
     boundary_edges = self.get_boundary(initial)
     boundary_lengths = [0 for i in range(self.district_num)]
     
     for e in boundary_edges:
         boundary_nodes.add(e[0])
         boundary_nodes.add(e[1])
         
     for n in self.adj_graph.nodes():
         num_nodes[initial[n]] += 1
         if n in boundary_nodes or n in self.bound:
             boundary_lengths[initial[n]] += 1
             
     
     self.compactness_energy(initial)
     bar = IncrementalBar("Simulation Progress", max=iter)
     for i in range(iter):
         sample = self.make_one_move(curr, param_func, i, num_nodes, boundary_lengths)
         if i % 20000 == 0 or i == iter - 1:
             self.save_intermediate_result(i, curr, out_dir)
         bar.next()
         curr = sample
     bar.finish()
     return curr
Example No. 20
def resize():
  arguments_array = sys.argv
  if len(arguments_array) < 3:
    raise ValueError('Required parameters are missing.')

  source_files_dir = arguments_array[1]
  output_files_dir = arguments_array[2]
  percentage = float(arguments_array[3] if len(arguments_array) >= 4 else 25) / 100
  files = os.listdir(source_files_dir)
  bar = IncrementalBar('Resizing', max=len(files))

  for filename in files:
    if filename.endswith(('.jpg', '.JPG')) is False:
      continue

    with Image(filename=os.path.join(source_files_dir, filename)) as img:
      width = int(img.width * percentage)
      height = int(img.height * percentage)

      with img.clone() as image_clone:
        image_clone.resize(width, height)
        image_clone.save(filename=os.path.join(output_files_dir, filename))
        bar.next()

  bar.finish()
Example No. 21
    def get_user_friends_groups(self):
        user_friends = self.get_user_friends()
        all_friends_groups = []
        current_user = 0

        bar = IncrementalBar('Запрос групп, в которых состоят друзья', max=len(user_friends))
        while current_user < len(user_friends):
            chunk_friends = user_friends[current_user: current_user + 25]
            code = 'var friends_groups = [];' \
                   'var friend_groups;' \
                   f'var friends = {chunk_friends};' \
                   'var i = 0;' \
                   'while (i < friends.length) {' \
                   '  friend_groups = API.groups.get({"user_id": friends[i], "extended": 0});' \
                   '  friends_groups.push(friend_groups);' \
                   '  i = i + 1;' \
                   '}' \
                   'return friends_groups;'
            params = {
                # 'user_id': 171691064,
                'access_token': TOKEN_VK,
                'v': API_VERSION_VK,
                'code': code,
            }
            method = 'execute'

            response = get_response(URL_VK, method, params)
            all_friends_groups.extend([groups['items'] for groups in response['response'] if type(groups) == dict])
            current_user += 25  # maximum number of requests per execute batch
            bar.next(25)
        bar.finish()
        return set(itertools.chain.from_iterable(all_friends_groups))
Example No. 22
def sample_all(num):
    '''
    What needs to be considered here:
    the known images are 24 views of a single model, so each model needs 24 sets of view information;
    those 24 images correspond to one initial model, and the 24 views are used to generate 24 xyz files matching the 24 images.
    One approach:
        for every object under each cat_id, get_path only produces the directory 04530566/10155655850468db78d106ce0a280f87
        in this function, first generate the model, then apply each of the 24 views, producing 24 models stored under 04530566/10155655850468db78d106ce0a280f87/
        note that the file names must match the image names exactly
    :param num: number of points to sample, e.g. 1024 or 4096
    :return:
    '''
    #for cat,cat_id in data_generate.shapenet_taxonomy.shapenet_category_to_id.items():

    cat_id = '04530566'
    obj_path, view_path, output_folder = get_path(cat_id, num)
    print(obj_path)
    print(view_path)
    print(output_folder)

    # len(obj_path) xyz models need to be generated for the cat_id category
    print('Sampling %d pointclouds for cat %s' % (len(obj_path), cat_id))
    bar = IncrementalBar(max=len(obj_path))
    for i in range(len(obj_path)):
        # For each model to generate under category cat_id, sample using the obj file path and the view file path, and write the result to the corresponding output_folder path
        bar.next()
        sample_single(obj_path[i], view_path[i], output_folder[i],
                      num)  # note: view_path is a list here, so it must be indexed
    bar.finish()
Example No. 23
def create_adjacency(graph):
    bar = IncrementalBar("Updating Adjacency Matrix", max=len(graph.edges()))
    with open("data/Washington/adjacency.csv", "w") as handle:
        for source, neighbor in graph.edges():
            bar.next()
            handle.write("{},{}\n".format(source, neighbor))
    bar.finish()
Example No. 24
def worker(index, svmlight_data, thread_id, q):

    bar = IncrementalBar('Task ' + str(100 + thread_id)[1:] +
                         ': Computing Kernel...',
                         max=len(index))
    K = np.zeros((len(index), svmlight_data.shape[0]), dtype=float)
    count = 0
    for I, i in enumerate(index):
        seq1, cont_matrix1 = parse_SAX_vector(
            svmlight_data[i, :svmlight_data.getrow(i).nonzero()[1][-1] +
                          1].todense())
        for j in range(i, svmlight_data.shape[0]):
            seq2, cont_matrix2 = parse_SAX_vector(
                svmlight_data[j, :svmlight_data.getrow(j).nonzero()[1][-1] +
                              1].todense())
            K[I, j] = 0.5 * SAX.MKAD_kernel_function(np.transpose(seq1),
                                                     np.transpose(seq2))
            for l in range(cont_matrix1.shape[0]):
                K[I, j] += 0.5 * SAX.MKAD_kernel_function(
                    np.transpose(cont_matrix1[l, :]),
                    np.transpose(cont_matrix2[l, :])) / cont_matrix1.shape[0]
            count += 1
        bar.next()
    bar.finish()
    q.put(K)
    return ([])
Example No. 25
def test_clean_aggregation(data_set, N_patients, tests, hours_obs, min_tests,
                           max_tests):
    print('Second imputation loop + aggregation -- tests features')
    ii = 0
    if np.logical_not(np.any(min_tests)):
        test_min = np.nanmin(data_set[tests].loc[:], axis=0)
    else:
        test_min = min_tests
    if np.logical_not(np.any(max_tests)):
        test_max = np.nanmax(data_set[tests].loc[:], axis=0)
    else:
        test_max = max_tests
    data_set_new = np.zeros([N_patients * hours_obs, len(tests)])
    bar2 = IncrementalBar('### TESTS features processing ### VS analyzed:',
                          max=len(tests))
    for test in tests:
        test_col = np.array(data_set[test])
        for idx in range(test_col.shape[0]):
            if np.isnan(test_col[idx]):
                test_col[idx] = 0
            elif test_col[idx] > test_max[ii]:
                test_col[idx] = 2
            elif test_col[idx] < test_min[ii]:
                test_col[idx] = 1
            else:
                test_col[idx] = 1 + (test_col[idx] - test_min[ii]) / (
                    test_max[ii] - test_min[ii])
        data_set_new[:, ii] = test_col
        ii = ii + 1
        bar2.next()
        # if ii%5==0: print("Tests analyzed in loop: ", ii)
    bar2.finish()
    return data_set_new, test_max, test_min
Example No. 26
def render_vis(model,
               objective_f,
               param_f=None,
               optimizer=None,
               transforms=None,
               steps=2560,
               relu_gradient_override=True,
               output_size=1024,
               output_path='image.jpg'):
    """Adapted render_vis function from the Lucid library
	https://github.com/tensorflow/lucid/blob/master/lucid/optvis/render.py
	"""

    global _size

    with tf.Graph().as_default() as graph, tf.Session() as sess:

        T = render.make_vis_T(model, objective_f, param_f, optimizer,
                              transforms, relu_gradient_override)
        loss, vis_op, t_image = T('loss'), T('vis_op'), T('input')
        tf.global_variables_initializer().run()

        images = []
        bar = IncrementalBar('Creating image...',
                             max=steps,
                             suffix='%(percent)d%%')
        for i in range(steps):
            sess.run(vis_op, feed_dict={_size: 224})
            bar.next()
        bar.finish()
        print('Saving image as {}.'.format(output_path))
        img = sess.run(t_image, feed_dict={_size: output_size})
        PIL.Image.fromarray((img.reshape(output_size, output_size, 3) *
                             255).astype(np.uint8)).save(output_path)
Example No. 27
def get_jpg_urls(html):
    html_list = []
    url_list = []
    title = ''
    headers = {
        'User-Agent':
        'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:67.0) Gecko/20100101 Firefox/67.0'
    }
    cook = {
        "Cookie":
        'JSESSIONID=4DC92CB4BAF1936E66C223BDD0E61972;Hm_lvt_307de5a4815acb9da76ced3a8b25b867=1564188967'
    }
    jpg_nums = 0
    try:
        print('Get Jpg Collecting... %s' % html)
        r = requests.get(html, cookies=cook, headers=headers,
                         timeout=10).content
        soup = BeautifulSoup(r, 'lxml')
        datas = soup.find(attrs={"class": "pages"})
        data = datas.find('a')
        title = html.split('/')[-2] + '-' + html.split(
            '/')[-1][:-5] + '-' + soup.find('title').text[:-4]
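        # The page count is embedded in the link text as "共N页" ("N pages in total").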
        jpg_nums = int(data.text[data.text.index('共') +
                                 1:data.text.index('页')])
        try:
            datas = soup.findAll('section')
            data = datas[1].find('p')
            data = data.find('img')
            url_list.append(data.get('src'))
            html_head = html[:html.index('.html')]
            for i in range(2, jpg_nums + 1):
                html_list.append(html_head + '_' + str(i) + '.html')
        except:
            print('Find error! %s' % html)
            pass
    except requests.exceptions.ConnectionError:
        print('ConnectionError:%s' % html)
        return []
    except requests.exceptions.ChunkedEncodingError:
        print('ChunkedEncodingError:%s' % html)
        return []

    print("This page have %d" % jpg_nums)
    bar = IncrementalBar("Collect Process",
                         max=jpg_nums,
                         suffix='%(index)d/%(max)d')
    for html in html_list:
        r = requests.get(html, cookies=cook, headers=headers,
                         timeout=10).content
        soup = BeautifulSoup(r, 'lxml')
        try:
            datas = soup.findAll('section')
            data = datas[1].find('img')
            url_list.append(data.get('src'))
            bar.next()
        except:
            pass
    bar.finish()
    return (title, url_list)
Example No. 28
def write_records(path, feature_specs, examples_fn):
    """
    Write data generated by examples_fn to a tfrecords file at path.

    Args:
        path: path to save file to
        feature_specs: a `FeatureSpec` or iterable of `FeatureSpec`s
        examples_fn: function producing an iterable of examples, where each
            example should be an iterable of ndarrays or a single ndarray if
            feature_specs is a FeatureSpec of an
    """
    from progress.bar import IncrementalBar
    feature_specs = _feature_specs(feature_specs)
    examples = examples_fn()
    if hasattr(examples, '__len__'):
        bar = IncrementalBar(max=len(examples))
    else:
        bar = ProxyBar()
    feature_fns = []
    keys = []
    for spec in feature_specs:
        if spec.dtype in _string_dtypes:
            feature_fns.append(_bytes_feature)
        elif spec.dtype in _float_dtypes:
            feature_fns.append(_float32_feature)
        elif spec.dtype in _int_dtypes:
            feature_fns.append(_int64_feature)
        else:
            raise RuntimeError('Invalid dtype: %s' % str(spec.dtype))
        keys.append(spec.key)
    try:
        folder = os.path.dirname(path)
        if not os.path.isdir(folder):
            os.makedirs(folder)
        with tf.python_io.TFRecordWriter(path) as writer:
            print('Creating tf_records: %s' % path)
            for example in examples:
                if isinstance(example, np.ndarray):
                    assert (len(feature_specs) == 1)
                    example = example,
                ex = tf.train.Example()
                features = tf.train.Features(feature={
                    k: f(e)
                    for k, f, e in zip(keys, feature_fns, example)
                })
                ex = tf.train.Example(features=features)
                writer.write(ex.SerializeToString())
                bar.next()
            bar.finish()
    except Exception:
        print('Writing dataset failed. Deleting...')
        if os.path.isfile(path):
            os.remove(path)
        raise
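
write_records falls back to a ProxyBar when examples_fn() returns an iterable without __len__; that class is not part of the snippet. A minimal no-op stand-in, assuming it only has to mirror the next()/finish() calls used above:

class ProxyBar(object):
    """Placeholder progress bar used when the total count is unknown."""

    def next(self, n=1):
        pass

    def finish(self):
        pass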
Example No. 29
def fillWithSharedLinks(accessToken, imgFolders, xlPath, savePath):
    # create the client object
    dbx = dropbox.Dropbox(accessToken)

    img_link_dict = dict()

    print("Getting Links .....")

    # loop through the folders
    for im in range(0, len(imgFolders)):

        imgFolder = imgFolders[im]

        # create a list of all images in the folder
        imgs = getAllFiles(dbx, imgFolder)

        # iterate through the list of images and create shared links (store the codes and links in a dict)
        bar = IncrementalBar(('Folder %d/%d' % (im + 1, len(imgFolders))), max=len(imgs))

        for entry in imgs:
            nm = entry.name
            pth = imgFolder + "/" + nm

            # check if a link already exists and retrieve it, if not create one
            lnkdata = dbx.sharing_list_shared_links(pth, direct_only=True)

            if len(lnkdata.links) == 0:
                metadata = dbx.sharing_create_shared_link_with_settings(pth)
                lnk = metadata.url
            else:
                lnk = lnkdata.links[0].url

            img_link_dict[nm[:-4]] = lnk

            bar.next()
        bar.finish()

    # open the excel file and get the sheet
    workBook = pyxl.load_workbook(xlPath)
    sheet = workBook['List']

    # iterate through
    print("Filling Spreadsheet .....")
    for row in range(2, sheet.max_row):
        # get the code
        code = str(sheet.cell(row, 1).value)

        if not img_link_dict.keys().__contains__(code):
            continue

        # get the link
        link = img_link_dict[code]

        # insert the link in the sheet
        sheet.cell(row, 3).value = link

    # save the excel sheet
    savenm = xlPath.split("\\")[-1].split(".")[0] + ' - With Links.xlsx'
    workBook.save(savePath + "\\" + savenm)
Example No. 30
 def __init__(
     self,
     shift_duration: int,
     sample_size=100,
     number_of_clusters=20,
     initial_state=None,
     policy="RandomRolloutPolicy",
     initial_location_depot=True,
     verbose=False,
 ):
     self.shift_duration = shift_duration
     if initial_state:
         self.state = initial_state
     else:
         self.state = clustering_scripts.get_initial_state(
             sample_size=sample_size,
             number_of_clusters=number_of_clusters,
             initial_location_depot=initial_location_depot,
         )
     self.stack = []
     self.time = 0
     self.rewards = []
     self.cluster_flow = {
         (start, end): 0
         for start in np.arange(len(self.state.clusters))
         for end in np.arange(len(self.state.clusters))
         if start != end
     }
     self.policy = get_policy(policy)
     self.metrics = World.WorldMetric()
     self.verbose = verbose
     if verbose:
         self.progress_bar = IncrementalBar(
             "Running World",
             check_tty=False,
             max=round(shift_duration / ITERATION_LENGTH_MINUTES) + 1,
             color=WHITE,
             suffix="%(percent)d%% - ETA %(eta)ds",
         )
Example No. 31
def upload_changed_files(bintray_client, local_files, bintray_files):
    uploaded_files = 0
    for path in IncrementalBar(f"Uploading files",
                               suffix=PROGRESS_BAR_FORMAT).iter(local_files):
        if not any(
                str(path) == local_path(bintray_file)
                and get_sha1_hash(path) == bintray_file["sha1"]
                for bintray_file in bintray_files):
            uploaded_files += 1
            bintray_client.upload_file(path)
    print(
        f"uploaded {uploaded_files} files, skipped {len(local_files) - uploaded_files} files that already existed"
    )
Example No. 32
def read_path(path):
    files = os.listdir(path)
    bar = IncrementalBar('Processing folder ' + path, max=len(files))
    for file in files:
        read_file(path + '/' + file)
        bar.next()

    bar.finish()
Example No. 33
 def calcular_features_faltantes_thread(self, tweets, identificador):
     if len(tweets) > 0:
         bar = IncrementalBar("Calculando features - " +
                              unicode(identificador),
                              max=len(tweets) * len(self.features),
                              suffix=SUFIJO_PROGRESS_BAR)
         bar.next(0)
         for tweet in tweets:
             for feature in list(self.features.values()):
                 self.abortar_si_feature_no_es_thread_safe(feature)
                 if feature.nombre not in tweet.features:
                     tweet.features[
                         feature.nombre] = feature.calcular_feature(tweet)
                 bar.next()
         bar.finish()
Example No. 34
def load_images(file_names, img_shape):
    imgs = []
    bar = IncrementalBar('Countdown', max=len(file_names))
    for f in file_names:
        bar.next()
        imgs.append(cv2.resize(cv2.imread(f, cv2.IMREAD_COLOR), img_shape, interpolation=cv2.INTER_AREA).reshape(-1, img_shape[0], img_shape[1], 3))
    bar.finish()
    return np.asarray(imgs).reshape(-1, img_shape[0], img_shape[1], 3)
Example No. 35
 def calcular_feature_thread(self, tweets, nombre_feature, identificador):
     if len(tweets) > 0:
         bar = IncrementalBar("Calculando feature " + nombre_feature + ' - ' + unicode(identificador),
                              max=len(tweets),
                              suffix=SUFIJO_PROGRESS_BAR)
         bar.next(0)
         feature = self.features[nombre_feature]
         self.abortar_si_feature_no_es_thread_safe(feature)
         for tweet in tweets:
             tweet.features[feature.nombre] = feature.calcular_feature(tweet)
             bar.next()
         bar.finish()
Example No. 36
def run_draw(mask_file, plate_bkg, plate_txt, dots=1600, save_all=False):
    mask_img = Image.open(mask_file)
    mask_img = mask_img.convert('RGB')
    size_x = mask_img.size[0]
    size_y = mask_img.size[1]

    min_val = min(size_x, size_y)
    min_rad = math.floor(min_val / 150)

    if (min_rad == 0): min_rad = 1
    max_rad = math.floor(min_val / 75)
    if (max_rad == 0):
        print("IMG Size is too small")
        quit()

    total_circles = dots

    image = Image.new(mode='RGB', size=(size_x, size_y), color='white')

    #from faker import Factory
    #fake = Factory.create()
    bar = IncrementalBar("Drawing Circles",
                         max=total_circles,
                         suffix='%(percent).1f%% - %(eta)ds')

    i = 0  #counter
    while (i < total_circles):
        new_circle = circle.generate(size_x, size_y, min_rad, max_rad)
        if(circle.is_free_space(image, new_circle[0], new_circle[1],\
            new_circle[2], size_x, size_y)):

            coordinates = circle.get_coordinates(new_circle[0], new_circle[1],
                                                 new_circle[2])

            if (in_mask(mask_img, new_circle[0], new_circle[1]) == False):
                #color = fake.hex_color()
                #plate = random.choice([color])
                plate = random.choice(plate_bkg)
            else:
                #plate = random.choice(["#000000"])
                plate = random.choice(plate_txt)

            circle.draw(image, coordinates,
                        mask_img.getpixel(
                            (new_circle[0], new_circle[1])))  #plate)
            i += 1
            bar.next()
            if (save_all):
                i_str = str(i)
                i_format = i_str.zfill(5)
                filename = "files/run1/" + i_format + ".png"
                image.save(filename)

    bar.finish()
    print("Done")
    filename = os.path.splitext(mask_file)[0] + "_ishihara" + ".png"

    image.save(filename)
Example No. 37
 def save_items(self, items, overwrite=False, show_progress=True):
     if not self.is_open:
         raise IOError('Cannot save to non-open dataset.')
     if show_progress:
         if hasattr(items, '__len__'):
             bar = IncrementalBar(max=len(items))
         else:
             bar = IncrementalBar()
     else:
         bar = DummyBar()
     for key, value in items:
         bar.next()
         if key in self:
             if overwrite:
                 self.delete_item(key)
             else:
                 continue
         self.save_item(key, value)
     bar.finish()
Example No. 38
 def calcular_features_thread(self, tweets, identificador):
     if len(tweets) > 0:
         bar = IncrementalBar("Calculando features - " + unicode(identificador),
                              max=len(tweets) * len(self.features),
                              suffix=SUFIJO_PROGRESS_BAR)
         bar.next(0)
         for tweet in tweets:
             for feature in list(self.features.values()):
                 self.abortar_si_feature_no_es_thread_safe(feature)
                 tweet.features[feature.nombre] = feature.calcular_feature(tweet)
                 bar.next()
         bar.finish()
Example No. 39
 def _progress_update(self, item, items_done):
     if self._progress is None:
         self._progress = Bar()
         self._progress.message = '%(index)d/%(max)d'
         self._progress.suffix = ''
     if item:
         items_done[item] = True
     self._progress.max = len(items_done)
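     # Note: len(filter(...)) assumes Python 2; on Python 3, filter() returns
     # an iterator, so the count would need e.g. sum(1 for v in items_done.values() if v).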
     self._progress.index = len(filter(None, items_done.values()))
     with lock:
         try:
             self._progress.update()
         except ZeroDivisionError:
             pass
Example No. 40
def download(resource, output):
    link, file_path = resource.values()

    with open(os.path.join(output, file_path), "wb") as file:
        response = requests.get(link, stream=True)
        total_size_in_bytes = int(response.headers.get("content-length", 0))
        block_size = 1024
        number_of_blocks = total_size_in_bytes // block_size + 1
        with IncrementalBar(f"{file_path}",
                            max=number_of_blocks,
                            suffix="%(percent)d%%") as bar:
            for data in response.iter_content(block_size):
                file.write(data)
                bar.next()
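
A hedged usage sketch (not part of the original): download() unpacks resource.values() positionally, so the dict is assumed to hold the link first and the target file name second:

resource = {
    "link": "https://example.com/archive.zip",  # hypothetical URL
    "file_path": "archive.zip",
}
download(resource, output="downloads")          # "downloads" must already exist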
Example No. 41
def buildNotes(path: Path):
    global AnkiNotes
    conn = sqlite3.connect(path.joinpath("collection.anki2").as_posix())
    cursor = conn.cursor()
    cursor.execute("SELECT * FROM notes")
    rows = cursor.fetchall()
    with IncrementalBar('\tBuilding Notes', max=len(rows)) as bar:
        for row in rows:
            nid, guid, mid, mod, usn, tags, flds, sfld, csum, flags, data = row
            reqModel = AnkiModels[str(mid)]
            AnkiNotes[str(nid)] = Note(reqModel, flds)
            AnkiNotes[str(nid)].tags = EmptyString(tags).split(" ")
            bar.next()
        bar.finish()
Example No. 42
 def upload(self):
     photos_list = self.get_photos()
     if photos_list is None:
         print('Профиль является закрытым.\nЗагрузка фото невозможна')
         return
     photos_list = photos_list['data']
     print('Список фото для отправки подготовлен')
     HEADERS = {'Authorization': self.ya_token}
     purums = {'path': str(self.id)}
     res = requests.put('https://cloud-api.yandex.net/v1/disk/resources',
                        params=purums,
                        headers=HEADERS)
     bar = IncrementalBar('Загрузка файлов на диск', max=len(photos_list))
     for photo in photos_list:
         path = str(self.id) + '/' + photo["name"]
         params = {'path': path, 'url': photo['url']}
         resp = requests.post(
             'https://cloud-api.yandex.net/v1/disk/resources/upload',
             params=params,
             headers=HEADERS)
         bar.next()
     print('\nЗагрузка на диск завершена!')
     return 'Done'
Example No. 44
def guardar_parecidos_con_distinto_humor(pares_parecidos_distinto_humor):
    with closing(open_db()) as conexion:
        with closing(conexion.cursor()) as cursor:
            consulta = "INSERT INTO tweets_parecidos_distinto_humor VALUES (%s, %s)" \
                       + " ON DUPLICATE KEY UPDATE id_tweet_no_humor = %s"

            bar = IncrementalBar("Guardando tweets parecidos\t", max=len(pares_parecidos_distinto_humor),
                                 suffix=SUFIJO_PROGRESS_BAR)
            bar.next(0)

            for tweet_humor, tweet_no_humor in pares_parecidos_distinto_humor:
                cursor.execute(consulta, (tweet_humor.id, tweet_no_humor.id, tweet_no_humor.id))
                bar.next()

            conexion.commit()
            bar.finish()
Example No. 45
def cross_validation_y_reportar(clasificador, features, clases, numero_particiones):
    skf = cross_validation.StratifiedKFold(clases, n_folds=numero_particiones)
    features = np.array(features)
    clases = np.array(clases)
    matrices = []
    medidas = defaultdict(list)

    bar = IncrementalBar("Realizando cross-validation\t", max=numero_particiones, suffix=SUFIJO_PROGRESS_BAR)
    bar.next(0)
    for entrenamiento, evaluacion in skf:
        clasificador.fit(features[entrenamiento], clases[entrenamiento])
        clases_predecidas = clasificador.predict(features[evaluacion])
        matriz_de_confusion = metrics.confusion_matrix(clases[evaluacion], clases_predecidas).flatten()
        matrices.append(matriz_de_confusion)
        for medida, valor_medida in calcular_medidas(*matriz_de_confusion).items():
            medidas[medida].append(valor_medida)
        bar.next()

    bar.finish()

    promedios = {}

    print('')
    print("Resultados de cross-validation:")
    print('')
    for medida, valor_medida in medidas.items():
        print("\t{medida: >18s}:\t{valor_medida}".format(medida=medida, valor_medida=valor_medida))
        promedio = np.mean(valor_medida)
        promedios[medida] = promedio
        delta = np.std(valor_medida) * 1.96 / math.sqrt(numero_particiones)
        print("Intervalo de confianza 95%:\t{promedio:0.4f} ± {delta:0.4f} --- [{inf:0.4f}, {sup:0.4f}]".format(
            promedio=promedio, delta=delta, inf=promedio - delta, sup=promedio + delta))
        print('')

    imprimir_matriz_metricas(
        promedios['Precision No humor'],
        promedios['Recall No humor'],
        promedios['F1-score No humor'],
        promedios['Precision Humor'],
        promedios['Recall Humor'],
        promedios['F1-score Humor'],
    )

    print('')
    print('')
    print('')
Example No. 46
def guardar_parecidos_con_distinto_humor(pares_parecidos_distinto_humor):
    with closing(mysql.connector.connect(user=DB_USER, password=DB_PASS, host=DB_HOST, database=DB_NAME)) as conexion:
        with closing(conexion.cursor()) as cursor:
            consulta = (
                "INSERT INTO tweets_parecidos_distinto_humor VALUES (%s, %s)"
                + " ON DUPLICATE KEY UPDATE id_tweet_no_humor = %s"
            )

            bar = IncrementalBar(
                "Guardando tweets parecidos\t", max=len(pares_parecidos_distinto_humor), suffix=SUFIJO_PROGRESS_BAR
            )
            bar.next(0)

            for tweet_humor, tweet_no_humor in pares_parecidos_distinto_humor:
                cursor.execute(consulta, (tweet_humor.id, tweet_no_humor.id, tweet_no_humor.id))
                bar.next()

            conexion.commit()
            bar.finish()
Example No. 47
def cargar_parecidos_con_distinto_humor():
    with closing(open_db()) as conexion:
        # buffered=True so the row count is known before iterating.
        with closing(conexion.cursor() if DB_ENGINE == 'sqlite3' else conexion.cursor(buffered=True)) as cursor:
            consulta = """
            SELECT id_tweet_humor,
                   id_tweet_no_humor
            FROM   tweets_parecidos_distinto_humor
            """

            cursor.execute(consulta)

            pares_ids_parecidos_con_distinto_humor = []

            bar = IncrementalBar("Cargando tweets parecidos\t", max=cursor.rowcount, suffix=SUFIJO_PROGRESS_BAR)
            bar.next(0)

            for par_ids in cursor:
                pares_ids_parecidos_con_distinto_humor.append(par_ids)
                bar.next()

            bar.finish()

            return pares_ids_parecidos_con_distinto_humor
Example No. 48
def cargar_parecidos_con_distinto_humor():
    with closing(mysql.connector.connect(user=DB_USER, password=DB_PASS, host=DB_HOST, database=DB_NAME)) as conexion:
        # buffered=True so the row count is known before iterating.
        with closing(conexion.cursor(buffered=True)) as cursor:
            consulta = """
            SELECT id_tweet_humor,
                   id_tweet_no_humor
            FROM   tweets_parecidos_distinto_humor
            """

            cursor.execute(consulta)

            pares_ids_parecidos_con_distinto_humor = []

            bar = IncrementalBar("Cargando tweets parecidos\t", max=cursor.rowcount, suffix=SUFIJO_PROGRESS_BAR)
            bar.next(0)

            for par_ids in cursor:
                pares_ids_parecidos_con_distinto_humor.append(par_ids)
                bar.next()

            bar.finish()

            return pares_ids_parecidos_con_distinto_humor
Example No. 49
def guardar_features(tweets, **opciones):
    nombre_feature = opciones.pop("nombre_feature", None)
    conexion = mysql.connector.connect(user=DB_USER, password=DB_PASS, host=DB_HOST, database=DB_NAME)
    cursor = conexion.cursor()

    consulta = "INSERT INTO features VALUES (%s, %s, %s) ON DUPLICATE KEY UPDATE valor_feature = %s"

    if nombre_feature:
        mensaje = "Guardando feature " + nombre_feature
    else:
        mensaje = "Guardando features"

    bar = IncrementalBar(mensaje, max=len(tweets), suffix=SUFIJO_PROGRESS_BAR)
    bar.next(0)

    for tweet in tweets:
        if nombre_feature:
            cursor.execute(
                consulta,
                (
                    tweet.id,
                    nombre_feature,
                    unicode(tweet.features[nombre_feature]),
                    unicode(tweet.features[nombre_feature]),
                ),
            )
        else:
            for nombre_feature, valor_feature in tweet.features.items():
                cursor.execute(consulta, (tweet.id, nombre_feature, unicode(valor_feature), unicode(valor_feature)))
        bar.next()

    conexion.commit()
    bar.finish()

    cursor.close()
    conexion.close()
Example No. 50
def guardar_features(tweets, **opciones):
    nombre_feature = opciones.pop('nombre_feature', None)
    conexion = open_db()
    cursor = conexion.cursor()

    consulta = "INSERT INTO features VALUES (%s, %s, %s) ON DUPLICATE KEY UPDATE valor_feature = %s"

    if nombre_feature:
        mensaje = 'Guardando feature ' + nombre_feature
    else:
        mensaje = 'Guardando features'

    bar = IncrementalBar(mensaje, max=len(tweets), suffix=SUFIJO_PROGRESS_BAR)
    bar.next(0)

    for tweet in tweets:
        if nombre_feature:
            cursor.execute(
                consulta,
                (
                    tweet.id,
                    nombre_feature,
                    unicode(tweet.features[nombre_feature]),
                    unicode(tweet.features[nombre_feature])
                )
            )
        else:
            for nombre_feature, valor_feature in tweet.features.items():
                cursor.execute(consulta, (tweet.id, nombre_feature, unicode(valor_feature), unicode(valor_feature)))
        bar.next()

    conexion.commit()
    bar.finish()

    cursor.close()
    conexion.close()
Example No. 51
 def __init__(self):
     self.baseURL = 'https://www.researchgate.net/'
     from progress.bar import IncrementalBar
     self.progress_bar = IncrementalBar('Crawling', max=MIN_NUMBER_OF_PROFILE, suffix='%(percent)d%% %(remaining)s remaining - eta %(eta_td)s')
Example No. 52
def tweets_parecidos_con_distinto_humor(corpus):
    print("Buscando tweets muy parecidos pero con distinto valor de humor...")

    parecidos_con_distinto_humor = set()

    ids_parecidos_con_distinto_humor = cargar_parecidos_con_distinto_humor()

    if ids_parecidos_con_distinto_humor:
        corpus_por_id = {tweet.id: tweet for tweet in corpus}
        for id_tweet_humor, id_tweet_no_humor in ids_parecidos_con_distinto_humor:
            parecidos_con_distinto_humor.add((corpus_por_id[id_tweet_humor], corpus_por_id[id_tweet_no_humor]))
    else:
        subcorpus_cuentas_de_humor = []
        subsubcorpus_cuentas_de_humor_humor = []
        subsubcorpus_cuentas_de_humor_no_humor = []
        for tweet in corpus:
            if tweet.es_chiste:
                subcorpus_cuentas_de_humor.append(tweet)
                if tweet.es_humor:
                    subsubcorpus_cuentas_de_humor_humor.append(tweet)
                else:
                    subsubcorpus_cuentas_de_humor_no_humor.append(tweet)

        subsubcorpus_cuentas_de_humor_no_humor_por_largo = defaultdict(list)

        bar = IncrementalBar("Tokenizando\t\t\t", max=len(subcorpus_cuentas_de_humor),
                             suffix=SUFIJO_PROGRESS_BAR)
        bar.next(0)
        for tweet_cuenta_humor in subcorpus_cuentas_de_humor:
            tweet_cuenta_humor.oraciones = Freeling.procesar_texto(tweet_cuenta_humor.texto_original)
            tweet_cuenta_humor.tokens = list(itertools.chain(*tweet_cuenta_humor.oraciones))
            bar.next()
        bar.finish()

        for tweet_no_humor in subsubcorpus_cuentas_de_humor_no_humor:
            subsubcorpus_cuentas_de_humor_no_humor_por_largo[len(tweet_no_humor.tokens)].append(tweet_no_humor)

        bar = IncrementalBar("Buscando en tweets\t\t", max=len(subsubcorpus_cuentas_de_humor_humor),
                             suffix=SUFIJO_PROGRESS_BAR)
        bar.next(0)
        for tweet_humor in subsubcorpus_cuentas_de_humor_humor:
            margen = int(round(len(tweet_humor.tokens) / 5))
            largo_min = len(tweet_humor.tokens) - margen
            largo_max = len(tweet_humor.tokens) + margen

            for largo in range(largo_min, largo_max + 1):
                for tweet_no_humor in subsubcorpus_cuentas_de_humor_no_humor_por_largo[largo]:
                    if distancia_edicion(tweet_humor.tokens, tweet_no_humor.tokens)\
                            <= max(len(tweet_humor.tokens), len(tweet_no_humor.tokens)) / 5:
                        parecidos_con_distinto_humor.add((tweet_humor, tweet_no_humor))
                        print('')
                        print(tweet_humor.id)
                        print(tweet_humor.texto_original)
                        print("------------")
                        print(tweet_no_humor.id)
                        print(tweet_no_humor.texto_original)
                        print("------------")
                        print('')
            bar.next()
        bar.finish()

        guardar_parecidos_con_distinto_humor(parecidos_con_distinto_humor)

    return parecidos_con_distinto_humor
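
The function above relies on a distancia_edicion helper (a token-level edit distance) that is not included. A minimal sketch, assuming a plain Levenshtein distance over two token lists:

def distancia_edicion(tokens_a, tokens_b):
    # Standard dynamic-programming Levenshtein distance over two sequences.
    fila_previa = list(range(len(tokens_b) + 1))
    for i, token_a in enumerate(tokens_a, start=1):
        fila_actual = [i]
        for j, token_b in enumerate(tokens_b, start=1):
            costo = 0 if token_a == token_b else 1
            fila_actual.append(min(fila_previa[j] + 1,           # deletion
                                   fila_actual[j - 1] + 1,       # insertion
                                   fila_previa[j - 1] + costo))  # substitution
        fila_previa = fila_actual
    return fila_previa[-1]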
Example No. 53
 def setup(self, repo_name):
     self.bar = Bar(message='Pulling from {}'.format(repo_name), suffix='')
Example No. 54
    def find_solutions(self, graph_setting_groups):
        results = {}
        # check for solutions for a specific set of interaction settings
        logging.info("Number of interaction settings groups being processed: "
                     + str(len(graph_setting_groups)))
        for strength, graph_setting_group in sorted(
                graph_setting_groups.items(), reverse=True):
            logging.info("processing interaction settings group with "
                         "strength " + str(strength))
            logging.info(str(len(graph_setting_group)) +
                         " entries in this group")
            logging.info("running with " +
                         str(self.number_of_threads) + " threads...")

            temp_results = []
            bar = IncrementalBar('Propagating quantum numbers...',
                                 max=len(graph_setting_group))
            bar.update()
            if self.number_of_threads > 1:
                with Pool(self.number_of_threads) as p:
                    for result in p.imap_unordered(
                            self.propagate_quantum_numbers,
                            graph_setting_group, 1):
                        temp_results.append(result)
                        bar.next()
            else:
                for graph_setting_pair in graph_setting_group:
                    temp_results.append(self.propagate_quantum_numbers(
                        graph_setting_pair))
                    bar.next()
            bar.finish()
            logging.info('Finished!')
            if strength not in results:
                results[strength] = []
            results[strength].extend(temp_results)

        for k, v in results.items():
            logging.info(
                "number of solutions for strength ("
                + str(k) + ") after qn propagation: "
                + str(sum([len(x[0]) for x in v])))

        # remove duplicate solutions, which only differ in the interaction qn S
        results = remove_duplicate_solutions(results, self.filter_remove_qns,
                                             self.filter_ignore_qns)

        node_non_satisfied_rules = []
        solutions = []
        for result in results.values():
            for (tempsolutions, non_satisfied_laws) in result:
                solutions.extend(tempsolutions)
                node_non_satisfied_rules.append(non_satisfied_laws)
        logging.info("total number of found solutions: " +
                     str(len(solutions)))
        violated_laws = []
        if len(solutions) == 0:
            violated_laws = analyse_solution_failure(node_non_satisfied_rules)
            logging.info("violated rules: " + str(violated_laws))

        # finally perform combinatorics of identical external edges
        # (initial or final state edges) and prepare graphs for
        # amplitude generation
        match_external_edges(solutions)
        final_solutions = []
        for sol in solutions:
            final_solutions.extend(
                perform_external_edge_identical_particle_combinatorics(sol)
            )

        return (final_solutions, violated_laws)
Example No. 55
def migrate(callback):
    connection = op.get_bind()

    s = sa.select([n.c.node, n.c.path])
    nodes = connection.execute(s).fetchall()
    bar = IncrementalBar('Migrating node paths...', max=len(nodes))
    for node, path in nodes:
        account, sep, rest = path.partition('/')
        match = callback(account)
        if not match:
            bar.next()
            continue
        path = sep.join([match, rest])
        u = n.update().where(n.c.node == node).values({'path':path})
        connection.execute(u)
        bar.next()
    bar.finish()

    s = sa.select([v.c.muser]).distinct()
    musers = connection.execute(s).fetchall()
    bar = IncrementalBar('Migrating version modification users...',
                         max=len(musers)
    )
    for muser, in musers:
        match = callback(muser)
        if not match:
            bar.next()
            continue
        u = v.update().where(v.c.muser == muser).values({'muser': match})
        connection.execute(u)
        bar.next()
    bar.finish()

    s = sa.select([p.c.public_id, p.c.path])
    public = connection.execute(s).fetchall()
    bar = IncrementalBar('Migrating public paths...', max=len(public))
    for id, path in public:
        account, sep, rest = path.partition('/')
        match = callback(account)
        if not match:
            bar.next()
            continue
        path = sep.join([match, rest])
        u = p.update().where(p.c.public_id == id).values({'path':path})
        connection.execute(u)
        bar.next()
    bar.finish()

    s = sa.select([x.c.feature_id, x.c.path])
    xfeatures = connection.execute(s).fetchall()
    bar = IncrementalBar('Migrating permission paths...', max=len(xfeatures))
    for id, path in xfeatures:
        account, sep, rest = path.partition('/')
        match = callback(account)
        if not match:
            bar.next()
            continue
        path = sep.join([match, rest])
        u = x.update().where(x.c.feature_id == id).values({'path':path})
        connection.execute(u)
        bar.next()
    bar.finish()

    s = sa.select([xvals.c.feature_id, xvals.c.key, xvals.c.value])
    s = s.where(xvals.c.value != '*')
    xfeaturevals = connection.execute(s).fetchall()
    bar = IncrementalBar('Migrating permission holders...',
                         max=len(xfeaturevals))
    for feature_id, key, value in xfeaturevals:
        account, sep, group = value.partition(':')
        match = callback(account)
        if not match:
            bar.next()
            continue
        new_value = sep.join([match, group])
        u = xvals.update()
        u = u.where(and_(
                xvals.c.feature_id == feature_id,
                xvals.c.key == key,
                xvals.c.value == value))
        u = u.values({'value':new_value})
        connection.execute(u)
        bar.next()
    bar.finish()

    s = sa.select([g.c.owner, g.c.name, g.c.member])
    groups = connection.execute(s).fetchall()
    bar = IncrementalBar('Migrating group owners & members...',
                         max=len(groups))
    for owner, name, member in groups:
        owner_match = callback(owner)
        member_match = callback(member)
        if owner_match or member_match:
            u = g.update()
            u = u.where(and_(
                g.c.owner == owner,
                g.c.name == name,
                g.c.member == member))
            values = {}
            if owner_match:
                values['owner'] = owner_match
            if member_match:
                values['member'] = member_match
            u = u.values(values)
            connection.execute(u)
        # advance the bar for every group row, not only updated ones,
        # so it reaches its max
        bar.next()
    bar.finish()

import random
import time

from progress.bar import (Bar, ChargingBar, FillingSquaresBar,
                          FillingCirclesBar, IncrementalBar, PixelBar,
                          ShadyBar)
from progress.counter import Counter, Countdown, Pie, Stack
from progress.spinner import (LineSpinner, MoonSpinner, PieSpinner,
                              PixelSpinner, Spinner)


def sleep():
    t = 0.01  # base delay per step (assumed value); any short pause works here
    t += t * random.uniform(-0.1, 0.1)  # Add some variance
    time.sleep(t)


for bar_cls in (Bar, ChargingBar, FillingSquaresBar, FillingCirclesBar):
    suffix = '%(index)d/%(max)d [%(elapsed)d / %(eta)d / %(eta_td)s]'
    bar = bar_cls(bar_cls.__name__, suffix=suffix)
    for i in bar.iter(range(200)):
        sleep()

for bar_cls in (IncrementalBar, PixelBar, ShadyBar):
    suffix = '%(percent)d%% [%(elapsed_td)s / %(eta)d / %(eta_td)s]'
    with bar_cls(bar_cls.__name__, suffix=suffix, max=200) as bar:
        for i in range(200):
            bar.next()
            sleep()

for spin in (Spinner, PieSpinner, MoonSpinner, LineSpinner, PixelSpinner):
    for i in spin(spin.__name__ + ' ').iter(range(100)):
        sleep()

for singleton in (Counter, Countdown, Stack, Pie):
    for i in singleton(singleton.__name__ + ' ').iter(range(100)):
        sleep()

bar = IncrementalBar('Random', suffix='%(index)d')
for i in range(100):
    bar.goto(random.randint(0, 100))
    sleep()
bar.finish()
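The demo above only exercises the stock bar, spinner, and counter classes. Their appearance is controlled by class attributes, so a custom variant is usually just a small subclass. A hedged sketch follows; the class name and suffix string are illustrative, and it reuses the sleep() helper defined above:

from progress.bar import IncrementalBar


class EtaBar(IncrementalBar):
    # class attributes drive the rendering; this suffix adds an index and an ETA
    suffix = '%(index)d/%(max)d - %(percent).1f%% - eta %(eta_td)s'


bar = EtaBar('Processing', max=30)
for i in range(30):
    sleep()
    bar.next()
bar.finish()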
Exemplo n.º 57
class AuthorCrawler:
    visitedProfileURL = []
    queueProfileURL = []
    visitedArticleURL = []
    queueArticleURL = []
    numberOfCrawlerProfile = 0

    def __init__(self):
        self.baseURL = 'https://www.researchgate.net/'
        from progress.bar import IncrementalBar
        self.progress_bar = IncrementalBar(
            'Crawling', max=MIN_NUMBER_OF_PROFILE,
            suffix='%(percent)d%% %(remaining)s remaining - eta %(eta_td)s')

    def crawl(self):
        self.queueProfileURL.extend(START_PAGES)
        os.makedirs(AFTER_CRAWL_AUTHOR_DIR, exist_ok=True)
        while self.numberOfCrawlerProfile < MIN_NUMBER_OF_PROFILE:
            while len(self.queueProfileURL) == 0:
                if len(self.queueArticleURL) == 0:
                    self.progress_bar.finish()
                    return
                try:
                    self.queueProfileURL.extend(filter(
                        lambda x: x not in self.visitedProfileURL
                        and x not in self.queueProfileURL,
                        self.getAuthorFromArticle(self.queueArticleURL.pop(0))))
                except:
                    pass
            try:
                self.progress_bar.next()
                self.crawlProfile(self.queueProfileURL.pop(0))
            except:
                pass
        self.progress_bar.finish()

    def getAuthorFromArticle(self, url):

        r = requests.get(url)
        s = BeautifulSoup(r.text, 'html.parser')

        authors = s.findAll('a', class_='display-name')
        authorsList = []
        for author in authors:
            authorsList.append(self.baseURL + author['href'])
        return authorsList

    def getArticleIDFromURL(self, url):
        return re.findall(r'publication/(?P<id>\d+)_', url)[0]

    def crawlProfile(self, profURL):
        if not profURL.endswith('publications'):
            profURL += '/publications'
        r = requests.get(profURL)
        s = BeautifulSoup(r.text, 'html.parser')
        name = s.find('h1', class_='profile-header-name')
        name = name.text
        n = 1
        articles = []
        while True:
            url = profURL + '/' + str(n)
            n += 1
            res = self.parseProfilePage(url)
            if res is None or len(res) == 0:
                break
            articles.extend(res)
        self.queueArticleURL.extend(filter(
            lambda x: x not in self.visitedArticleURL
            and x not in self.queueArticleURL,
            map(lambda x: x[0], articles)))
        js = {}
        js['Name'] = name
        js['Article'] = articles

        file_name = '{}.json'.format(name)
        with open(os.path.join(AFTER_CRAWL_AUTHOR_DIR, file_name), 'w') as outfile:
            json.dump(js, outfile)
        self.numberOfCrawlerProfile += 1
        print(self.numberOfCrawlerProfile)

    def parseProfilePage(self, url):  # return top 10 article url
        r = requests.get(url)
        s = BeautifulSoup(r.text, 'html.parser')
        articles = s.findAll('a', class_='ga-publication-item')
        result = []
        for article in articles:
            result.append((self.baseURL + article['href'], self.getArticleIDFromURL(article['href'])))
        return result
Exemplo n.º 58
    try:
        v = client.create_object_by_hashmap(container, object, map, **kwargs)
    except Fault, fault:
        if fault.status != 409:
            raise
    else:
        return v

    if isinstance(fault.data, types.StringType):
        missing = json.loads(fault.data)
    elif isinstance(fault.data, types.ListType):
        missing = fault.data

    if '' in missing:
        del missing[missing.index(''):]

    bar = IncrementalBar('Uploading', max=len(missing))
    bar.suffix = '%(percent).1f%% - %(eta)ds'
    with open(path) as fp:
        for hash in missing:
            offset = hashes.index(unhexlify(hash)) * blocksize
            fp.seek(offset)
            block = fp.read(blocksize)
            client.update_container_data(container, StringIO(block))
            bar.next()
    bar.finish()

    return client.create_object_by_hashmap(container, object, map, **kwargs)


def download(client, container, object, path):
Exemplo n.º 59
class ReportCompile(object):
    def __init__(self, job_name, template, **kwargs):
        self.job_name = job_name
        self.template = template
        self.no_artifacts = kwargs.get('no_artifacts', True)
        self.num_builds = int(kwargs.get('num_builds', composite['num_builds']))
        self.minimum_build = int(kwargs.get('minimum_build', composite['min_build']))
        self.exclude_builds = [int(xb) for xb in kwargs.get('exclude_builds', [])]
        try:
            self.work_dir = local(kwargs.get('work_dir', composite['work_dir']))
            self.work_dir.ensure(dir=True)
        except KeyError:
            self.work_dir = local.mkdtemp()
            print('Writing composite report to {}'.format(self.work_dir.strpath))
        self._progress = None
        self._queue = Queue()
        num_workers = 4
        for __ in xrange(num_workers):
            worker = Thread(target=_queue_worker, args=(self,))
            worker.daemon = True
            worker.start()

    @property
    def ssh_client(self):
        c = SSHClient()
        return c

    @staticmethod
    def _best_result(*results):
        # results should be a list of (result_id, result_value) tuples
        # result ranking, best to worst
        results_ranking = ('passed', 'xfailed', 'failed', 'xpassed', 'skipped', 'error')
        # Go through all the results, returning the best outcome based on results_ranking
        for result in results_ranking:
            for result_id, result_value in reversed(sorted(results, key=lambda r: r[0])):
                if result_value == result:
                    return (result_id, result_value)

    @staticmethod
    def _streak(*results):
        sorted_results = sorted(results, key=lambda r: r[0])
        # the value of the highest numbered (and therefore more recent) build
        latest_result = sorted_results[-1][1]
        streak = 0
        for __, result_value in reversed(sorted_results):
            if result_value == latest_result:
                streak += 1
            else:
                break
        return {'latest_result': latest_result, 'count': streak}

    def _progress_update(self, item, items_done):
        if self._progress is None:
            self._progress = Bar()
            self._progress.message = '%(index)d/%(max)d'
            self._progress.suffix = ''
        if item:
            items_done[item] = True
        self._progress.max = len(items_done)
        self._progress.index = len(filter(None, items_done.values()))
        with lock:
            try:
                self._progress.update()
            except ZeroDivisionError:
                pass

    def _progress_finish(self):
        self._progress.finish()
        self._progress = None

    def compile(self):
        return self.composite_report()

    def build_numbers(self):
        api = trackerbot.api()
        builds = trackerbot.depaginate(api,
            api.build.get(job_name=self.job_name, template=self.template)
        )
        build_numbers = []
        # XXX relying on trackerbot giving us the most recent builds first, should be explicit
        for build in builds.get('objects', []):
            if (build['number'] not in self.exclude_builds and
                    build['number'] >= self.minimum_build):
                build_numbers.append(build['number'])
                if self.num_builds and len(build_numbers) == self.num_builds:
                    break
        if build_numbers:
            print('Pulling reports from builds {}'.format(
                ', '.join([str(n) for n in build_numbers])))
        return build_numbers

    def template_log_dirs(self):
        log_dir_tpl = composite['log_dir_tpl']
        log_dirs = []
        for build_number in self.build_numbers():
            log_dirs.append((build_number, log_dir_tpl.format(self.job_name, build_number)))
        return log_dirs

    def test_reports(self):
        print('Collecting test reports to determine best build nodes')
        log_dirs = self.template_log_dirs()
        reports = {}
        c = self.ssh_client
        jenkins_host = composite['jenkins_host']
        c.connect(jenkins_host, username=credentials['jenkins-result']['username'],
            password=credentials['jenkins-result']['password'],
            timeout=10,
            allow_agent=False,
            look_for_keys=False,
            gss_auth=False)
        builds_done = {}
        self._progress_update(None, builds_done)
        for build_number, log_dir in log_dirs:
            build_work_dir = local(self.work_dir.join(str(build_number)))
            build_work_dir.ensure(dir=True)
            _remote = local(log_dir).join('test-report.json').strpath
            _local = build_work_dir.join('test-report.json').strpath
            builds_done[build_number] = False
            self._progress_update(None, builds_done)
            self._queue.put((_remote, _local, build_number, builds_done))
        self._queue.join()
        self._progress_finish()
        for build_number, __ in log_dirs:
            build_work_dir = local(self.work_dir.join(str(build_number)))
            for path in build_work_dir.visit('*/test-report.json'):
                try:
                    report = json.load(path.open())
                    reports[build_number] = report
                except:
                    # invalid json, skip this report
                    pass
        return reports

    def composite_status(self, reports=None):
        jenkins_host = composite['jenkins_host']
        # test_reports() returns a dict, but the loop below unpacks
        # (build_number, report) pairs, so fall back to its iteritems()
        reports = reports or self.test_reports().iteritems()
        results = {}
        # results dict structure:
        # {
        #   nodeid: {
        #     'build_results': {build_id_1: build_id_1_result, build_id_2: ...}
        #     'best_result': (best_build_id, best_build_result)
        #     'result_url': http://jenkins/path/to/build
        #     'streak': (latest_build_result, number_of_results_in_a_row)
        #   },
        #   nodeid: {
        #     ...
        #   }
        # }
        for build_number, report in reports:
            for nodeid, nodedata in report.get('tests', {}).items():
                try:
                    # Try to pull the build statuses, skip the node if we can't
                    node_results_temp = nodedata['statuses']['overall']
                    node_results = results.setdefault(nodeid, {'build_results': {}})
                    node_results['build_results'][build_number] = node_results_temp
                except KeyError:
                    continue
        for nodeid, nodedata in results.items():
            node_results = nodedata['build_results'].items()
            nodedata['best_result'] = self._best_result(*node_results)
            nodedata['result_url'] = 'https://{}/job/{}/{}/'.format(
                jenkins_host, self.job_name, nodedata['best_result'][0]
            )
            nodedata['streak'] = self._streak(*node_results)
            test_counts[nodedata['best_result'][1]] += 1
        return results

    def composite_report(self):
        reports = self.test_reports()
        composite_status = self.composite_status(reports.iteritems())
        composite_report = {
            'test_counts': test_counts,
            'tests': OrderedDict()
        }

        print('Collecting artifacts from best build nodes')
        # tracking dict for file pull progress
        remotes_done = {}
        self._progress_update(None, remotes_done)
        for nodeid, nodedata in sorted(composite_status.items(),
                key=lambda s: s[1]['streak']['count'], reverse=True):
            best_build_number = nodedata['best_result'][0]
            best_build_test = reports[best_build_number]['tests'][nodeid]
            composite_report['tests'][nodeid] = best_build_test
            composite_report['tests'][nodeid]['composite'] = nodedata
            reports[best_build_number]['tests'][nodeid]['files'] = []
        # wait for all the files to arrive before building the report
        self._queue.join()
        self._progress_finish()
        json.dump(composite_report, self.work_dir.join('composite-report.json').open('w'),
            indent=1)
        try:
            passing_percent = (100. * (test_counts['passed'] + test_counts['skipped'] +
                test_counts['xfailed'])) / sum(test_counts.values())
            print('Passing percent:', passing_percent)
            # XXX: Terrible artifactor spoofing happens here.
            print('Running artifactor reports')
            r = reporter.ReporterBase()
            reports_done = {'composite': False, 'provider': False}
            self._progress_update(None, reports_done)
            r._run_report(composite_report['tests'], self.work_dir.strpath)
            self._progress_update('composite', reports_done)
            r._run_provider_report(composite_report['tests'], self.work_dir.strpath)
            self._progress_update('provider', reports_done)
            self._progress_finish()
        except ZeroDivisionError:
            print('No tests collected from test reports (?!)')
        return composite_report

    def _translate_artifacts_path(self, artifact_path, build_number):
        preamble = composite['preamble'].format(self.job_name)
        replacement = composite['replacement'].format(self.job_name, build_number)
        artifact_remote = artifact_path.replace(preamble, replacement)
        artifact_local = self.work_dir.join(str(build_number), artifact_path[len(preamble):])
        try:
            assert artifact_remote.startswith(composite['remote_sw'])
            assert artifact_local.strpath.startswith(self.work_dir.strpath)
        except AssertionError:
            print('wat?')
            print('path', artifact_path)
            print('remote', artifact_remote)
            print('local', artifact_local.strpath)
        return artifact_remote, artifact_local.strpath
Exemplo n.º 60
        cuf.unit = unit
        cuf.save()

    return unit


def populate_rpm(model, i):
    return create_rpm_or_srpm(model, i)


def populate_srpm(model, i):
    return create_rpm_or_srpm(model, i)

for model, num_to_create in to_create.items():
    model_name = model._meta.model_name
    bar = Bar('Creating {}'.format(model_name), max=num_to_create)
    model_count = model.objects.count()
    create_f = globals()['populate_{}'.format(model_name)]

    for i in range(num_to_create):
        ident = '{}{}'.format(model_name, i)
        if i < model_count:
            unit = model.objects.all()[i]
        else:
            unit = create_f(model, i)
        globals()[ident] = unit
        bar.next()
    bar.finish()

# This bit is special: Associate all rpms with the first repo,
# for maximum relational query fun