Example #1
def load_images(img_path):
    '''
        Function to load images into the main memory

        img_path    : Relative path to the image directory
        return      : numpy array of images present in that directory
                      sorted in the numerical order
    '''
    # strip the extension so the names can be sorted numerically
    # (assumes numeric file names such as 0.png, 1.png, ...)
    image_files_names = [name.split('.')[0] for name in os.listdir(img_path)]
    image_files_names.sort(key=int)
    image_files_names = [
        img_path + name + '.png' for name in image_files_names
    ]

    images = []
    bar = FillingSquaresBar('Loading Images from {}'.format(img_path),
                            max=len(image_files_names))
    for i in range(len(image_files_names)):
        image = cv2.imread(image_files_names[i])
        images.append(image)
        bar.next()
    bar.finish()

    images = np.array(images)
    return images
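
A minimal usage sketch for the function above; the directory name is hypothetical, and it must end with a path separator because the function joins paths by plain string concatenation:

images = load_images('./frames/')
print(images.shape)  # (num_images, height, width, channels) when all frames share a size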
Example #2
 def extract_data(self):
     if not self.views:
         extract_data_views = [self.view]
     else:
         extract_data_views = self.views
     for extract_data_view in extract_data_views:
         folders = getattr(self.db, extract_data_view)
         if self.range:
             folders = folders[int(self.range.split(':')[0]):int(self.range.split(':')[1])]
         if self.limit:
             folders = folders.head(self.limit)
         if self.licence_id:
             folders = folders[folders.REFERENCE == self.licence_id]
         if self.id:
             folders = folders[folders.id == int(self.id)]
         bar = FillingSquaresBar('Processing licences for {}'.format(str(extract_data_view)), max=folders.shape[0])
         for id, licence in folders.iterrows():
             self.get_licence(id, licence)
             bar.next()
         bar.finish()
         export_error_csv([self.parcel_errors, self.street_errors])
         if self.iterate is True:
             try:
                 self.validate_data(self.data, 'GenericLicence')
             except Exception:
                 raise IterationError('Schema change during iterative process')
Example #3
def createModeFolder(assetsDir, modeDir):
    print('Begin editing furniture description files. Please wait!')
    os.chdir(assetsDir)
    bar = FillingSquaresBar('] Creating mode files', max=countTaskLen())

    for root, dirs, files in os.walk("."):
        for dir in dirs:
            if not os.path.exists(modeDir + os.path.join(root, dir)[1:]):
                os.mkdir(modeDir + os.path.join(root, dir)[1:])

        for file in files:
            if file.endswith(".object"):
                bar.next()
                with open(os.path.join(root, file), encoding='utf-8') as of:
                    # strip comments so the .object file parses as plain JSON
                    minified = jsmin.jsmin(of.read())
                    data = json.loads(minified)

                    try:
                        if data['category'] not in ['furniture', 'decorative']:
                            continue

                        tags = data['colonyTags']
                        desc = data['description']

                        # strip a tag suffix appended by a previous run
                        wasUsed = max(desc.find('TAGS:'), desc.find('ТЕГИ:'))
                        if wasUsed != -1:
                            desc = desc[:wasUsed - 1]

                        if enableTranslating:
                            # sometimes the connection is interrupted; this
                            # check avoids repeated translations when the
                            # script is restarted
                            if os.path.isfile(modeDir +
                                              os.path.join(root, file)[1:] +
                                              '.patch'):
                                continue

                            desc = ts.translate(text=desc,
                                                src='en',
                                                dest=enableTranslating)

                    except KeyError:
                        continue

                    generate = json.loads(
                        '[{"value": "", "path": "/description", "op": "replace"}]'
                    )
                    generate[0]['value'] = desc + ' TAGS: ' + ', '.join(tags)

                    with open(modeDir + os.path.join(root, file)[1:] +
                              '.patch',
                              'w',
                              encoding='utf-8') as outfile:
                        json.dump(generate,
                                  outfile,
                                  sort_keys=True,
                                  indent=2,
                                  ensure_ascii=False)

    bar.finish()
    print('\nCleaning mode folder...')
    delEmpryDirs(modeDir)
Example #4
 def validate_data(self, data, type):
     bar = FillingSquaresBar('Validating licences with : {}'.format(type),
                             max=len(data))
     for licence in data:
         self.validate_schema(licence, type)
         bar.next()
     bar.finish()
Example #5
 def _mine(self, progress=True):
     if progress:
         bar = FillingSquaresBar('Mining %s:' % self.grid.name,
                                 max=self.grid.dim)
         for i in range(self.grid.dim):
             p = {
                 'lat': self.grid.points[i][0],
                 'lng': self.grid.points[i][1]
             }
             query_result = self.searcher(lat_lng=p,
                                          radius=self.r,
                                          types=self.place_type)
             for place in self.get_places(query_result):
                 yield (place)
             bar.next()
         bar.finish()
     else:
         for i in range(self.grid.dim):
             p = {
                 'lat': self.grid.points[i][0],
                 'lng': self.grid.points[i][1]
             }
             query_result = self.searcher(lat_lng=p,
                                          radius=self.r,
                                          types=self.place_type)
             for place in self.get_places(query_result):
                 yield (place)
Example #6
def run_bar():
    mylist = [1, 2, 3, 4, 5]
    bar = FillingSquaresBar('Bar', max=len(mylist))
    for item in mylist:
        bar.next()
        time.sleep(0.5)
    bar.finish()
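
The same loop with a customised suffix; the format tokens (percent, index, max, eta) are standard attributes of the progress bar classes, as several examples below also show. The function name is a hypothetical sketch:

import time
from progress.bar import FillingSquaresBar

def run_bar_with_eta():
    mylist = [1, 2, 3, 4, 5]
    bar = FillingSquaresBar('Bar',
                            max=len(mylist),
                            suffix='%(index)d/%(max)d - %(percent)d%% - ETA %(eta)ds')
    for item in mylist:
        time.sleep(0.5)  # simulate work
        bar.next()
    bar.finish()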
Example #7
def main():
    textfile = args.file

    if filConfig.exclusions:
        textfile = 'urls-with-exclusions.txt'
    if filConfig.download:
        target.download = filConfig.download

    # Retrieve the number of url lines
    content = []
    with open(textfile, 'r') as f:
        content = f.readlines()
        # strip whitespace characters like `\n` at the end of each line
        content = [x.strip() for x in content]
        # Record the line count on the config object
        filConfig.numOfUrls = len(content)

    if filConfig.quiet:
        print("\n[ Searching {} urls ]".format(filConfig.numOfUrls))
        bar = FillingSquaresBar('Filleting Phish', max=filConfig.numOfUrls)
        print("\n")

    if filConfig.verbose:
        filConfig.show()

    for line in content:
        try:
            if filConfig.quiet:
                bar.next()
            # Pass URL to the target class object; the object is then passed
            # to urlConstruct to create its attributes

            # Launch the GeoIP function
            if filConfig.geoIpEnabled:
                fil_getGeoIP(target, filConfig)  # Can this be placed inside connector?

            # If output selected collect return and place in output function
            if filConfig.output:
                index = fil_connector(target, filConfig)
                fil_output(index, filConfig.output)
            else:
                fil_connector(filConfig, content)
                print("done")
            
        except (KeyboardInterrupt, SystemExit):
            print("\n\n\
Goodbye!       ,-,\n\
             ,/.(     __\n\
          ,-'    `!._/ /\n\
         > X )<|    _ <\n\
          `-....,,;' \_\n")
            exit()
Example #8
def test_model(saved_model_name):
    model_to_test = torch.load(saved_model_name)

    # batch files are of format "batch-{batch_num}-{model_name}.pth", retrieve batch_num
    batch_number = saved_model_name.split("-")[1]
    testing_loader = get_testing_loader(batch_num=batch_number)

    # no_grad call allows processing bigger batches at once
    with torch.no_grad():
        model_to_test.eval()

        truth_list, predictions_list = [], []
        top_1_accuracy, top_5_accuracy = 0, 0
        testing_bar = FillingSquaresBar(message='Testing',
                                        max=len(testing_loader))

        for inputs, labels in testing_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            # predict inputs, and reverse the LogSoftMax
            real_predictions = torch.exp(model_to_test(inputs))

            # Get top class of outputs
            _, top_1_class = real_predictions.topk(k=1)
            _, top_5_classes = real_predictions.topk(k=5)

            # Run predictions (labels broadcast against the five top-5 columns)
            top_1_equals = top_1_class == labels.view(*top_1_class.shape)
            top_5_equals = top_5_classes == labels.view(*top_1_class.shape)

            # Count all the accurate guesses
            top_1_accuracy += top_1_equals.sum().item()
            top_5_accuracy += top_5_equals.sum().item()

            # append to confusion matrix lists
            for truth, prediction in zip(labels.view(-1),
                                         top_1_class.view(-1)):
                predictions_list.append(prediction.item())
                truth_list.append(truth.item())

            testing_bar.next()

        testing_bar.finish()

    top_1_testing_accuracy = top_1_accuracy / len(testing_loader.dataset)
    top_5_testing_accuracy = top_5_accuracy / len(testing_loader.dataset)
    print(f'''\nAccuracy
        top-1: {helper.to_percentage(top_1_testing_accuracy)}
        top-5: {helper.to_percentage(top_5_testing_accuracy)}''')

    print("Calculating and printing per-class accuracy...")
    print_per_class_accuracy(truth_list, predictions_list)

    print("Displaying confusion matrix...")
    confusionMatrixPrettyPrint.plot_confusion_matrix_from_data(
        y_test=truth_list,
        predictions=predictions_list,
        columns=class_names,
        figsize=[15, 15],
        cmap='twilight')
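
A usage sketch for the function above; the file name is hypothetical but follows the batch-{batch_num}-{model_name}.pth format noted in the comment. Since the function calls torch.load on it, the file must contain a fully pickled model rather than a bare state_dict:

test_model('batch-4-resnet50.pth')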
Example #9
class FacebookMiner(object):
    page_fields = '?fields=is_community_page,category,category_list,fan_count,hours,link,location,name,name_with_location_descriptor,overall_star_rating,parking,phone,rating_count,single_line_address,store_location_descriptor,website,were_here_count'

    def __init__(self,
                 mine_points,
                 API_KEY,
                 search_rayon=1000,
                 categories=['FOOD_BEVERAGE'],
                 _type='place'):
        self.points = mine_points
        self.graph = GraphAPI(API_KEY, version='2.9')
        self.categories = categories
        self.r = search_rayon
        self.dim = len(self.points)
        self._type = _type

    def _mine(self, progress=True):
        if progress:
            self.bar = FillingSquaresBar('Mining:', max=self.dim)
            for p in self.points:
                for pla in self.get_places(p):
                    yield pla
                self.bar.next()
            self.bar.finish()
        else:
            for p in self.points:
                for pla in self.get_places(p):
                    yield pla

    def get_places(self, p):
        c = str(p[0]) + ',' + str(p[1])
        nearby_ids = [
            l['id'] for l in self.graph.search(term='',
                                               categories=str(self.categories),
                                               type=self._type,
                                               center=c,
                                               distance=self.r)['data']
        ]
        for _id in nearby_ids:
            entity = self.graph.get(str(_id) + self.page_fields)
            entity['fb_id'] = entity.pop('id')
            try:

                entity['location']['latitude'] = float(
                    entity['location'].pop('latitude'))
                entity['location']['longitude'] = float(
                    entity['location'].pop('longitude'))
            except Exception:
                pass
            try:
                entity['overall_star_rating'] = float(
                    entity.pop('overall_star_rating'))
            except Exception:
                pass
            yield entity
Example #10
def Pb5():
    from progress.bar import FillingSquaresBar
    import time

    bar = FillingSquaresBar('Progress bar 5', max=100)  # the max value of 100 is adjustable

    for i in range(100):  # adjust this to match max above
        bar.next()
        time.sleep(0.1)  # delay per step, adjustable; 0.1 to 1 works best

    bar.finish()
Example #11
 def save_data(self, data: List[object]) -> int:
     """
        Save prepared data to file
     """
     file_manager.check_db_file()
     bar = FillingSquaresBar('Save data', suffix='%(percent)d%%', max=1)
     with self._file_path.open(mode='w') as f:
         bar.next()
         f.write(ObjectSerializer().encode_object(constants.json_format,
                                                  data))
     return 0
Example #12
def download_from_eoddata(start_date, end_date, market, driver):
    """Provide datetime.date arguments `start_date` and `end_date`, a string
    `market`, and Selenium driver `driver`.  The function will then download
    the EOD data for the appropriate market and dates from the eoddata
    """

    # navigate to the downloads page
    driver.get('http://www.eoddata.com/download.aspx')

    # get a list of all of the hyperlink tags in the page
    bs_obj = BeautifulSoup(driver.page_source, "lxml")
    url_list = bs_obj.find_all('a')

    # each iteration steps through the list of hyperlink tags in the page until
    # it finds the list of example downloads, and then extracts the `k` field
    k = ''
    for url in url_list:

        if not url.has_attr('href'):
            continue

        # looks for a link of the form
        # /data/filedownload.aspx?e=INDEX&sd=20180606&ed=20180606&d=4&k=ph72h4ynw2&o=d&ea=1&p=0
        # Once we find one, we need to extract the `k` field so that we can use
        # it when constructing our own HTML request.
        url_string = url.attrs['href']
        if re.match('/data/filedownload.aspx', url_string):
            k = re.search('k=([^&]*)', url_string).group(1)
            break
    if not k:
        raise Exception('could not find the `k` download key in the page')

    # construct the URL according to the dates and market that we want to
    # download
    url_template = '{url_base}?e={e}&sd={sd}&ed={ed}&d={d}&k={k}&o={o}&ea={ea}&p={p}'
    url_download = url_template.format(
        url_base='http://www.eoddata.com/data/filedownload.aspx',
        e=market,
        sd=start_date.strftime('%Y%m%d'),
        ed=end_date.strftime('%Y%m%d'),
        d='4',
        k=k,
        o='d',
        ea='1',
        p='0')
    # submit the download request
    driver.get(url_download)

    # wait for 10 seconds to ensure that the file has time to download
    bar = FillingSquaresBar('Downloading data ', max=100)
    for i in range(100):
        bar.next()
        time.sleep(0.1)
    bar.finish()
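
A hedged usage sketch for the function above; the dates are arbitrary and 'INDEX' is the market code taken from the example link in the comments:

import datetime
from selenium import webdriver

driver = webdriver.Chrome()  # assumes chromedriver is on PATH
download_from_eoddata(start_date=datetime.date(2018, 6, 6),
                      end_date=datetime.date(2018, 6, 6),
                      market='INDEX',
                      driver=driver)
driver.quit()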
Example #13
def get_point_cloud(B_matrix,
                    parallax_map,
                    color_map,
                    poses,
                    mask_lower_bound=2):

    point_cloud = []
    point_cloud_colors = []

    bar = FillingSquaresBar('Generating Frame Point Cloud',
                            max=len(parallax_map))
    for i in range(len(parallax_map)):

        p_map = parallax_map[i]
        mask = (p_map[:, 2] > mask_lower_bound)
        p_map = p_map[mask, :]
        c_map = color_map[i]
        c_map = (c_map[mask, :] / 255.0).astype('float64')

        point_cloud_colors.append(c_map)

        point_cloud.append(B_matrix @ p_map.T)

        point_cloud[i] = point_cloud[i] / point_cloud[i][3]

        point_cloud[i] = poses[i] @ point_cloud[i]
        point_cloud[i] = point_cloud[i].T

        bar.next()
    bar.finish()

    registered_point_cloud = point_cloud[0]
    registered_point_cloud_colors = point_cloud_colors[0]

    bar = FillingSquaresBar('Registering Global Point Cloud',
                            max=len(point_cloud) - 1)
    for i in range(1, len(point_cloud)):
        registered_point_cloud = np.concatenate(
            (registered_point_cloud, point_cloud[i]), axis=0)
        registered_point_cloud_colors = np.concatenate(
            (registered_point_cloud_colors, point_cloud_colors[i]), axis=0)
        bar.next()
    bar.finish()

    pcd = o3d.geometry.PointCloud()
    pcd.points = o3d.utility.Vector3dVector(registered_point_cloud)
    pcd.colors = o3d.utility.Vector3dVector(registered_point_cloud_colors)

    o3d.io.write_point_cloud(
        OP1_DIR + "/point_cloud_{}.ply".format(
            datetime.datetime.now().strftime("%d-%b-%Y %H:%M:%S.%f")), pcd)

    return registered_point_cloud, registered_point_cloud_colors
Example #14
def image_point_cloud(point_cloud, point_cloud_colors, poses, image_width,
                      image_height):
    bar = FillingSquaresBar('Imaging the Point Cloud with the given poses',
                            max=len(poses))
    for i in range(len(poses)):
        P = poses[i]
        R = P[:, :3]
        T = -1 * (R.T @ P[:, 3])
        R = R.T
        im = get_image(i, point_cloud, point_cloud_colors, R, T, K,
                       image_width, image_height)
        bar.next()
    bar.finish()
Example #15
def download_video(link):
    yt=pytube.YouTube(link)
    stream=yt.streams.first()
    video_length=get_time(yt.length)
    video_size=get_size(stream.filesize)
    print("Downloading \""+yt.title+"\" Length : "+video_length)
    print("\tFile Size : "+video_size)
    # the download is a single blocking call, so drive the bar as one step
    # instead of re-downloading the stream 100 times in a loop
    bar=FillingSquaresBar("Progress : ", suffix="%(percent)d%%", max=1)
    stream.download(SAVE_PATH)
    bar.next()
    bar.finish()
Example #16
def create_parallax_map(images_left, images_right):
    '''
        Return a parallax map given two stereo rectified images

        images_left : np array of the left stereo images
        images_right: np array of the right stereo images
        return      : parallax map and the last computed disparity map
    '''
    if len(images_left) != len(images_right):
        print("Error: #images_left must be equal to #images_right")
        return False

    window_size = 5
    minDisparity = -39
    numDisparities = 144
    stereo = cv2.StereoSGBM_create(minDisparity=minDisparity,
                                   numDisparities=numDisparities,
                                   blockSize=window_size,
                                   P1=8 * 3 * window_size**2,
                                   P2=64 * 3 * window_size**2,
                                   disp12MaxDiff=1,
                                   uniquenessRatio=10,
                                   speckleWindowSize=100,
                                   speckleRange=32,
                                   preFilterCap=63,
                                   mode=3)

    disparity = []
    parallax_map = []

    bar = FillingSquaresBar('Extracting Disparity Map', max=len(images_left))
    for k in range(len(images_left)):
        im_right = cv2.cvtColor(images_right[k], cv2.COLOR_BGR2GRAY)
        im_left = cv2.cvtColor(images_left[k], cv2.COLOR_BGR2GRAY)
        disparity = stereo.compute(im_right, im_left).astype('float64')
        disparity = (disparity - minDisparity) / numDisparities

        parallax_map.append([])
        for y in range(disparity.shape[0]):
            for x in range(disparity.shape[1]):
                parallax_map[k].append([x, y, disparity[y, x], 1])

        bar.next()

    parallax_map = np.array(parallax_map)

    bar.finish()
    return parallax_map, disparity
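
The per-pixel nested loop above can be vectorized; a sketch, assuming disparity is the 2-D array produced by stereo.compute for one frame:

import numpy as np

ys, xs = np.indices(disparity.shape)
parallax_rows = np.stack(
    [xs.ravel(), ys.ravel(), disparity.ravel(),
     np.ones(disparity.size)], axis=1)
# parallax_rows[i] == [x, y, disparity[y, x], 1] in the same row-major
# order as the y/x loops, so it can replace parallax_map[k]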
Example #17
def insert_data_to_db():

    for uf in estados.estados:
        dir = str(path.csv_path) + '/' + uf
        os.chdir(dir)
        city_list_length = len(glob.glob('*.*'))

        bar = FillingSquaresBar(uf, max=city_list_length)
        for file in glob.glob('*.*'):
            try:
                bar.next()
                DB().insert_many(file, 'empresas')
            except Exception:
                # skip files that fail to insert
                pass
        bar.finish()
Example #18
def train(logbook, net, device, loss_fn, opt, train_l):
    """Run one epoch of the training experiment."""
    logbook.meter.reset()
    bar = FillingSquaresBar('Training \t', max=len(train_l))
    controllers = indiv.Controller.getControllers(net)
        
    for i_batch, data in enumerate(train_l):
        
        # load data onto device
        inputs, gt_labels = data
        inputs            = inputs.to(device)
        gt_labels         = gt_labels.to(device)
        
        # forprop
        pr_outs           = net(inputs)
        loss              = loss_fn(pr_outs, gt_labels)
        
        # update statistics
        logbook.meter.update(pr_outs, gt_labels, loss.item(), track_metric=logbook.track_metric)
        bar.suffix = 'Total: {total:} | ETA: {eta:} | Epoch: {epoch:4d} | ({batch:5d}/{num_batches:5d})'.format(
                total=bar.elapsed_td,
                eta=bar.eta_td,
                epoch=logbook.i_epoch,
                batch=i_batch + 1,
                num_batches=len(train_l))
        bar.suffix = bar.suffix + logbook.meter.bar()
        bar.next()
        
        # backprop
        opt.zero_grad()
        loss.backward()
        opt.step()
        for ctrl in controllers: 
            ctrl.step_postOptimStep()
        
    bar.finish()
    stats = {
        'train_loss':   logbook.meter.avg_loss,
        'train_metric': logbook.meter.avg_metric
    }
    for k, v in stats.items():
        if v:
            logbook.writer.add_scalar(k, v, global_step=logbook.i_epoch)
    logbook.writer.add_scalar('learning_rate', opt.param_groups[0]['lr'], global_step=logbook.i_epoch)
    return stats
Example #19
def prepare(rkeys, version, force=False, cores=1, hashing=None):
    pool = Pool(cores)
    m = Manager()
    queue = m.Queue()
    jobs = [rkey + (version, force, hashing, queue) for rkey in rkeys]
    bar = Bar("[1/3]",
              max=len(jobs),
              suffix="%(percent).1f%% / %(elapsed_td)s / ETA %(eta_td)s")
    bar.start()
    res = pool.map_async(prepare2, jobs, chunksize=1)
    todo = len(jobs)
    while todo:
        queue.get()
        todo -= 1
        bar.next()
    bar.finish()
    pool.close()
    pool.join()
Example #20
def download_audio(link):
    yt=pytube.YouTube(link)
    # sanitise the video title so it can be used as a file name
    bad_chars=[";", ":", "!", "*", ' ', "$", "@", "(", ")", "[", "]", "|", ".", "\"", "\'", ","]
    _filename=yt.title
    for ch in bad_chars:
        _filename=_filename.replace(ch, "_")
    mp4_name="download/%s.mp4"%_filename
    mp3_name="download/%s.mp3"%_filename
    # the download is a single blocking call, so drive the bar as one step
    # instead of re-downloading the stream inside a loop
    bar=FillingSquaresBar("Downloading Audio : ", suffix="%(percent)d%%", max=1)
    stream=yt.streams.filter(only_audio=True).first()
    stream.download(SAVE_PATH, _filename)
    bar.next()
    bar.finish()

    print("\nPerforming required conversions...")
    ffmpeg=('ffmpeg -loglevel panic -i %s ' % mp4_name + mp3_name)
    subprocess.call(ffmpeg, shell=True)
    os.remove(mp4_name)
Example #21
def test(logbook, net, device, loss_fn, test_l, valid=False, prefix=None):
    """Run a validation epoch."""
    logbook.meter.reset()
    bar_title = 'Validation \t' if valid else 'Test \t'
    bar       = FillingSquaresBar(bar_title, max=len(test_l))
    with torch.no_grad():
        for i_batch, data in enumerate(test_l):
            
            # load data onto device
            inputs, gt_labels     = data
            inputs                = inputs.to(device)
            gt_labels             = gt_labels.to(device)
            
            # forprop
            tensor_stats, pr_outs = net.forward_with_tensor_stats(inputs)
            loss                  = loss_fn(pr_outs, gt_labels)
            
            # update statistics
            logbook.meter.update(pr_outs, gt_labels, loss.item(), track_metric=True)
            bar.suffix = 'Total: {total:} | ETA: {eta:} | Epoch: {epoch:4d} | ({batch:5d}/{num_batches:5d})'.format(
                total=bar.elapsed_td,
                eta=bar.eta_td,
                epoch=logbook.i_epoch,
                batch=i_batch + 1,
                num_batches=len(test_l))
            bar.suffix = bar.suffix + logbook.meter.bar()
            bar.next()
    bar.finish()

    if prefix is None:
        prefix = 'valid' if valid else 'test'
    stats = {
        prefix+'_loss':   logbook.meter.avg_loss,
        prefix+'_metric': logbook.meter.avg_metric
    }
    if valid:
        for k, v in stats.items():
            if v:
                logbook.writer.add_scalar(k, v, global_step=logbook.i_epoch)
        for name, tensor in tensor_stats:
            logbook.writer.add_histogram(name, tensor, global_step=logbook.i_epoch)
    return stats
Example #22
    def extract_data(self):
        folders = getattr(self.db, self.view)
        if self.range:
            folders = folders[int(self.range.split(':')[0]):int(self.range.split(':')[1])]
        if self.limit:
            folders = folders.head(self.limit)
        if self.licence_id:
            folders = folders[folders.DOSSIER_NUMERO == self.licence_id]

        bar = FillingSquaresBar('Processing licences', max=folders.shape[0])
        for id, licence in folders.iterrows():
            self.get_licence(id, licence)
            bar.next()
        bar.finish()
        export_error_csv([self.parcel_errors, self.street_errors])
        if self.iterate is True:
            try:
                self.validate_data(self.data, 'GenericLicence')
            except Exception:
                raise IterationError('Schema change during iterative process')
Example #23
    def generate_test_files(self):
        # recreate the test directory from scratch
        if os.path.exists(self.test_directory):
            shutil.rmtree(self.test_directory)
        os.makedirs(self.test_directory)
        os.chdir(self.test_directory)

        bar = FillingSquaresBar('Processing', max=self.files_number)
        for i in range(self.files_number):
            filename = "file" + str(
                i
            ) + f".{self.file_extention[rand(0,len(self.file_extention) - 1 )]}"
            with open(filename, 'wb') as new_random_file:
                d1 = rand(1, 1000)
                d2 = rand(1, 999)
                dimension = d1 * self.file_size + d2
                new_random_file.write(os.urandom(dimension))
            bar.next()
        bar.finish()
Example #24
 def get_trips(self) -> Tuple[Trip]:
     bar = FillingSquaresBar("Processing", max=len(self._waypoints))
     for waypoint in self._waypoints:
         bar.next()
         if not self.last_valid_waypoint:
             self.last_valid_waypoint = waypoint
             continue
         distance = calculate_distance(self.last_valid_waypoint, waypoint)
         if distance < 15:
             continue
         time_difference = calculate_minute_difference(
             self.last_valid_waypoint.timestamp, waypoint.timestamp)
         if time_difference <= 3:
             continue
         trip = Trip(start=self.last_valid_waypoint,
                     end=waypoint,
                     distance=distance)
         self.last_valid_waypoint = waypoint
         self.trips.append(trip)
     bar.finish()
     return tuple(self.trips)
Example #25
def scrape_symbols_tsx_website(
        tsx_company_directory_url='https://www.tmxmoney.com/en/research/listed_company_directory.html',
        webdriver_exec_path='/usr/lib/chromium-browser/chromedriver'):
    driver = webdriver.Chrome(executable_path=webdriver_exec_path)
    driver.get(tsx_company_directory_url)
    elem = driver.find_element_by_id("SearchKeyword")
    elem.clear()
    elem.send_keys("^")
    button = driver.find_element_by_id("btn-search")
    driver.execute_script("arguments[0].click();", button)
    time.sleep(5)

    table = driver.find_element_by_id('tresults')
    rows = table.find_elements_by_tag_name('tr')

    extracted_table = pd.DataFrame(columns=['company_name', 'symbol'])

    bar = FillingSquaresBar('Extracting', max=len(rows))

    for row in rows[1:]:
        cols = row.find_elements_by_tag_name('td')
        to_append = [None, None]

        for i in range(len(cols)):
            to_append[i] = cols[i].text.lstrip()

        extracted_table = extracted_table.append(
            {
                'company_name': to_append[0],
                'symbol': to_append[1]
            },
            ignore_index=True)

        bar.next()

    driver.close()
    bar.finish()

    final_table = extracted_table.loc[~extracted_table['symbol'].isnull(), :]

    return final_table
Example #26
 def _mine(self, post_list=[], progress=True):
     post_list = post_list if post_list != [] else self.posts_ids
     post_bach = [{
         'method': 'GET',
         'relative_url': p_id + self.post_fields
     } for p_id in post_list]
     n = len(post_list)
     posts = self.graph.batch(post_bach)
     if progress:
         bar = FillingSquaresBar('Mining %s:' % self.name, max=n)
         for post in posts:
             p = self.clean_post(post)
             if p is not None:
                 yield p
             bar.next()
         bar.finish()
     else:
         for post in posts:
             p = self.clean_post(post)
             if p is not None:
                 yield p
Example #27
def download_resources(resources, path):
    """
    Download page resources to the specified directory.

    Show the download progress in terminal.

    Args:
        resources: list of (url, local storage subpath) tuples
        path: parent directory for local storage
    """
    download_progress = FillingSquaresBar(
        'Downloading page resources',
        max=len(resources),
        suffix='%(percent)d%%',  # noqa:WPS323
    )
    for resource_url, resource_filename in resources:
        logging.debug(
            'resource_url: {0}, resource_filename: {1}'.format(
                resource_url,
                resource_filename,
            ), )
        try:
            resource_content, resource_binary, _ = send_request(resource_url, )
        except Exception as resource_request_error:
            logging.warning(
                'Resource download failed: {0}'.format(
                    str(resource_request_error), ),
                exc_info=logger.isEnabledFor(logging.DEBUG),
            )
            continue

        write_to_file(
            path / resource_filename,
            resource_content,
            binary_mode=resource_binary,
        )
        download_progress.next()  # noqa: B305

    download_progress.finish()
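
A usage sketch for the function above; the URLs and target directory are hypothetical, and send_request/write_to_file are this module's own helpers:

from pathlib import Path

resources = [
    ('https://example.com/assets/style.css', 'page_files/style.css'),
    ('https://example.com/assets/logo.png', 'page_files/logo.png'),
]
download_resources(resources, Path('/tmp/output'))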
Example #28
def extract_color(images):
    '''
        Function to get the RGB color values from the images

        images: np array of images
        return: np array of the color values extracted from the images

    '''
    color_map = []

    bar = FillingSquaresBar('Extracting Color Map', max=len(images))
    for k in range(len(images)):
        color_map.append([])
        image = cv2.cvtColor(images[k], cv2.COLOR_BGR2RGB)
        for y in range(image.shape[0]):
            for x in range(image.shape[1]):
                color_map[k].append(image[y, x, :])
        bar.next()
    color_map = np.array(color_map)

    bar.finish()
    return color_map
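
Because the nested pixel loop only flattens each image in row-major order, the same color map can be built with a reshape; a vectorized sketch (the function name is hypothetical):

import cv2
import numpy as np

def extract_color_vectorized(images):
    # reshape(-1, 3) walks pixels in the same y-then-x order as the loops above
    return np.array([
        cv2.cvtColor(image, cv2.COLOR_BGR2RGB).reshape(-1, 3)
        for image in images
    ])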
Example #29
def make(rkeys, out=None, hashing=None):
    dat = []
    bar = Bar("[2/3]",
              max=len(rkeys),
              suffix="%(percent).1f%% / %(elapsed_td)s / ETA %(eta_td)s")
    bar.start()
    for (bid, pid, problem, limit) in rkeys:
        f_dat = expres.results.path(bid,
                                    pid,
                                    problem,
                                    limit,
                                    ext="in" if hashing else "pre")
        if out:
            with open(f_dat) as f:
                tmp = f.read().strip()
            if tmp:
                out.write(tmp)
                out.write("\n")
        else:
            with open(f_dat) as f:
                dat.extend(f.read().strip().split("\n"))
        bar.next()
    bar.finish()
    return dat if not out else None
Example #30
  def convertRange(self, start, end):
    if not len(self.targets):
      raise ValueError("There seem to be no targets...")
    elif self.outOfRange(start) or self.outOfRange(end):
      raise ValueError("Range does not exist.")

    progress = FillingSquaresBar('Converting', max=(end - start))
    for t in range(start, end):

      img = self.convertTarget(self.targets[t])
      imgName = './out/'+str(t)+'.png' 
      img.save(imgName)

      img.close()

      progress.next()
    progress.finish()

    images = []
    # sort frames numerically so the GIF plays in order
    for fn in sorted(os.listdir('out'), key=lambda n: int(n.split('.')[0])):
      images.append(imageio.imread('out/'+fn))

    imageio.mimsave('out.gif', images, 'GIF-FI', duration = .25)