Exemplo n.º 1
0
def get_user_media(username):
    """Scrape an Instagram profile page and collect its public feed data.

    Downloads ``https://www.instagram.com/<username>``, extracts the JSON
    assigned to ``window._sharedData`` in the page's inline script and copies
    the user fields and the timeline posts into a plain dict.

    :param username: profile name appended to the Instagram base URL.
    :return: ``InstagramUserFeed`` built from the collected dict. The dict is
        empty when the embedded JSON cannot be located in the page, and only
        partially filled when the JSON does not have the expected structure.
    """
    result = {}
    r = requests.get('https://www.instagram.com/' + username)
    data_search = re.search(
        '<script type="text/javascript">window._sharedData = (.*);</script>',
        r.text, re.IGNORECASE)
    if not data_search:
        log.error('Failed to extract meta-information from HTML page')
        return InstagramUserFeed(result)

    data = json.loads(data_search.group(1))
    try:
        user = data['entry_data']['ProfilePage'][0]['graphql']['user']
        timeline = user['edge_owner_to_timeline_media']
        result['user_id'] = user['id']
        result['user_username'] = user['username']
        result['follower'] = user['edge_followed_by']['count']
        result['follows'] = user['edge_follow']['count']
        result['media_count'] = timeline['count']
        result['media'] = []
        result['media_ids'] = set()

        for edge in timeline['edges']:
            node = edge['node']
            caption_edges = node['edge_media_to_caption']['edges']
            post = {
                'id': node['id'],
                'timestamp': node['taken_at_timestamp'],
                'is_video': node['is_video'],
                'caption': (caption_edges[0]['node']['text']
                            if caption_edges else "Could not find caption"),
                'thumbnail': node['thumbnail_src'],
                'image': node['display_url'],
            }
            result['media'].append(post)
            result['media_ids'].add(post['id'])

    # An unexpected payload can fail on a missing key (KeyError), an empty
    # list such as 'ProfilePage' (IndexError) or a null/scalar where a
    # container was expected (TypeError); all of them mean the same thing
    # here, so they share the "unexpected response" path.
    except (KeyError, IndexError, TypeError) as exception:
        log.error(
            'Unexpected response retrieving {} info: {!r}\n\nData: {}'.
            format(username, exception, data))
        return InstagramUserFeed(result)

    log.info('Scraped ' + result['user_username'] + ' and ' +
             str(len(result['media'])) + ' posts')
    return InstagramUserFeed(result)
Exemplo n.º 2
0
def select_police_stations(name: str, value: str) -> None:
    """
    Select a police station in a drop-down.

    :param name: ``name`` attribute of the select element.
    :param value: value of the option to select (converted with ``str``).
    :raise NoSuchElementException: re-raised with a fixed message whenever
        locating the element or selecting the option raises it; the original
        error text is logged first.
    """
    try:
        dropdown = Select(driver.find_element_by_name(name))
        dropdown.select_by_value(str(value))
    except NoSuchElementException as exc:
        error(str(exc))
        message = f'Não foi possível encontrar a opção com o valor {value}'
        raise NoSuchElementException(message)
Exemplo n.º 3
0
def select_option(name: str, value: str) -> None:
    """
    Select an option of a select element by its value.

    The value actually selected is looked up under ``value`` in the mapping
    returned by ``get_values(name)``; that lookup happens outside the
    ``try`` block, so its own errors propagate unchanged.

    :param name: ``name`` attribute of the select element.
    :param value: key used to look up the option value to select.
    :raise NoSuchElementException: re-raised with a fixed message whenever
        locating the element or selecting the option raises it; the original
        error text is logged first.
    """
    available = get_values(name)
    to_select: str = available[value]

    try:
        dropdown = Select(driver.find_element_by_name(name))
        dropdown.select_by_value(str(to_select))
    except NoSuchElementException as exc:
        error(str(exc))
        message = f'Não foi possível encontrar a opção com o valor {to_select}'
        raise NoSuchElementException(message)
Exemplo n.º 4
0
    def _find_download_url(self, ep_page_html):
        """Return the download link found in an episode page.

        Every ``<a>`` element of the parsed page is inspected and an ``href``
        containing ``'googleusercontent'`` counts as a download link. When
        several match, an error is logged for each extra one and the last
        match is the one returned.

        :param ep_page_html: HTML of the episode page.
        :return: the ``href`` value of the (last) matching link.
        :raise RuntimeError: when no link matches.
        """
        marker = 'googleusercontent'
        parsed_page = BeautifulSoup(ep_page_html, SOUP_PARSER_HTML)

        found = None
        for anchor in parsed_page.find_all('a'):
            href = anchor.get('href')
            if marker not in str(href):
                continue
            if found is not None:
                error('more than one download link found; {}, {}'.format(
                    found, href))
            found = href

        if found is not None:
            return found
        raise RuntimeError('no download link found')
Exemplo n.º 5
0
    def __database(cls, db_name: Optional[str] = 'scraping') -> Database:
        """
        Return the database called ``db_name`` (``'scraping'`` by default).

        When the name is not among the connection's listed database names,
        the database is obtained through item access on the connection and an
        info message is logged; otherwise it is fetched with
        ``get_database``.

        :param db_name: name of the database to create or return.
        :return: the database object for ``db_name``.
        """

        known_names = cls.__CONN.list_database_names()
        if db_name in known_names:
            return cls.__CONN.get_database(db_name)

        try:
            created: Database = cls.__CONN[db_name]
            info(f'Database {db_name} criado.')
            return created
        except OperationFailure as exc:
            # Logged only; fall through to the regular lookup below.
            error(f'Erro ao criar o database: {str(exc)}')

        return cls.__CONN.get_database(db_name)
Exemplo n.º 6
0
def generate_report(domaine: str):
    """Render the HTML report of a domain from its template.

    The template ``template_<domaine>.html`` is read from ``TEMPLATES_FOLDER``
    and the result is written to ``rapport_<domaine>.html`` in
    ``OUTPUT_FOLDER``, both resolved against the current working directory.

    :param domaine: report domain; must be listed in ``DOMAINES``. An unknown
        domain, or one without a data source below, is logged as an error and
        nothing is generated.
    """
    if domaine not in DOMAINES:
        log.error("Domaine {} inconnu".format(domaine))
        return

    template_name = "template_{}.html".format(domaine)
    template_path = os.path.join(os.getcwd(), TEMPLATES_FOLDER, template_name)

    report_name = "rapport_{}.html".format(domaine)
    report_path = os.path.join(os.getcwd(), OUTPUT_FOLDER, report_name)

    log.info("Génération du rapport {}...".format(report_name))

    if domaine == "client":
        datas = DonneesClient().tags
    elif domaine == "paiement":
        datas = DonneesPaiement().tags
    else:
        # Listed in DOMAINES but no data source is wired up here: report it
        # instead of failing on an unbound ``datas`` at render time.
        log.error(
            "Aucune source de données pour le domaine {}".format(domaine))
        return

    # Jinja2
    DefaultTemplater(template_path, report_path).render(datas)
    log.info("Rapport {} généré !".format(report_name))
Exemplo n.º 7
0
def read_parameters():
    """Parse the command line into a ``Parameters`` object.

    Recognised options:

    * ``-h`` / ``--help``: print the usage through ``log.usage()`` and exit.
    * ``-d VALUE`` / ``--domaine VALUE``: stored in ``parameters.domaine``.

    An invalid command line is logged and ends the process with status 2.
    The parsed domain is handed to ``check_parameters`` before returning.

    :return: the populated ``Parameters`` instance.
    """
    try:
        # "domaine=" (trailing "="): the long option takes an argument, like
        # its short form "d:". Without it, getopt rejects "--domaine=value"
        # and leaves the value of "--domaine value" as a positional argument.
        opts, args = getopt.getopt(sys.argv[1:], "hd:", ["help", "domaine="])
    except getopt.GetoptError as err:
        log.error(str(err))
        sys.exit(2)

    parameters = Parameters()

    for opt, arg in opts:
        if opt in ("-h", "--help"):
            log.usage()
            sys.exit()
        elif opt in ("-d", "--domaine"):
            parameters.domaine = arg
        else:
            # Raised explicitly so the check survives ``python -O``, which
            # strips ``assert`` statements.
            raise AssertionError("Option non prise en compte")

    check_parameters(parameters.domaine)

    return parameters
Exemplo n.º 8
0
    def __get_records(self) -> dict:
        """
        Build a dict with the records of the configured crime.

        Header (``th``) and data (``td``) cells are extracted from the table
        identified by ``self.__id_table``. The crime name is located in the
        data cells; the cells that follow it (one fewer than the number of
        header cells) are converted to ``float``, with ``'...'`` replaced by
        ``'0'`` first. Keys are the lower-cased header cells other than
        ``'Natureza'``.

        :raise ValueError: when the crime is not present in the data cells.
        :return: dict mapping each lower-cased header to its float value.
        """

        header_cells: list = extract_table_value(self.__id_table, 'th')
        data_cells: list = extract_table_value(self.__id_table, 'td')

        try:
            start: int = data_cells.index(self.__crime)
        except ValueError as exc:
            error(
                f'Erro ao obter os dados da região {self.__region}.\n Detalhes: {str(exc)}'
            )
            raise ValueError(
                f'O crime {self.__crime} não está presente na tabela.')

        row: list = data_cells[start:start + len(header_cells)]

        keys: list = [
            cell.lower() for cell in header_cells if cell != 'Natureza'
        ]

        # The first cell of the row is the crime name itself, not a record.
        del row[0]

        values: list = [float(cell.replace('...', '0')) for cell in row]

        info(f'Registros da região {self.__region} obtidos.')

        return dict(zip(keys, values))
Exemplo n.º 9
0
    def collection(
            self,
            is_current_occurrences: Optional[bool] = False) -> Collection:
        """
        Return the occurrences collection, creating it when it is not listed.

        The collection name is ``'current_occurrences'`` when
        ``is_current_occurrences`` is truthy and ``'last_occurrences'``
        otherwise. A failure while creating the collection is logged and the
        collection is still fetched with ``get_collection``.

        :param is_current_occurrences: selects which collection is used.
        :return: the collection created or selected.
        """

        database: Database = self.__database()

        if is_current_occurrences:
            coll_name: str = 'current_occurrences'
        else:
            coll_name = 'last_occurrences'

        if coll_name in database.list_collection_names():
            return database.get_collection(coll_name)

        try:
            database.create_collection(coll_name)
            info(f'Collection {coll_name} criada.')
        except OperationFailure as exc:
            error(f'Erro ao criar a collection: {str(exc)}')

        return database.get_collection(coll_name)
Exemplo n.º 10
0
def check_parameters(domaine):
    """Validate the mandatory command-line parameters.

    :param domaine: domain of the report to generate. When it is empty or
        ``None``, the error and the usage are logged and the process exits
        with status 2, the same status ``read_parameters`` uses for an
        invalid command line.
    """
    # Mandatory parameter: leave the program when it is missing.
    if not domaine:
        log.error("Le domaine du rapport à générer est obligatoire")
        log.usage()
        # Non-zero status: a bare sys.exit() would report success (0).
        sys.exit(2)
    def __init__(self):
        """
        Build the runtime configuration from environment variables.

        Reads flags, image/batch sizes and GPU settings through
        ``os.environ`` and derives the list of torch devices plus the device
        used for validation.

        :raise ValueError: when ``image_size`` cannot be parsed by ``int``
            (logged before being re-raised). The other ``int`` conversions
            are not guarded.
        """
        from src import log

        # NOTE(review): the defaults handed to strtobool below are bool
        # objects, not strings. If strtobool is distutils.util.strtobool it
        # calls .lower() on its argument and would fail whenever the variable
        # is unset, and it returns 0/1 rather than a bool — confirm which
        # strtobool is in scope.
        self.DEBUG: bool = strtobool(os.environ.get("DEBUG", False))

        self.use_cache: bool = strtobool(os.environ.get("use_cache", True))

        try:
            self.image_size: int = int(os.environ.get("image_size", 256))
        except ValueError as e:
            log.error("Env variable image_size must be set to an integer")
            raise e
        except Exception as e:
            log.error("Uknown error when loading image_size from environment")
            raise e

        self.include_healthy_annotations: bool = strtobool(
            os.environ.get("include_healthy_annotations", False))
        self.include_records_without_annotations: bool = strtobool(
            os.environ.get("include_records_without_annotations", False))

        self.batch_size: int = int(os.environ.get("batch_size", 16))
        self.artificial_batch_size: int = int(
            os.environ.get("artificial_batch_size", 256))

        # The artificial batch size is never allowed to be smaller than the
        # real batch size; it is raised to match when it is.
        if self.batch_size > self.artificial_batch_size:
            log.warn(
                f"Artificial batch size was smaller than batch size, this is not possible ({self.batch_size} > {self.artificial_batch_size}), artificial batch size set to batch size"
            )
            self.artificial_batch_size = self.batch_size

        self.gpu_count: int = int(os.environ.get("GPU_COUNT", 0))
        self.one_gpu_for_validation: bool = strtobool(
            os.environ.get("HOLD_ONE_GPU_FOR_VALIDATION", False))
        # GPUs are used only when requested AND CUDA is actually available.
        self.use_gpu: bool = self.gpu_count > 0 and torch.cuda.is_available()

        # GPUs were requested but CUDA is unavailable: fall back to CPU.
        if self.gpu_count > 0 and not self.use_gpu:
            log.error(
                "Attempted to utilize a GPU but no GPU or CUDA Driver was found.  Defaulting to CPU"
            )
            self.gpu_count = 0

        # Clamp the requested GPU count to what torch reports.
        if self.use_gpu and self.gpu_count > torch.cuda.device_count():
            log.warn(
                f"Attempted to utilize more GPUs than allowed, setting gpu count to {torch.cuda.device_count()}"
            )
            self.gpu_count = torch.cuda.device_count()

        self.devices: List[torch.device]
        self.validation_device: torch.device
        if self.use_gpu:
            self.devices = [
                torch.device(f'cuda:{x}') for x in range(self.gpu_count)
            ]
        else:
            self.devices = [torch.device('cpu')]

        # With more than one GPU and the hold-out flag set, the last device
        # is removed from the device list and reserved for validation.
        if self.use_gpu and self.gpu_count > 1 and self.one_gpu_for_validation:
            self.validation_device = self.devices[-1]
            self.devices = self.devices[:-1]
            self.gpu_count -= 1
        elif self.one_gpu_for_validation and self.gpu_count <= 1:
            # Hold-out requested but not possible: warn, then validate on the
            # first (only) device.
            log.warn(
                "Attempted to hold off one GPU for validation, but only 1 gpu was found, defaulting validation device to base device"
            )
            self.validation_device = self.devices[0]
        else:
            self.validation_device = self.devices[0]

        self.distribute_across_gpus: bool = strtobool(
            os.environ.get("distribute_across_gpus", False))

        # Make CUDA device 0 the current device when GPUs are in use.
        if self.use_gpu:
            torch.cuda.set_device(0)