Example #1
def _update_10_rows_from_index(i):
    # `rows`, `name` and `session` are closed over from the enclosing
    # sync_for (see Example #8); each call updates one batch of up to 10 rows.
    records = [
        {'id': row['airtableId'],
         'fields': {'Synced time input': row['Synced time input']}}
        for row in rows[i: i + 10]
    ]
    res = update_airtable_rows(
        SCIENCE_FEEDBACK_AIRTABLE_BASE_ID,
        NAME_TO_AIRTABLE[name],
        {'records': records},
        session=session
    )

    if res.status_code != 200:
        logger.error(f'code: {res.status_code}, error: {res.content}')
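A minimal driver sketch, assuming `rows` has already been annotated with a 'Synced time input' value per row as in Example #8; the step of 10 matches Airtable's limit of 10 records per batch-update request.

# Hypothetical driver loop; `rows` and the helper are assumed in scope.
for i in range(0, len(rows), 10):
    _update_10_rows_from_index(i)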
Example #2
def shares_from_url(url, request_start_date):

    params = {
        'count': 1000,
        # 'includeHistory': 'true',
        'link': url,
        'platforms': 'facebook',
        'sortBy': 'total_interactions',
        'startDate': request_start_date,
        'token': CROWDTANGLE_API_KEY
    }

    api_endpoint = 'links'

    response = requests.get(
        '{}/{}'.format(CROWDTANGLE_API_URL, api_endpoint),
        params=params
    ).json()

    shares = []
    if response['status'] == 200:
        if not response.get('result'):
            logger.warning('Crowdtangle data returned is empty.')
            return shares

        for post in response['result']['posts']:
            account = post['account']
            shares.append({
                'account': {
                    'crowdtangleIdentifier': str(account['id']),
                    'facebookIdentifier': str(account['platformId']),
                    'logoUrl': account['profileImage'],
                    'name': account['name'],
                    'url': account['url']
                },
                'post': {
                    'crowdtangleIdentifier': str(post['id']),
                    'facebookIdentifier': str(post['platformId']),
                    'publishedDate': datetime.strptime(
                        post['date'], '%Y-%m-%d %H:%M:%S'),
                    'url': post['postUrl'],
                }
            })
    else:
        logger.error(
            f'Error in fetching from Crowdtangle: {response.get("message", "Unknown exception.")}'
        )
        logger.warning('Returning empty interaction data')

    # fixed pause between calls; the CrowdTangle API enforces strict rate limits
    sleep(30)
    return shares
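A usage sketch, assuming CROWDTANGLE_API_URL, CROWDTANGLE_API_KEY and `logger` are defined in the surrounding module; the article URL and start date are illustrative.

# Hypothetical call; a 'YYYY-MM-DD' start date is assumed acceptable here.
shares = shares_from_url('https://example.com/some-article', '2020-01-01')
for share in shares:
    print(share['account']['name'], share['post']['url'])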
Example #3
def create_entity_from_row(entity_name):
    try:
        entity = entity_from_row_for(
            entity_name, request.json,
            request.json.get('index', request.json.get('airtableId')))
        if entity:
            ApiHandler.save(entity)
            if entity.__class__ == Appearance:
                sync_content(entity.quotingContent)
            return jsonify(as_dict(entity)), 200
        else:
            return jsonify({"error": "couldn't save the entity"}), 500
    except Exception as e:
        logger.error(e)
        return jsonify({"exception": "couldn't complete your request"}), 500
Example #4
def read_thumb(files=None, form=None):
    # guard against the default None values before membership tests
    if files and 'thumb' in files:
        thumb = files['thumb']
        filename_parts = thumb.filename.rsplit('.', 1)
        if len(filename_parts) < 2 \
                or filename_parts[1].lower() not in ALLOWED_EXTENSIONS:
            raise ApiErrors({
                'thumb': [
                    f"Cette image manque d'une extension {READABLE_EXTENSIONS} ou son format n'est pas autorisé"
                ]
            })
        return thumb.read()

    if form and 'thumbUrl' in form:
        try:
            return _fetch_image(form['thumbUrl'])
        except ValueError as e:
            logger.error(e)
            raise ApiErrors(
                {'thumbUrl': ["L'adresse saisie n'est pas valide"]})
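A sketch of a typical caller inside a Flask view, assuming the incoming request carries either a 'thumb' file part or a 'thumbUrl' form field:

# Hypothetical caller; `request` is Flask's request proxy.
from flask import request

def handle_upload():
    thumb_bytes = read_thumb(files=request.files, form=request.form)
    if thumb_bytes:
        pass  # hypothetical: persist the image bytes somewhere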
Example #5
def _fetch_image(thumb_url: str) -> bytes:
    if not thumb_url.startswith('http'):
        raise ValueError('Invalid thumb URL: %s' % thumb_url)

    try:
        response = requests.get(thumb_url)
    except Exception as e:
        logger.error(e)
        raise ApiErrors({
            'thumbUrl': ["Impossible de télécharger l'image à cette adresse"]
        })
    # .get() avoids a KeyError when the server omits the Content-Type header
    content_type = response.headers.get('Content-Type', '')
    is_an_image = content_type.split('/')[0] == 'image'

    if response.status_code == 200 and is_an_image:
        return response.content
    else:
        raise ValueError(
            'Error downloading thumb from url %s (status_code : %s)' %
            (thumb_url, str(response.status_code)))
Example #6
def create_wayback_machine_url(url, sleep_time=2):
    logger.info('Saving {} to Wayback Machine...'.format(url))
    with requests.Session() as session:
        session.headers = {
            'Connection': 'keep-alive',
            'host': urlparse(BASE_URL).hostname,
            'User-Agent': 'Science Feedback (https://sciencefeedback.co)'
        }
        # `allow_redirects` and `timeout` are per-request options in
        # requests, not Session attributes, so pass them to the call itself
        res = session.get('{}{}'.format(SAVE_URL, url),
                          allow_redirects=True, timeout=120)
        # wait time to ensure the page is saved
        sleep(sleep_time)
        if res.status_code == 200:
            logger.info('Saving {} to Wayback Machine...Done.'.format(url))
            location = res.headers['Content-Location']
            return '{}{}'.format(BASE_URL, location)
        else:
            logger.error('Saving {} to Wayback Machine...ERROR: {}'.format(
                url, res.status_code))
            return None
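A usage sketch; SAVE_URL and BASE_URL are assumed to be the Wayback Machine save endpoint and host (e.g. 'https://web.archive.org/save/' and 'https://web.archive.org') configured elsewhere in the module:

# Hypothetical call; the target URL is illustrative.
archived = create_wayback_machine_url('https://example.com/article')
if archived is None:
    logger.warning('Archiving failed; a retry with a longer '
                   'sleep_time may help.')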
Example #7
def claim_verdicts_from_airtable(verdicts_to_sync=None,
                                 max_verdicts=None,
                                 sync_async=False):
    if verdicts_to_sync is None:
        query = Verdict.query.filter(Verdict.scienceFeedbackUrl != None)  # noqa: E711 -- SQLAlchemy overloads `!=`
        if max_verdicts is not None:
            query = query.limit(max_verdicts)

        verdicts = query.all()
    else:
        verdicts = verdicts_to_sync

    # default the slice bound when no explicit limit was requested
    if max_verdicts is None:
        max_verdicts = len(verdicts)

    urls = [verdict.scienceFeedbackUrl for verdict in verdicts][:max_verdicts]
    if sync_async:
        claim_reviews = map_asynchronous(claim_review_from_url, urls)
    else:
        claim_reviews = [claim_review_from_url(url) for url in urls]

    for (index, verdict) in enumerate(verdicts):
        claim_review = claim_reviews[index]
        if not claim_review:
            continue

        for conclusion in claim_review['conclusions']:
            try:
                tag = Tag.create_or_modify({
                    '__SEARCH_BY__': ['label', 'type'],
                    'label': conclusion,
                    'type': TagType.CONCLUSION
                })
                if tag.id is None:
                    logger.info('Saving tag {}'.format(as_dict(tag)))
                    ApiHandler.save(tag)

                verdict_tag = VerdictTag.create_or_modify({
                    '__SEARCH_BY__': ['tagId', 'verdictId'],
                    'tagId': humanize(tag.id),
                    'verdictId': humanize(verdict.id)
                })
                verdict.verdictTags = verdict.verdictTags + [verdict_tag]

            except IntegrityError as e:
                logger.error('IntegrityError: {}, Conclusion: {}'.format(
                    e, conclusion))
            except InvalidRequestError as e:
                logger.error('InvalidRequestError: {}, Conclusion: {}'.format(
                    e, conclusion))
            except NotNullViolation as violation:
                logger.error('NotNullViolation: {}, Conclusion: {}'.format(
                    violation, conclusion))

    return verdicts
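A usage sketch, fetching verdicts straight from the database (verdicts_to_sync=None) and resolving claim reviews concurrently; the limit of 50 is illustrative:

# Hypothetical invocation.
verdicts = claim_verdicts_from_airtable(max_verdicts=50, sync_async=True)
logger.info('Processed {} verdicts'.format(len(verdicts)))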
Example #8
def sync_for(
    name,
    formula=None,
    max_records=None,
    session=None,
    sync_to_airtable=False
):
    if session is None:
        session = requests.Session()

    rows = request_airtable_rows(
        SCIENCE_FEEDBACK_AIRTABLE_BASE_ID,
        NAME_TO_AIRTABLE[name],
        filter_by_formula=formula,
        max_records=max_records,
        session=session
    )

    entities = []
    if rows:
        logger.info(f'syncing table {NAME_TO_AIRTABLE[name]}')
    else:
        logger.info(f'nothing to sync for table {NAME_TO_AIRTABLE[name]}')

    for (index, row) in enumerate(rows):
        try:
            entity = entity_from_row_for(name, row, index)
            if entity:
                entities.append(entity)
                row['Synced time input'] = datetime.now().isoformat()
            else:
                row['Synced time input'] = 'ERROR'
        except KeyError as exception:
            logger.warning(f'Error while trying to create entity from row at table {NAME_TO_AIRTABLE[name]}')
            logger.error(f'KeyError {exception}: {row}')
            row['Synced time input'] = 'ERROR'
        except Exception as exception:
            logger.warning(f'Error while trying to create entity from row at table {NAME_TO_AIRTABLE[name]}')
            logger.error(f'Unexpected error: {exception} - {sys.exc_info()[0]} at {row}')
            row['Synced time input'] = 'ERROR'

    def _update_10_rows_from_index(i):
        records = [
            {'id': row['airtableId'],
             'fields': {'Synced time input': row['Synced time input']}}
            for row in rows[i: i + 10]
        ]
        res = update_airtable_rows(
            SCIENCE_FEEDBACK_AIRTABLE_BASE_ID,
            NAME_TO_AIRTABLE[name],
            {'records': records},
            session=session
        )

        if res.status_code != 200:
            logger.error(f'code: {res.status_code}, error: {res.content}')

    try:
        # Sync verdict status from wordpress
        if name == 'verdict' and formula is not None:
            entities = claim_verdicts_from_airtable(verdicts_to_sync=entities)

        # Sync related contents for appearances
        if name == 'appearance' and formula is not None:
            for entity in entities:
                sync_content(entity.quotingContent)

        # Set the time synced so that the status in airtable is "Synced"
        if sync_to_airtable:
            for i in range(0, len(rows), 10):
                try:
                    ApiHandler.save(*entities[i:i + 10])
                    _update_10_rows_from_index(i)

                except Exception as exception:
                    logger.warning(f'Error while trying to save 10 entities at table {NAME_TO_AIRTABLE[name]}')
                    logger.error(f'Unexpected error: {exception} - {sys.exc_info()[0]}')
                    # clamp to len(rows) so a short final batch
                    # does not raise IndexError
                    for index in range(i, min(i + 10, len(rows))):
                        rows[index]['Synced time input'] = 'BATCH ERROR'
                    _update_10_rows_from_index(i)

    except Exception as exception:
        logger.warning(f'Error while trying to save entities at table {NAME_TO_AIRTABLE[name]}')
        logger.error(f'Unexpected error: {exception} - {sys.exc_info()[0]}')
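A usage sketch; NAME_TO_AIRTABLE is assumed to contain a 'verdict' entry, and the '{To sync}' field in the formula is an assumption about the Airtable schema (the syntax itself follows Airtable's filterByFormula convention):

# Hypothetical invocation; the field name in the formula is an assumption.
with requests.Session() as session:
    sync_for(
        'verdict',
        formula='{To sync} = 1',
        max_records=100,
        session=session,
        sync_to_airtable=True
    )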
Example #9
def url_from_archiveis(url):
    save_url = '{}/submit/'.format(ARCHIVEIS_URL)
    headers = {
        'User-Agent': 'Science Feedback (https://sciencefeedback.co)',
        'host': urlparse(ARCHIVEIS_URL).hostname
    }
    get_kwargs = dict(allow_redirects=True, headers=headers, timeout=120)

    response = requests.get(ARCHIVEIS_URL + '/', **get_kwargs)
    response.raise_for_status()

    # response.text decodes the body properly; str(response.content)
    # would yield the bytes repr ("b'...'") instead
    html = response.text
    try:
        unique_id = (html.split('name="submitid', 1)[1]
                     .split('value="', 1)[1]
                     .split('"', 1)[0])
    except IndexError as e:
        logger.error('Cannot find unique id: {}.'.format(e))
        logger.info('Submitting without unique id.')
        unique_id = None

    data = {
        "url": url,
        "anyway": 1,
    }

    if unique_id is not None:
        data.update({'submitid': unique_id})

    post_kwargs = dict(allow_redirects=True,
                       headers=headers,
                       data=data,
                       timeout=120)

    logger.info('Archiving URL: {}'.format(url))
    response = requests.post(save_url, **post_kwargs)
    response.raise_for_status()

    if 'Refresh' in response.headers:
        archive_url = (response.headers['Refresh']
                       .split(';url=')[1].replace('/wip', ''))
        logger.info("archive_url from Refresh header: {}".format(archive_url))
        return archive_url

    if 'Location' in response.headers:
        archive_url = response.headers['Location']
        logger.info("archive_url from Location header: {}".format(archive_url))
        return archive_url

    logger.info(
        "archive_url not found in response headers. Inspecting history.")
    for i, r in enumerate(response.history):
        logger.info("Inspecting history request #{}".format(i))
        logger.info(r.headers)
        if 'Location' in r.headers:
            archive_url = r.headers['Location']
            logger.info(
                "archive_url from the Location header of {} history response: {}"
                .format(i + 1, archive_url))
            return archive_url

    logger.error("No archive_url returned by archive.vn")
    logger.error("Status code: {}".format(response.status_code))
    logger.error(response.headers)
    logger.error(response.text)
    return None
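A usage sketch; ARCHIVEIS_URL is assumed to point at an archive.today domain (the final log line mentions archive.vn, one of its mirrors):

# Hypothetical call; the page URL is illustrative.
archive_url = url_from_archiveis('https://example.com/article')
if archive_url:
    logger.info('Archived at {}'.format(archive_url))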