Exemplo n.º 1
0
def test_session():
    name = "test_platform"
    session = db.get_session()
    session.add(models.SourcePlatform(name=name))
    session.commit()
    result = session.query(models.SourcePlatform).all()
    assert len(result) == 1
    assert result[0].name == name
    session.close()
def test_over_limit():
    limit = 110
    single.batch_collect_single_platform("twitter",
                                         keywords=["cp"],
                                         limit=limit)
    session = db.get_session()
    assert len(session.query(models.SourcePlatform).all()) == 1
    assert len(session.query(models.TextContent).all()) == limit
    assert len(session.query(models.TextAuthor).all()) == limit
    assert len(session.query(models.TextMetadata).all()) == limit
Exemplo n.º 3
0
def persist_scrape_result(scrape_result, session=db.get_session()):
    """Short summary.

    :param ScrapeResult scrape_result: pydantic scrape result model
    :param Session session: sqlalchemy session
    :return: True if success
    :rtype: boolean
    """
    source_platform = (session.query(models.SourcePlatform).filter(
        models.SourcePlatform.name == scrape_result.source_platform).first())
    if source_platform is None:
        source_platform = models.SourcePlatform(
            name=scrape_result.source_platform)
        session.add(source_platform)
        session.flush()

    if scrape_result.app_target:
        app_target = (session.query(models.AppTarget).filter(
            models.AppTarget.name == scrape_result.app_target).first())

    for text_content in scrape_result.contents:
        TextContent = models.TextContent(content=text_content.content)
        session.add(TextContent)
        session.flush()
        if scrape_result.app_target:
            ATA = models.AppTextAssociation(text_id=TextContent.id,
                                            app_id=app_target.id)
            session.add(ATA)

        ta_id = models.generate_uuid()
        TextAuthor = models.TextAuthor(
            id=ta_id,
            username=text_content.author_username,
            source_platform=source_platform.id,
        )
        session.add(TextAuthor)
        session.flush()

        TextMetadata = models.TextMetadata(
            id=TextContent.id,
            content_type=scrape_result.content_type,
            author=ta_id,
            source_platform=source_platform.id,
            conversation_native_id=text_content.conversation_native_id,
            publication_date=text_content.publication_date,
            publically_available=text_content.publically_available,
            native_id=text_content.native_id,
            keywords=json.dumps(text_content.keywords),
            miscellanous=json.dumps(text_content.miscellanous),
        )
        session.add(TextMetadata)

        session.commit()

    return True
def test_since_until():
    # Since and Until are not really working
    # Seems to be a twint issue
    limit = 5
    single.batch_collect_single_platform("twitter",
                                         keywords=["cp"],
                                         limit=limit,
                                         since="2016-12-06",
                                         until="2016-12-07")

    session = db.get_session()
    assert len(session.query(models.SourcePlatform).all()) == 1
    assert len(session.query(models.TextContent).all()) == limit
    assert len(session.query(models.TextAuthor).all()) == limit
    assert len(session.query(models.TextMetadata).all()) == limit
Exemplo n.º 5
0
def persist_app_target(app_metadata, session=db.get_session()):
    """Persist an app with metadata"""
    target = models.AppTarget(name=app_metadata.name,
                              bundle_id=app_metadata.bundle_id)
    session.add(target)
    session.flush()

    metadata = models.AppMetadata(
        id=target.name,
        publisher=app_metadata.publisher,
        esrb_rating=app_metadata.esrb_rating,
        publication_date=app_metadata.publication_date,
    )

    session.add(metadata)
    session.flush()

    return True
Exemplo n.º 6
0
def batch_collect_single_platform(
    platform,
    keywords=get_all_keywords(),
    limit=10,
    app_target=None,
    since=None,
    until=None,
):
    """Perform a batch collect"""

    scraper = get_scraper(platform)
    ScrapeResult = scraper.collect_batch(keywords, limit, app_target, since, until)
    session = get_session()
    try:
        persist_scrape_result(ScrapeResult, session)
    except Exception:
        logger.exception("failed to persist result")
    finally:
        session.commit()
        session.close()

    return True