Пример #1
0
def collect_google_scores(terms, start, end):
    """Fetch daily Google Health Trends scores for *terms* between *start* and *end*.

    Queries the Google Trends v1beta ``getTimelinesForHealth`` endpoint
    (restricted to the GB-ENG region, daily resolution) and yields one
    GoogleScore object per (term, day) point, with its ``value`` updated
    from the API response.

    Args:
        terms: iterable of term objects, each exposing a ``.term`` string.
        start: date/datetime of the first day to query (inclusive).
        end: date/datetime of the last day to query (inclusive).

    Yields:
        GoogleScore instances (via get_google_score) with ``value`` set.

    Raises:
        HttpError: if the Trends API request fails (logged before re-raising).
    """
    # The discovery cache logs noisy warnings; silence everything below ERROR.
    logging.getLogger('googleapiclient.discovery_cache').setLevel(logging.ERROR)
    logger.info('Querying %d terms between %s and %s' % (len(terms), start, end))
    logger.debug(', '.join(t.term for t in terms))
    service = build(
        'trends',
        'v1beta',
        developerKey=os.environ["GOOGLE_API_KEY"],
        discoveryServiceUrl='https://www.googleapis.com/discovery/v1/apis/trends/v1beta/rest',
        cache_discovery=False
    )
    graph = service.getTimelinesForHealth(
        terms=[t.term for t in terms],
        geoRestriction_region='GB-ENG',
        time_startDate=start.strftime('%Y-%m-%d'),
        time_endDate=end.strftime("%Y-%m-%d"),
        timelineResolution='day')
    try:
        response = graph.execute()
    except HttpError:
        # Bare `raise` preserves the original traceback (`raise e` would
        # truncate it on Python 2); logger.exception records it first.
        logger.exception('Google Trends API request failed')
        raise
    for line in response['lines']:
        # Map the API's line back onto the term object it was queried for.
        term = next(t for t in terms if t.term == line['term'])
        for point in line['points']:
            day = datetime.strptime(point['date'], "%b %d %Y").date()
            gs = get_google_score(term, day)
            gs.value = float(point['value'])
            yield gs
Пример #2
0
def calculate_moving_averages(model, day):
    """Yield (term, moving average) pairs for every Google term of *model*.

    The window size comes from the model's stored configuration; terms
    whose average cannot be computed (helper returns None) are skipped.
    """
    window = model.get_data()['average_window_size']
    logger.debug('Calculating %d-day averages on %s' % (window, day))
    for google_term in model.google_terms:
        average = calculate_moving_average(google_term, day, window)
        if average is not None:
            yield (google_term.term, average)
Пример #3
0
def shrew(day):
    """Run the Shrew pipeline over the three Tweet inputs for *day*.

    Invokes the deployed shrew binary with the twitter profile, writing
    its results to SHREW_OUTPUT_PATH.
    """
    logger.debug('Running Shrew over Tweets from %s' %
                 day.strftime('%Y-%m-%d'))
    shrew_binary = sh.Command('/home/deploy/shrew/shrew')
    arguments = [
        'run',
        '/home/deploy/fludetector/fludetector/sources/twitter/shrew-profile.yaml',
        '--as', 'local',
        '--day', day.strftime('%Y%m%d'),
        '--input0', input0(day),
        '--input1', input1(day),
        '--input2', input2(day),
        '--output', SHREW_OUTPUT_PATH,
    ]
    shrew_binary(*arguments)
Пример #4
0
def send_to_matlab(model, averages):
    """Run the model's MATLAB scoring function on fmedia13 over *averages*.

    Writes the (term, average) pairs to a local temp file, copies it to
    fmedia13 via scp, runs the model's configured MATLAB function there,
    reads the single float result back, and cleans up the remote files.

    Args:
        model: project model object; ``get_data()['matlab_function']`` names
            the MATLAB entry point to run.
        averages: iterable of 2-tuples ``(term, value)`` with float values.

    Returns:
        float: the value produced by the MATLAB function.
    """
    # `with` guarantees the temp file is closed (and therefore deleted)
    # even if scp fails; the original leaked it on that path.
    with tempfile.NamedTemporaryFile() as fd:
        fd.write('\n'.join('%s,%f' % a for a in averages))
        fd.flush()

        logger.debug('Sending query list and GoogleScores to fmedia13')
        scp(fd.name, 'fmedia13:/tmp/fludetector_google_matlab_input')

    fmedia13 = ssh.bake('fmedia13')

    # One-shot MATLAB batch script: set in/out paths, load GPML, run, exit.
    run = ';'.join([
        "fin='/tmp/fludetector_google_matlab_input'",
        "fout='/tmp/fludetector_google_matlab_output'",
        "cd /home/vlampos/website_v2",
        "run('gpml/startup.m')",
        "%s(fin,fout)" % model.get_data()['matlab_function'],
        "exit"])
    logger.debug('Running matlab function over scores')
    fmedia13('matlab', '-nodisplay', '-nojvm', '-r', '"%s"' % run)

    logger.debug('Reading matlab results back')
    value = float(fmedia13('cat', '/tmp/fludetector_google_matlab_output').strip())

    logger.debug('Cleaning up temp files')
    fmedia13('rm', '/tmp/fludetector_google_matlab_input', '/tmp/fludetector_google_matlab_output')

    return value
Пример #5
0
def collect_tweets(day):
    """Locate the three Tweet input files for *day* and decompress the middle one.

    Raises FluDetectorError if any of the three expected files is missing;
    otherwise decompresses input1's archive to SINGLE_DAY_PATH.
    """
    logger.debug('Collecting Tweets for %s' % day.strftime('%Y-%m-%d'))

    # Verify the neighbouring days' files exist before touching input1
    # (same check order as before: input0, then input2).
    for locate in (input0, input2):
        candidate = locate(day)
        if not os.path.isfile(candidate):
            raise FluDetectorError("Couldn't find tweets from %s" % candidate)

    path = input1(day)
    if not os.path.isfile(path):
        raise FluDetectorError("Couldn't find tweets from %s" % path)
    logger.debug("  Decompressing day's Tweets")
    lzop('--decompress', path, '--output', SINGLE_DAY_PATH, '--force')
Пример #6
0
def run(model, start, end, csv_file=None, **kwargs):
    """Import ModelScore rows for *model* from a CSV file.

    The CSV must have a header row containing a 'Day' or 'Date' column
    plus one column per region; each data row within [start, end] is
    upserted into the database as a ModelScore.

    Args:
        model: model object whose ``id`` keys the scores.
        start: first date (inclusive) to import.
        end: last date (inclusive) to import.
        csv_file: open file-like object with the CSV data; required.
        **kwargs: accepted for interface compatibility; unused here.

    Raises:
        FluDetectorError: if no CSV file was provided.
    """
    if csv_file is None:
        raise FluDetectorError('No CSV file provided')
    logger.info('Reading CSV file into %s' % str(model))
    csv_reader = csv.reader(csv_file)

    headers = next(csv_reader)

    day_index = find_matching_index(headers, ['Day', 'Date'], required=True)
    region_index = find_region_index(headers)

    logger.debug('Found columns for regions %s' %
                 ', '.join(region_index.keys()))

    logger.info('Reading rows...')
    for row_index, row in enumerate(csv_reader):
        day = datetime.strptime(row[day_index], '%Y-%m-%d').date()

        if day < start or day > end:
            continue

        # .items() (not the Python-2-only .iteritems()) so the loop works
        # on both Python 2 and Python 3; behavior is identical.
        for region, col_index in region_index.items():
            try:
                value = float(row[col_index])
            except ValueError:
                # Non-numeric cell (e.g. blank or placeholder): skip it.
                logger.debug('Skipping row %d column %d, not a float' %
                             (row_index + 1, col_index))
                continue
            try:
                # Update the existing score for this (model, day, region)...
                ms = ModelScore.query.filter_by(model_id=model.id,
                                                day=day,
                                                region=region).one()
            except NoResultFound:
                # ...or create a fresh one if none exists yet.
                ms = ModelScore()
                ms.region = region
                ms.model = model
                ms.day = day
            ms.value = value
            db.session.add(ms)

    db.session.commit()
    logger.info('Done!')