Example #1
def get_starting_date():
    # date of the earliest recorded daily change; fall back to today if none exist
    q = session.query(DailyDiagnosticChangeModel)
    daily_change = q.order_by(DailyDiagnosticChangeModel.date).first()
    if daily_change is None:
        return date.today()
    else:
        return daily_change.date
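A minimal usage sketch (hypothetical, not part of the original example): the starting date naturally bounds a day-by-day loop, for instance when exporting one file per day.

from datetime import date, timedelta

current_day = get_starting_date()
while current_day <= date.today():
    # process one day here, e.g. export or aggregate it
    current_day += timedelta(days=1)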
Example #2
def get_npa_list():
    # postal codes (NPA/PLZ) of every known location, as strings
    locations = session.query(LocationModel).all()
    return [str(location.postal_code) for location in locations]
Example #3
def load():
    # one serialized dict per location; pandas accepts any iterable of mappings
    q = session.query(LocationModel)
    locations = q.all()
    all_locations = (loc.serialize() for loc in locations)

    df = pd.DataFrame(all_locations)
    return df
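A quick usage note, assuming LocationModel.serialize() returns a plain dict per row: pd.DataFrame accepts any iterable of mappings, so the generator expression avoids materialising an intermediate list, and the column names come straight from the dict keys.

df = load()
print(df.shape)             # one row per location
print(df.columns.tolist())  # the keys returned by serialize()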
Example #4
def test_multiple_persons_by_npa():
    # 100 distinct sessions reporting once each must sum to 100 in the daily totals
    persons = 100
    timestamp = int(time.time())
    npa = 3240

    for _ in range(persons):
        doc_id = str(uuid.uuid4())[0:10]
        print(doc_id)
        report = IndividualReportModel(
            document_id=doc_id,
            diagnostic=random.randint(0, 4),
            locator=npa,
            session_id=str(uuid.uuid4()),
            timestamp=timestamp,
            analysis_done=False
        )
        session.add(report)
    session.commit()

    analysis_next_report(persons)

    _date = datetime.fromtimestamp(timestamp)
    print(_date)
    q = session.query(DailyDiagnosticChangeModel)
    daily_change = q.filter_by(locator=npa, year=_date.year, month=_date.month, day=_date.day).first()

    total = daily_change.diagnostic_0 + \
            daily_change.diagnostic_1 + \
            daily_change.diagnostic_2 + \
            daily_change.diagnostic_3 + \
            daily_change.diagnostic_4

    assert persons == total
Example #5
def test_one_person_update():

    # 1000 successive reports from the same session must net out to a single
    # unit in the daily totals: each update moves that unit between buckets
    nb_update = 1000
    timestamps = range(1584892474, 1584892474 + nb_update)
    session_id = str(uuid.uuid4())
    npa = 1246

    for timestamp in timestamps:
        doc_id = str(uuid.uuid4())[0:10]
        print(doc_id)
        report = IndividualReportModel(
            document_id=doc_id,
            diagnostic=random.randint(0, 4),
            locator=npa,
            session_id=session_id,
            timestamp=timestamp,
            analysis_done=False
        )
        session.add(report)
    session.commit()

    analysis_next_report(nb_update)

    _date = datetime.fromtimestamp(timestamps[0])
    q = session.query(DailyDiagnosticChangeModel)
    daily_change = q.filter_by(locator=npa, year=_date.year, month=_date.month, day=_date.day).first()

    total = daily_change.diagnostic_0 + \
            daily_change.diagnostic_1 + \
            daily_change.diagnostic_2 + \
            daily_change.diagnostic_3 + \
            daily_change.diagnostic_4

    print(total)
    assert total == 1
Example #6
def report():
    data = request.json
    print(data)

    # if a token is submitted: validate it and insert the report into the database
    # otherwise (only a report is submitted): generate a new token, insert the
    # report, and return the token

    if 'token' in data:
        submitted_token = str(data['token'])
        if len(submitted_token) != 7:
            raise InvalidUsage("Invalid token")

        existing_token = session.query(Token).filter_by(token=submitted_token).first()
        if existing_token:
            try:
                report = ReportFactory.build(data)
                try:
                    session.add(report)
                    session.commit()
                except Exception:
                    session.rollback()
                    abort(500, "Could not insert into database")
            except TypeError:
                raise InvalidUsage(
                    "Some parameter was wrongly typed (string, int, array).")

            if report.has_comorbid:
                data['report']['comorbid']['parent_id'] = report.document_id

                try:
                    comorbid = ComorbidFactory.build(
                        data['report']['comorbid'])
                    try:
                        session.add(comorbid)
                        session.commit()
                    except Exception:
                        session.rollback()
                        abort(500, "Could not insert into database")
                except TypeError:
                    raise InvalidUsage(
                        "Some parameter was wrongly typed (string, int, array)."
                    )

            return make_response("", 201)

        else:
            raise InvalidUsage("Provided token doesn't exist")
    elif 'report' in data:
        generated_token = generate_token()
        data['token'] = generated_token

        token = TokenFactory.build(generated_token)
        try:
            session.add(token)
            session.commit()
        except Exception:
            session.rollback()
            abort(500, "Could not insert a new token into database")

        try:
            report = ReportFactory.build(data)
            try:
                session.add(report)
                session.commit()
            except Exception:
                session.rollback()
                abort(500, "Could not insert into database")
        except TypeError:
            raise InvalidUsage(
                "Some parameter was wrongly typed (string, int, array).")

        if report.has_comorbid:
            data['report']['comorbid']['parent_id'] = report.document_id

            try:
                comorbid = ComorbidFactory.build(data['report']['comorbid'])
                try:
                    session.add(comorbid)
                    session.commit()
                except Exception:
                    session.rollback()
                    abort(500, "Could not insert into database")
            except TypeError:
                raise InvalidUsage(
                    "Some parameter was wrongly typed (string, int, array).")

        return make_response(jsonify({"token": generated_token}), 201)

    else:
        raise InvalidUsage("Required parameters are missing")
Example #7
def all():
    q = session.query(IndividualReportModel)
    individuals = q.all()
    return jsonify(
        individuals=[individual.serialize() for individual in individuals])
Example #8
def all_locations():
    q = session.query(LocationModel)
    locations = q.all()
    return jsonify(locations=[loc.serialize() for loc in locations])
Example #9
def analysis_next_report(collection_size: int):
    start_time_analysis = time.time()

    # load all previous reports
    report_done_df = pd.read_sql("SELECT * FROM individual_report WHERE analysis_done = 1",
                                 con=engine, index_col="document_id")

    # load the next collection of reports to analyse
    next_reports = pd.read_sql("SELECT * FROM individual_report WHERE analysis_done = 0 ORDER BY timestamp LIMIT " + str(collection_size),
                               con=engine, index_col="document_id")

    # timestamp from millisecond to second
    report_done_df["timestamp"] = report_done_df["timestamp"] / 1000
    next_reports["timestamp"] = next_reports["timestamp"] / 1000

    # Hold the reports to update in the end
    report_to_update = list()

    # Hold the per-locator, per-day diagnostic deltas
    daily_delta_df = pd.DataFrame(columns=['locator','year','month','day','0','1','2','3','4','5'])

    sample = 0
    start_time = time.time()
    # iterate over all the reports to analyse
    for doc_id, next_report in next_reports.iterrows():
        sample += 1

        _date = datetime.fromtimestamp(next_report['timestamp'])  # timestamp is in seconds after the conversion above

        delta = [0, 0, 0, 0, 0, 0]

        # search for last status
        same_session_id_df = report_done_df.loc[report_done_df['session_id'] == next_report['session_id']]
        if same_session_id_df.empty:
            delta[next_report['diagnostic']] = 1
            #print('Delta (new user)',str(delta))
        else:
            last_report = same_session_id_df.sort_values(by=['timestamp'], ascending=False).iloc[0]
            if last_report['diagnostic'] != next_report['diagnostic']:
                delta[last_report['diagnostic']] = -1
                delta[next_report['diagnostic']] = 1
            #print('Delta (existing user)', str(delta))


        # check if daily delta already exist
        previous_daily = daily_delta_df.loc[(daily_delta_df['year'] == _date.year)
                                             & (daily_delta_df['month'] == _date.month)
                                             & (daily_delta_df['day'] == _date.day)
                                             & (daily_delta_df['locator'] == next_report['locator'])]

        if previous_daily.empty:
            # then create it
            new_daily = dict()
            new_daily['locator'] = next_report['locator']
            new_daily['year'] = _date.year
            new_daily['month'] = _date.month
            new_daily['day'] = _date.day
            new_daily['0'] = delta[0]
            new_daily['1'] = delta[1]
            new_daily['2'] = delta[2]
            new_daily['3'] = delta[3]
            new_daily['4'] = delta[4]
            new_daily['5'] = delta[5]
            # DataFrame.append was removed in pandas 2.0; concat a one-row frame instead
            daily_delta_df = pd.concat([daily_delta_df, pd.DataFrame([new_daily])], ignore_index=True)
        else:
            # then update it
            previous_daily = previous_daily.iloc[0]  # convert dataframe to serie by ask the first (the only one) row
            # print('new diagnostic:',next_report['diagnostic'])
            daily_delta_df.at[previous_daily.name, "0"] = previous_daily["0"] + delta[0]
            daily_delta_df.at[previous_daily.name, "1"] = previous_daily["1"] + delta[1]
            daily_delta_df.at[previous_daily.name, "2"] = previous_daily["2"] + delta[2]
            daily_delta_df.at[previous_daily.name, "3"] = previous_daily["3"] + delta[3]
            daily_delta_df.at[previous_daily.name, "4"] = previous_daily["4"] + delta[4]
            daily_delta_df.at[previous_daily.name, "5"] = previous_daily["5"] + delta[5]

        if sample % 100 == 0:
            elapsed = time.time() - start_time
            print('Analysed ' + str(sample) + ' samples in ' + str(elapsed) + 's')
            start_time = time.time()

        # mark the local copy as done (the DB rows are updated in bulk below)
        next_report['analysis_done'] = 1
        # keep track of the reports to update in the DB
        report_to_update.append(doc_id)
        # update the done-report history (DataFrame.append was removed in pandas 2.0)
        report_done_df = pd.concat([report_done_df, next_report.to_frame().T])

    #print('====== Report to update ========')
    #print(report_to_update)
    print('====== Daily change ========')
    print(daily_delta_df)



    # upload daily changes
    for doc_id, daily_delta in daily_delta_df.iterrows():
        locator = daily_delta['locator']
        year = daily_delta['year']
        month = daily_delta['month']
        day = daily_delta['day']
        q = session.query(DailyDiagnosticChangeModel)
        daily_change = q.filter_by(locator=locator, year=year, month=month, day=day).first()

        if daily_change is None:
            # then create it
            daily_change = DailyDiagnosticChangeModel(
                locator=locator,
                year=year,
                month=month,
                day=day,
                diagnostic_0=daily_delta['0'],
                diagnostic_1=daily_delta['1'],
                diagnostic_2=daily_delta['2'],
                diagnostic_3=daily_delta['3'],
                diagnostic_4=daily_delta['4'],
                diagnostic_5=daily_delta['5'],
            )
            session.add(daily_change)
        else:
            # update diagnostic
            daily_change.diagnostic_0 += daily_delta['0']
            daily_change.diagnostic_1 += daily_delta['1']
            daily_change.diagnostic_2 += daily_delta['2']
            daily_change.diagnostic_3 += daily_delta['3']
            daily_change.diagnostic_4 += daily_delta['4']
            daily_change.diagnostic_5 += daily_delta['5']

    session.commit()

    # mark the analysed reports as done in a single bulk update
    if report_to_update:
        ids = '(' + ','.join('"' + doc_id + '"' for doc_id in report_to_update) + ')'
        query = 'UPDATE individual_report SET analysis_done=1 WHERE individual_report.document_id IN ' + ids + ';'
        with engine.connect() as con:
            con.execute(query)

    print('Analysed ' + str(collection_size) + ' reports in ' + str(time.time() - start_time_analysis) + 's')
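The hand-built IN clause above works, but quoting document ids by hand is fragile. A sketch of a parameterised alternative, assuming SQLAlchemy 1.3+ (for expanding bind parameters) and the same autocommit-style connection the original uses:

from sqlalchemy import bindparam, text

# the expanding bind parameter builds the IN (...) list from report_to_update
stmt = text(
    'UPDATE individual_report SET analysis_done = 1 '
    'WHERE document_id IN :ids'
).bindparams(bindparam('ids', expanding=True))

with engine.connect() as con:
    con.execute(stmt, {'ids': report_to_update})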
Example #10
def count_report_to_analyse():
    q = session.query(IndividualReportModel)
    return q.filter_by(analysis_done=False).count()
if __name__ == '__main__':

    fieldnames = [
        'npa_plz',
        'town',
        'state',
        'latitude',
        'longitude'
    ]

    with open(str(OUTPUT_GEO_CODING_FILE), 'w', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()

        # query all location first
        q = session.query(LocationModel)
        locations = q.all()
        for location in locations:

            writer.writerow({
                'npa_plz': location.npa,
                'town': location.town,
                'state': location.state,
                'longitude': location.longitude,
                'latitude': location.latitude
            })
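count_report_to_analyse at the top of this example pairs naturally with analysis_next_report from Example #9. A hedged driver sketch, draining the backlog in fixed-size batches (the batch size is an arbitrary illustrative choice):

BATCH_SIZE = 500

while True:
    remaining = count_report_to_analyse()
    if remaining == 0:
        break
    # analyse at most one batch per pass
    analysis_next_report(min(BATCH_SIZE, remaining))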




Example #12
def export_daily_report_to_csv():
    from_day = '2020-03-24'  # earliest date present in the database
    to_day = '2020-03-29'

    from_date = datetime.strptime(from_day, DAY_FORMAT)
    to_date = datetime.strptime(to_day, DAY_FORMAT)

    # clean the current output
    for _f in DAILY_REPORT_DIR.glob('*.csv'):
        _f.unlink()

    # query all location first
    q = session.query(LocationModel)
    locations = q.all()
    geo_locations = {}
    for location in locations:
        geo_locations[str(location.npa)] = {
            'longitude': location.longitude,
            'latitude': location.latitude,
            'state': location.state
        }

    fieldnames = [
        'date',
        'state',
        'npa_plz',
        'latitude',
        'longitude',
        'total_healthy',
        'total_sick_guess_no_corona',
        'total_sick_guess_corona',
        'total_sick_corona_confirmed',
        'total_recovered_confirmed',
        'total_recovered_not_confirmed',
    ]

    merge_file = OUTPUT_DATASETS_PATH / 'merge-all-days.csv'
    with open(str(merge_file), 'w', newline='') as csvfile:
        merge_writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        merge_writer.writeheader()

        # iterate over days
        totals = {}
        current_day = from_date
        while current_day <= to_date:

            # create daily file
            daily_file = DAILY_REPORT_DIR / ('ch-covid-19-' + current_day.strftime(DAY_FORMAT) + '.csv')
            with open(str(daily_file), 'w', newline='') as csvfile:
                writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
                writer.writeheader()

                q = session.query(DailyDiagnosticChangeModel)
                daily_changes = q.filter_by(year=current_day.year, month=current_day.month, day=current_day.day).all()

                print(current_day)
                if daily_changes:  # .all() returns a list, never None
                    for daily_change in daily_changes:
                        try:
                            if daily_change.locator not in totals:
                                # then create it
                                totals[daily_change.locator] = {
                                    # geo_locations is keyed by str(npa)
                                    'longitude': geo_locations[str(daily_change.locator)]['longitude'],
                                    'latitude': geo_locations[str(daily_change.locator)]['latitude'],
                                    'state': geo_locations[str(daily_change.locator)]['state'],
                                    'data': [
                                        daily_change.diagnostic_0,
                                        daily_change.diagnostic_1,
                                        daily_change.diagnostic_2,
                                        daily_change.diagnostic_3,
                                        daily_change.diagnostic_4,
                                        daily_change.diagnostic_5,
                                    ]
                                }
                            else:
                                # use the previous total
                                totals[daily_change.locator]['data'] = [
                                    totals[daily_change.locator]['data'][0] + daily_change.diagnostic_0,
                                    totals[daily_change.locator]['data'][1] + daily_change.diagnostic_1,
                                    totals[daily_change.locator]['data'][2] + daily_change.diagnostic_2,
                                    totals[daily_change.locator]['data'][3] + daily_change.diagnostic_3,
                                    totals[daily_change.locator]['data'][4] + daily_change.diagnostic_4,
                                    totals[daily_change.locator]['data'][5] + daily_change.diagnostic_5,
                                ]

                            # totals cannot be negative; clamp to zero to absorb
                            # any inconsistencies present in the dataset
                            totals[daily_change.locator]['data'] = [
                                max(0, value) for value in totals[daily_change.locator]['data']
                            ]


                        except KeyError:
                            # locator without a known geo location
                            print('Wrong npa: ' + str(daily_change.locator))

                total_status = 0
                # export all totals for the current day; totals carry over from
                # previous days even when the current day has no new changes
                for npa, total in totals.items():
                    total_status += sum(total['data'])
                    row = {
                        'npa_plz': npa,
                        'state': total['state'],
                        'longitude': total['longitude'],
                        'latitude': total['latitude'],
                        'date': current_day.strftime(DAY_FORMAT),
                        'total_healthy': total['data'][0],
                        'total_sick_guess_no_corona': total['data'][1],
                        'total_sick_guess_corona': total['data'][2],
                        'total_sick_corona_confirmed': total['data'][3],
                        'total_recovered_not_confirmed': total['data'][4],
                        'total_recovered_confirmed': total['data'][5],
                    }
                    # identical row goes to the daily file and the merged file
                    writer.writerow(row)
                    merge_writer.writerow(row)
                print(total_status)

            current_day = current_day + timedelta(days=1)
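For reference, a sketch of the module-level constants this export relies on. DAY_FORMAT can be inferred from the date strings above; the two paths are purely hypothetical placeholders:

from pathlib import Path

DAY_FORMAT = '%Y-%m-%d'                             # matches '2020-03-24'
OUTPUT_DATASETS_PATH = Path('datasets')             # hypothetical output root
DAILY_REPORT_DIR = OUTPUT_DATASETS_PATH / 'daily'   # hypothetical daily-files dir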