Example No. 1
def test_multiple_persons_by_npa():
    persons = 100
    timestamp = int(time.time())
    npa = 3240

    for _ in range(persons):
        doc_id = str(uuid.uuid4())[0:10]
        print(doc_id)
        report = IndividualReportModel(
            document_id=doc_id,
            diagnostic=random.randint(0, 4),
            locator=npa,
            session_id=str(uuid.uuid4()),
            timestamp=timestamp,
            analysis_done=False
        )
        session.add(report)
    session.commit()

    analysis_next_report(persons)

    _date = datetime.fromtimestamp(timestamp)
    print(_date)
    q = session.query(DailyDiagnosticChangeModel)
    daily_change = q.filter_by(locator=npa, year=_date.year, month=_date.month, day=_date.day).first()

    total = daily_change.diagnostic_0 + \
            daily_change.diagnostic_1 + \
            daily_change.diagnostic_2 + \
            daily_change.diagnostic_3 + \
            daily_change.diagnostic_4

    assert persons == total
Example No. 2
def add_person():
    if not request.json:
        raise InvalidUsage("No data supplied")

    param_list = ['id', 'data']
    check_param(request.json, param_list)

    data_list = ['diagnostic', 'locator', 'sessionId', 'timestamp']
    check_param(request.json['data'], data_list)

    timestamp_list = ["_seconds", "_nanoseconds"]
    check_param(request.json['data']['timestamp'], timestamp_list)

    try:
        report = IndividualReportFactory.build(request.json)
        try:
            session.add(report)
            session.commit()
        except Exception:
            session.rollback()
            abort(500, "Could not insert to database")
    except TypeError:
        raise InvalidUsage(
            "Some parameter was wrongly typed (string, int, array).")
    except Exception:
        message = ("Could not create Individual Report. "
                   "Probably malformed JSON: {}".format(request.json))
        abort(400, message)
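Example No. 2 relies on a check_param helper that is not included in these snippets. A minimal sketch of what it presumably does, assuming it raises the same InvalidUsage exception the endpoint already uses when a required key is missing:

def check_param(payload, required_keys):
    # Hypothetical helper (not the project's actual implementation): verify that
    # every required key is present in the payload dict and reject the request
    # otherwise.
    missing = [key for key in required_keys if key not in payload]
    if missing:
        raise InvalidUsage("Missing parameters: " + ", ".join(missing))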
Example No. 3
def upload_geo_data():
    """
     csv headers: country_code,postal_code,latitude,longitude,region_id
    :return:
    """

    geo_locations = {}
    with open(str(GEOCODING_RAW_FILE), 'r') as file:
        reader = csv.reader(file)
        for i, row in enumerate(reader):
            if i == 0:
                continue  # skip header

            location = {
                'country_code': row[0],
                'postal_code': row[1],
                'latitude': float(row[2]),
                'longitude': float(row[3]),
                'region_id': row[4],
            }

            # deduplicate by postal code (later rows overwrite earlier ones)
            geo_locations[location['postal_code']] = location

    for key, loc in geo_locations.items():
        location = LocationModel(
            postal_code=loc['postal_code'],
            country_code=loc['country_code'],
            region_id=loc['region_id'],
            longitude=loc['longitude'],
            latitude=loc['latitude'],
        )
        session.add(location)
    session.commit()
Example No. 4
def test_one_person_update():

    nb_update = 1000
    timestamps = range(1584892474, 1584892474 + nb_update)
    session_id = str(uuid.uuid4())
    npa = 1246

    for timestamp in timestamps:
        doc_id = str(uuid.uuid4())[0:10]
        print(doc_id)
        report = IndividualReportModel(
            document_id=doc_id,
            diagnostic=random.randint(0, 4),
            locator=npa,
            session_id=session_id,
            timestamp=timestamp,
            analysis_done=False
        )
        session.add(report)
    session.commit()

    analysis_next_report(nb_update)

    _date = datetime.fromtimestamp(timestamps[0])
    q = session.query(DailyDiagnosticChangeModel)
    daily_change = q.filter_by(locator=npa, year=_date.year, month=_date.month, day=_date.day).first()

    total = daily_change.diagnostic_0 + \
            daily_change.diagnostic_1 + \
            daily_change.diagnostic_2 + \
            daily_change.diagnostic_3 + \
            daily_change.diagnostic_4

    print(total)
    assert total == 1
Example No. 5
def upload_geo_data():
    """Merge the two geo data CSV files (keyed by NPA) and insert them as LocationModel rows."""
    geo_locations = {}
    with open(str(GEO_DATA_FILE_FIRST), 'r') as file:
        reader = csv.reader(file)
        for i, row in enumerate(reader):
            if i == 0:
                continue  # skip header

            location = {
                'npa': int(row[5]),
                'town': row[4],
                'state': row[3],
                'longitude': float(row[2]),
                'latitude': float(row[1]),
            }

            # deduplicate by NPA (later rows overwrite earlier ones)
            geo_locations[location['npa']] = location

    with open(str(GEO_DATA_FILE_SECOND), 'r') as file:
        reader = csv.reader(file)
        for i, row in enumerate(reader):
            if i == 0:
                continue  # skip header

            location = {
                'npa': int(row[0]),
                'town': row[1],
                'state': row[2],
                'longitude': float(row[3]),
                'latitude': float(row[4]),
            }

            if location['npa'] in geo_locations:
                existing = geo_locations[location['npa']]
                existing['town'] = location['town']
                existing['longitude'] = location['longitude']
                existing['latitude'] = location['latitude']
            else:
                # not seen in the first file, so add it
                geo_locations[location['npa']] = location

    for key, loc in geo_locations.items():
        location = LocationModel(
            npa=loc['npa'],
            town=loc['town'],
            state=loc['state'],
            longitude=loc['longitude'],
            latitude=loc['latitude'],
        )
        session.add(location)
    session.commit()
Example No. 6
def test_insert_speed():
    """Insert 1000 reports in a single transaction and print how long it takes."""
    timestamps = range(1584892474, 1584892474 + 1000)
    npa = 1240
    start_time = time.time()
    for timestamp in timestamps:
        doc_id = str(uuid.uuid4())[0:10]
        report = IndividualReportModel(
            document_id=doc_id,
            diagnostic=random.randint(0, 4),
            locator=npa,
            session_id=str(uuid.uuid4()),
            timestamp=timestamp
        )
        session.add(report)
    session.commit()
    print('Insertion time:', time.time() - start_time)
Example No. 7
def insert_fake_inidividual_reports():
    """Insert `sample_per_day` fake reports per day for every day between from_day and to_day."""
    from_day = '2020-03-10'
    to_day = '2020-03-15'
    sample_per_day = 1000
    person_list = gen_fake_person_list(sample_per_day)

    from_date = datetime.strptime(from_day, DAY_FORMAT)
    to_date = datetime.strptime(to_day, DAY_FORMAT)

    _from = int(datetime.strptime(from_day, DAY_FORMAT).timestamp())
    _to = int(datetime.strptime(to_day, DAY_FORMAT).timestamp())

    current_day = from_date
    while current_day <= to_date:

        timestamp = current_day.timestamp()
        start_time = time.time()

        for sample in range(0, sample_per_day):
            person = random_list_element(person_list)

            doc_id = str(uuid.uuid4())[0:20]
            report = IndividualReportModel(
                document_id=doc_id,
                diagnostic=random.randint(0, 4),
                locator=person['npa'],
                session_id=person['session_id'],
                timestamp=(timestamp + sample) * 1000,  # to millisecond
                analysis_done=False)
            # print(report.timestamp)
            session.add(report)

        session.commit()
        spend_time = time.time() - start_time
        print(
            str(current_day) + ': uploaded ' + str(sample_per_day) +
            ' samples in ' + str(spend_time) + 's')
        current_day = current_day + timedelta(days=1)
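Examples No. 7 and No. 10 assume two helpers, gen_fake_person_list and random_list_element, that are not shown here. A plausible minimal sketch (the NPA values below are illustrative, not taken from the project):

import random
import uuid

def gen_fake_person_list(count, npa_choices=(1000, 1240, 1246, 3240)):
    # Hypothetical helper: build `count` fake persons, each with a random NPA
    # (postal code) and a stable session id.
    return [
        {'npa': random.choice(npa_choices), 'session_id': str(uuid.uuid4())}
        for _ in range(count)
    ]

def random_list_element(items):
    # Hypothetical helper: pick a uniformly random element from a non-empty list.
    return random.choice(items)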
Example No. 8
def report():
    """Insert a submitted report; if no valid token is supplied, create one and return it."""
    data = request.json
    print(data)

    # If a valid token was submitted, insert the report against it.
    # Otherwise generate a new token, insert the report and return the token
    # to the caller.

    if 'token' in data.keys():
        submitted_token = str(data['token'])
        if len(submitted_token) != 7:
            raise InvalidUsage("Invalid token")

        q = session.query(Token).filter_by(token=submitted_token).first()
        if q:
            try:
                report = ReportFactory.build(data)
                try:
                    session.add(report)
                    session.commit()
                except:
                    session.rollback()
                    abort(500, "Could not insert into database")
            except TypeError:
                raise InvalidUsage(
                    "Some parameter was wrongly typed (string, int, array).")

            if report.has_comorbid:
                data['report']['comorbid']['parent_id'] = report.document_id

                try:
                    comorbid = ComorbidFactory.build(
                        data['report']['comorbid'])
                    try:
                        session.add(comorbid)
                        session.commit()
                    except:
                        session.rollback()
                        abort(500, "Could not insert into database")
                except TypeError:
                    raise InvalidUsage(
                        "Some parameter was wrongly typed (string, int, array)."
                    )

            return make_response("", 201)

        else:
            raise InvalidUsage("Provided token doesn't exist")
    elif 'report' in data.keys():
        generated_token = generate_token()
        data['token'] = generated_token

        token = TokenFactory.build(generated_token)
        try:
            session.add(token)
            session.commit()
        except:
            session.rollback()
            abort(500, "Could not insert a new token into database")

        try:
            report = ReportFactory.build(data)
            try:
                session.add(report)
                session.commit()
            except:
                session.rollback()
                abort(500, "Could not insert into database")
        except TypeError:
            raise InvalidUsage(
                "Some parameter was wrongly typed (string, int, array).")

        if report.has_comorbid:
            data['report']['comorbid']['parent_id'] = report.document_id

            try:
                comorbid = ComorbidFactory.build(data['report']['comorbid'])
                try:
                    session.add(comorbid)
                    session.commit()
                except:
                    session.rollback()
                    abort(500, "Could not insert into database")
            except TypeError:
                raise InvalidUsage(
                    "Some parameter was wrongly typed (string, int, array).")

        return make_response(jsonify({"token": generated_token}), 201)

    else:
        raise InvalidUsage("Required parameters are missing")
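The report() endpoint above calls generate_token() and only requires that tokens be 7 characters long; the generator itself is not part of these examples. A minimal sketch under that assumption (the character set and the use of the secrets module are guesses, not the project's implementation):

import secrets
import string

def generate_token(length=7):
    # Hypothetical sketch: produce a random token of the length report() expects.
    alphabet = string.ascii_uppercase + string.digits
    return ''.join(secrets.choice(alphabet) for _ in range(length))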
Example No. 9
def download_hours_frame(_from_hour: str, _to_hour: str, worker_frame_minute: int = 10):
    """Download reports hour by hour, fanning each hour out to a pool of per-frame workers."""
    if 60 % worker_frame_minute != 0:
        raise Exception("worker_frame_minute must evenly divide 60")

    _from = int(datetime.strptime(_from_hour, DATE_HOUR_PARAM_FORMAT).timestamp())
    _to = int(datetime.strptime(_to_hour, DATE_HOUR_PARAM_FORMAT).timestamp())

    now = datetime.now()  # current date and time
    now_str = now.strftime("%Y-%m-%d-%H-%M-%S")
    backup_dir = BACKUP_DOCUMENTS_PATH / ('download-time-' + now_str)
    if not backup_dir.exists():
        backup_dir.mkdir()

    # iterate over hours
    _previous = _from
    _next = _previous + 60 * 60
    while _next <= _to:
        _prev_str = datetime.fromtimestamp(_previous).strftime(DATE_HOUR_PARAM_FORMAT)
        _next_str = datetime.fromtimestamp(_next).strftime(DATE_HOUR_PARAM_FORMAT)
        print("Download from " + _prev_str + " to " + _next_str)

        minutes = list()

        # iterate over minutes
        _previous_minute = _previous
        _to_minute = _next
        _next_minute = _previous_minute + 60 * worker_frame_minute
        while _next_minute <= _to_minute:
            minutes.append((_previous_minute, _next_minute))
            _previous_minute = _next_minute
            _next_minute = _previous_minute + worker_frame_minute * 60

        hour_data = []
        # launch pool
        process_count = int(60 / worker_frame_minute)
        with Pool(processes=process_count) as pool:
            res = pool.map(download_worker, minutes)
            for worker_results in res:
                hour_data.extend(worker_results)

        # save results to json file
        if len(hour_data) > 0:
            file_path = backup_dir / ('data-' + _prev_str + '_' + _next_str + '.json')
            with open(str(file_path), 'w') as outfile:
                json.dump(hour_data, outfile)

        try:
            # try batch commit
            for report_json in hour_data:
                report = IndividualReportFactory.build(report_json)
                session.add(report)
            session.commit()
        except IntegrityError:
            session.rollback()
            # duplication, then one by one
            print('Warn: Duplicate in batch commit')
            for report_json in hour_data:
                try:
                    report = IndividualReportFactory.build(report_json)
                    session.add(report)
                    session.commit()
                except IntegrityError:
                    session.rollback()
                    print('Warn: Duplicate doc id: ' + report_json['id'])

        _previous = _next
        _next = _previous + (60 * 60)
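A usage sketch for Example No. 9, assuming DATE_HOUR_PARAM_FORMAT is an hour-resolution pattern such as '%Y-%m-%d-%H' and that download_worker (not shown) fetches the reports for one (start, end) timestamp pair:

# Download one day of reports, splitting every hour into 10-minute frames,
# i.e. 6 worker processes per hour. The date strings follow the assumed format.
download_hours_frame('2020-03-20-00', '2020-03-21-00', worker_frame_minute=10)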
Example No. 10
def insert_fake_inidividual_reports():
    """Insert fake reports (including symptom and comorbidity fields) for each day between from_day and to_day."""
    from_day = '2020-03-10'
    to_day = '2020-03-15'
    sample_per_day = 1000
    person_list = gen_fake_person_list(sample_per_day)

    from_date = datetime.strptime(from_day, DAY_FORMAT)
    to_date = datetime.strptime(to_day, DAY_FORMAT)

    _from = int(datetime.strptime(from_day, DAY_FORMAT).timestamp())
    _to = int(datetime.strptime(to_day, DAY_FORMAT).timestamp())

    def random_bool():
        return random.randint(0, 1)

    current_day = from_date
    while current_day <= to_date:

        timestamp = current_day.timestamp()
        start_time = time.time()

        for sample in range(0, sample_per_day):
            person = random_list_element(person_list)

            doc_id = str(uuid.uuid4())[0:20]
            
            has_comorbid = random_bool()
            comorbid = Comorbid(
                hypertension=random_bool(),
                cardiovascular=random_bool(),
                pulmonary=random_bool(),
                cancer=random_bool(),
                diabetes=random_bool(),
                renal=random_bool(),
                neurological=random_bool(),
                respiratory=random_bool(),
            )
            report = IndividualReportModel(
                # Metadata
                document_id=doc_id,
                diagnostic=random.randint(0, 4),
                locator=person['npa'],
                session_id=person['session_id'],
                timestamp=(timestamp + sample) * 1000,  # to milliseconds
                analysis_done=False,
                # Actual data
                temp=enum.Scale3(random.randint(1, 3)).name,
                cough=enum.Scale4(random.randint(0, 3)).name,
                breathless=enum.Scale4(random.randint(0, 3)).name,
                energy=enum.Energy(random.randint(0, 4)).name,
                exposure=enum.Exposure(random.randint(0, 2)).name,
                has_comorbid=has_comorbid,
                comorbid=comorbid,
                compromised_immune=random_bool(),
                age=enum.Scale3(random.randint(1, 3)).name,
            )
            # print(report.timestamp)
            session.add(report)

        session.commit()
        spend_time = time.time() - start_time
        print(str(current_day) + ': uploaded ' + str(sample_per_day) + ' samples in ' + str(spend_time) + 's')
        current_day = current_day + timedelta(days=1)
Example No. 11
def analysis_next_report(collection_size: int):
    """Analyse the next `collection_size` pending reports and update the daily diagnostic change counters."""
    start_time_analysis = time.time()

    # load all previous reports
    report_done_df = pd.read_sql("SELECT * FROM individual_report WHERE analysis_done = 1",
                                 con=engine, index_col="document_id")

    # load the next collection of reports to analyse
    next_reports = pd.read_sql("SELECT * FROM individual_report WHERE analysis_done = 0 ORDER BY timestamp LIMIT " + str(collection_size),
                               con=engine, index_col="document_id")

    # timestamp from millisecond to second
    report_done_df["timestamp"] = report_done_df["timestamp"] / 1000
    next_reports["timestamp"] = next_reports["timestamp"] / 1000

    # Hold the reports to update in the end
    report_to_update = list()

    # Hold the daily diagnostic deltas (one row per locator and day)
    daily_delta_df = pd.DataFrame(columns=['locator', 'year', 'month', 'day', '0', '1', '2', '3', '4', '5'])

    sample = 0
    start_time = time.time()
    # iterate over all the reports to analyse
    for doc_id, next_report in next_reports.iterrows():
        sample += 1

        _date = datetime.fromtimestamp(next_report['timestamp']) # to seconds

        delta = [0, 0, 0, 0, 0, 0]

        # search for last status
        same_session_id_df = report_done_df.loc[report_done_df['session_id'] == next_report['session_id']]
        if same_session_id_df.empty:
            delta[next_report['diagnostic']] = 1
            #print('Delta (new user)',str(delta))
        else:
            last_report = same_session_id_df.sort_values(by=['timestamp'], ascending=False).iloc[0]
            if last_report['diagnostic'] != next_report['diagnostic']:
                delta[last_report['diagnostic']] = -1
                delta[next_report['diagnostic']] = 1
            #print('Delta (existing user)', str(delta))


        # check if daily delta already exist
        previous_daily = daily_delta_df.loc[(daily_delta_df['year'] == _date.year)
                                             & (daily_delta_df['month'] == _date.month)
                                             & (daily_delta_df['day'] == _date.day)
                                             & (daily_delta_df['locator'] == next_report['locator'])]

        if previous_daily.empty:
            # then create it
            new_daily = dict()
            new_daily['locator'] = next_report['locator']
            new_daily['year'] = _date.year
            new_daily['month'] = _date.month
            new_daily['day'] = _date.day
            new_daily['0'] = delta[0]
            new_daily['1'] = delta[1]
            new_daily['2'] = delta[2]
            new_daily['3'] = delta[3]
            new_daily['4'] = delta[4]
            new_daily['5'] = delta[5]
            new_daily[str(next_report['diagnostic'])] = 1
            daily_delta_df = daily_delta_df.append(new_daily, ignore_index=True)
        else:
            # then update it
            previous_daily = previous_daily.iloc[0]  # take the first (and only) row, converting the DataFrame into a Series
            # print('new diagnostic:',next_report['diagnostic'])
            daily_delta_df.at[previous_daily.name, "0"] = previous_daily["0"] + delta[0]
            daily_delta_df.at[previous_daily.name, "1"] = previous_daily["1"] + delta[1]
            daily_delta_df.at[previous_daily.name, "2"] = previous_daily["2"] + delta[2]
            daily_delta_df.at[previous_daily.name, "3"] = previous_daily["3"] + delta[3]
            daily_delta_df.at[previous_daily.name, "4"] = previous_daily["4"] + delta[4]
            daily_delta_df.at[previous_daily.name, "5"] = previous_daily["5"] + delta[5]

        if sample % 100 == 0:
            spend_time = time.time() - start_time
            print('Analysed ' + str(sample) + ' samples in ' + str(spend_time) + 's')
            start_time = time.time()

        # update report status
        next_report['analysis_done'] = 1
        # keep trace of the report to update in DB
        #report_to_update_df = report_to_update_df.append(next_report)
        report_to_update.append(doc_id)
        # update the done report list
        report_done_df = report_done_df.append(next_report)

    #print('====== Report to update ========')
    #print(report_to_update)
    print('====== Daily change ========')
    print(daily_delta_df)

    # upload daily changes
    for doc_id, daily_delta in daily_delta_df.iterrows():
        locator = daily_delta['locator']
        year = daily_delta['year']
        month = daily_delta['month']
        day = daily_delta['day']
        q = session.query(DailyDiagnosticChangeModel)
        daily_change = q.filter_by(locator=locator, year=year, month=month, day=day).first()

        if daily_change is None:
            # then create it
            daily_change = DailyDiagnosticChangeModel(
                locator=locator,
                year=year,
                month=month,
                day=day,
                diagnostic_0=daily_delta['0'],
                diagnostic_1=daily_delta['1'],
                diagnostic_2=daily_delta['2'],
                diagnostic_3=daily_delta['3'],
                diagnostic_4=daily_delta['4'],
                diagnostic_5=daily_delta['5'],
            )
            session.add(daily_change)
        else:
            # update diagnostic
            daily_change.diagnostic_0 += daily_delta['0']
            daily_change.diagnostic_1 += daily_delta['1']
            daily_change.diagnostic_2 += daily_delta['2']
            daily_change.diagnostic_3 += daily_delta['3']
            daily_change.diagnostic_4 += daily_delta['4']
            daily_change.diagnostic_5 += daily_delta['5']

    session.commit()

    # build the query that marks the analysed individual reports as done
    ids = '(' + ','.join('"' + doc_id + '"' for doc_id in report_to_update) + ')'

    query = 'UPDATE individual_report SET analysis_done=1 WHERE individual_report.document_id IN ' + ids + '; '
    with engine.connect() as con:
        con.execute(query)

    print('Analysis of ' + str(collection_size) + ' reports in ' + str(time.time() - start_time_analysis) + 's')
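The final UPDATE in Example No. 11 builds its IN clause by string concatenation, which works here because the document ids come straight from the database. An equivalent sketch using SQLAlchemy's expanding bind parameters avoids the manual quoting (assuming the same engine and report_to_update list as above, and SQLAlchemy 1.4 or newer):

from sqlalchemy import bindparam, text

# Mark the analysed reports as done without hand-building the id list.
stmt = text(
    "UPDATE individual_report SET analysis_done=1 "
    "WHERE individual_report.document_id IN :ids"
).bindparams(bindparam('ids', expanding=True))

if report_to_update:
    with engine.begin() as con:  # begin() commits the transaction on exit
        con.execute(stmt, {'ids': report_to_update})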
"""
Created by nuffer at 3/25/20

"""
import json
from analysis.utils.factory import IndividualReportFactory
from analysis.utils.db import session
from analysis import BACKUP_DOCUMENTS_PATH

if __name__ == '__main__':

    for directory in BACKUP_DOCUMENTS_PATH.glob('*'):
        for file in directory.glob('*.json'):
            print('Loading file: ' + str(file))
            with open(str(file)) as json_file:
                data = json.load(json_file)
                for report_json in data:
                    report = IndividualReportFactory.build(report_json)
                    session.add(report)
                session.commit()