def read_daily_report_data_csv_JHU(file):
    """
    Extract data from a single daily report file from JHU (old format only:
    Province/State,Country/Region,Last Update,Confirmed,Deaths,Recovered,Latitude,Longitude).

    :param file: Path to the CSV file
    :return: Tuple (countries, provinces, updates, province_in_country, province_rep_update)
        of NodeSets / RelationshipSets built from the file.
    """
    log.info('Read JHU CSV file {}'.format(file))

    # Node/relationship containers keyed on their merge properties.
    countries = NodeSet(['Country'], ['name'])
    provinces = NodeSet(['Province'], ['name'])
    updates = NodeSet(['DailyReport'], ['uuid'])

    province_in_country = RelationshipSet('PART_OF', ['Province'], ['Country'], ['name'], ['name'])
    province_in_country.unique = True
    province_rep_update = RelationshipSet('REPORTED', ['Province'], ['DailyReport'], ['name'], ['uuid'])

    with open(file, 'rt') as csvfile:
        reader = csv.reader(csvfile, delimiter=',', quotechar='"')
        next(reader)  # skip header row

        for line in reader:
            country_name = line[1]
            # Rows without a province get a synthetic country-level name.
            province_name = line[0] if line[0] else '{}_complete'.format(country_name)

            report_date = parse(line[2])
            # Synthetic unique key for the DailyReport node.
            report_uuid = country_name + province_name + str(report_date)

            # Empty count fields are recorded as the string 'na'.
            confirmed = int(line[3]) if line[3] else 'na'
            death = int(line[4]) if line[4] else 'na'
            recovered = int(line[5]) if line[5] else 'na'

            # Coordinates are optional trailing columns in the old format.
            latitude = line[6] if len(line) >= 7 else None
            longitude = line[7] if len(line) >= 8 else None

            province_props = {'name': province_name}
            if latitude and longitude:
                province_props['latitude'] = latitude
                province_props['longitude'] = longitude

            provinces.add_unique(province_props)
            countries.add_unique({'name': country_name})
            updates.add_unique({
                'date': report_date,
                'confirmed': confirmed,
                'death': death,
                'recovered': recovered,
                'uuid': report_uuid,
            })

            province_in_country.add_relationship(
                {'name': province_name}, {'name': country_name}, {'source': 'jhu'})
            province_rep_update.add_relationship(
                {'name': province_name}, {'uuid': report_uuid}, {'source': 'jhu'})

    return countries, provinces, updates, province_in_country, province_rep_update
def read_daily_report_data_csv_JHU(file):
    """
    Extract data from a single daily report file from JHU.

    Handles both layouts published by JHU:

    Old format (until 03-21-2020):
        Province/State,Country/Region,Last Update,Confirmed,Deaths,Recovered,Latitude,Longitude
    New format:
        FIPS,Admin2,Province_State,Country_Region,Last_Update,Lat,Long_,Confirmed,Deaths,Recovered,Active,Combined_Key

    :param file: Path to the CSV file
    :return: Tuple (countries, provinces, updates, province_in_country, province_rep_update)
        of NodeSets / RelationshipSets built from the file.
    """
    log.info('Read JHU CSV file {}'.format(file))

    # Node/relationship containers keyed on their merge properties.
    countries = NodeSet(['Country'], ['name'])
    provinces = NodeSet(['Province'], ['name'])
    updates = NodeSet(['DailyReport'], ['uuid'])

    province_in_country = RelationshipSet('PART_OF', ['Province'], ['Country'], ['name'], ['name'])
    province_in_country.unique = True
    province_rep_update = RelationshipSet('REPORTED', ['Province'], ['DailyReport'], ['name'], ['uuid'])

    # One row-parser per file layout; selected once from the header width.
    row_parsers = {'old': parse_jhu_old_file_row, 'new': parse_jhu_new_file_row}

    with open(file, 'rt') as csvfile:
        reader = csv.reader(csvfile, delimiter=',', quotechar='"')

        # The old layout has 8 columns, the new one 12 — detect via header width.
        header = next(reader)
        file_type = 'new' if len(header) > 8 else 'old'
        log.info("File type: {}".format(file_type))

        parse_row = row_parsers[file_type]

        for line in reader:
            country_name, province_name, report_date, confirmed, death, recovered, latitude, longitude = parse_row(line)

            province_props = {'name': province_name}
            if latitude and longitude:
                province_props['latitude'] = latitude
                province_props['longitude'] = longitude

            # Synthetic unique key for the DailyReport node.
            report_uuid = country_name + province_name + str(report_date)

            provinces.add_unique(province_props)
            countries.add_unique({'name': country_name})
            updates.add_unique({
                'date': report_date,
                'confirmed': confirmed,
                'death': death,
                'recovered': recovered,
                'uuid': report_uuid,
            })

            province_in_country.add_relationship(
                {'name': province_name}, {'name': country_name}, {'source': 'jhu'})
            province_rep_update.add_relationship(
                {'name': province_name}, {'uuid': report_uuid}, {'source': 'jhu'})

    return countries, provinces, updates, province_in_country, province_rep_update