def test_upload_aws_valid_access_token(self):
    with my_vcr.use_cassette('ohapi/cassettes/test_upload_aws_valid_' +
                             'access_token') as cass:
        with patch('ohapi.api.open', mock_open(read_data=b'some stuff')):
            try:
                # Fake os.stat so the nonexistent 'foo' file reports the
                # size of the mocked content; everything else stats normally.
                def fake_stat(arg):
                    if arg == "foo":
                        faked = list(orig_os_stat('/tmp'))
                        faked[stat.ST_SIZE] = len('some stuff')
                        return stat_result(faked)
                    else:
                        return orig_os_stat(arg)

                orig_os_stat = os.stat
                os.stat = fake_stat
                upload_aws(target_filepath='foo', metadata=FILE_METADATA,
                           access_token=ACCESS_TOKEN,
                           project_member_id=VALID_PMI1)
                self.assertEqual(cass.responses[0]["status"]["code"], 201)
                self.assertEqual(cass.responses[1]["status"]["code"], 200)
                self.assertEqual(cass.responses[2]["status"]["code"], 200)
            finally:
                os.stat = orig_os_stat
def get_semantic_data(tf_in, tmp_dir, member, access_token):
    # Get the new semantic files and save those too
    if tf_in.name.endswith('.zip'):
        zf = zipfile.ZipFile(tf_in)
        for f in zf.filelist:
            if f.filename.endswith('.json') and len(
                    f.filename.split('/')) == 5:
                file_content = json.loads(zf.read(f))
                file_name = tmp_dir + '/' + f.filename.split('/')[-1]
                with open(file_name, 'w') as raw_file:
                    json.dump(file_content, raw_file)
                metadata = {
                    'description': 'Semantic Google Location History JSON',
                    'tags': ['google location history', 'gps',
                             'semantic data'],
                    'creation_date': arrow.get().format(),
                }
                api.upload_aws(file_name, metadata, access_token,
                               base_url=OH_BASE_URL,
                               project_member_id=str(
                                   member['project_member_id']))
    else:
        return None
def fetch_googlefit_data(oh_id):
    '''
    Fetches all of the googlefit data for a given user
    '''
    print("Started googlefit update task")
    try:
        current_dt = datetime.utcnow()
        oh_member = OpenHumansMember.objects.get(oh_id=oh_id)
        gf_member = oh_member.googlefit_member
        oh_access_token = oh_member.get_access_token()
        gf_access_token = gf_member.get_access_token()
        basenames_to_ids = get_existing_basenames_to_ids(oh_member)
        filesmonth = get_googlefit_data(oh_access_token, gf_access_token,
                                        current_dt)
        for fn, month in filesmonth:
            api.upload_aws(fn, create_metadata(month), oh_access_token,
                           project_member_id=oh_id)
            basename = os.path.basename(fn)
            if basename in basenames_to_ids:
                file_id_to_delete = basenames_to_ids[basename]
                api.delete_file(oh_access_token, file_id=file_id_to_delete)
        gf_member.last_updated = arrow.now().format()
        gf_member.save()
    except Exception as e:
        import traceback
        print("Fetching googlefit data failed: {}".format(e))
        print(traceback.format_exc())
        # queue to retry later
        fetch_googlefit_data.apply_async(args=[oh_id], countdown=3600)
        raise
def upload_summaries(oh_user, summaries, file_name, existing_file_id):
    temp_dir, file = write_json_data_to_tmp_file(
        f'garmin-health-api-{file_name}.json', summaries)
    api.upload_aws(file, create_metadata(file_name),
                   oh_user.get_access_token(),
                   project_member_id=oh_user.oh_id,
                   max_bytes=MAX_FILE_BYTES)
    if existing_file_id:
        api.delete_file(oh_user.get_access_token(),
                        file_id=existing_file_id)
    os.remove(file)
    os.rmdir(temp_dir)
def process_file(dfile, access_token, member, metadata, taxonomy):
    try:
        verify_ubiome(dfile)
        tmp_directory = tempfile.mkdtemp()
        base_filename = dfile['basename'].replace('.zip', '')
        taxonomy_file = base_filename + '.taxonomy.json'
        raw_filename = temp_join(tmp_directory, taxonomy_file)
        metadata = {
            'description': 'uBiome 16S taxonomy data, JSON format.',
            'tags': ['json', 'uBiome', '16S'],
        }
        with open(raw_filename, 'w') as raw_file:
            json.dump(taxonomy, raw_file)
            raw_file.flush()
        api.upload_aws(raw_filename, metadata, access_token,
                       base_url=OH_BASE_URL,
                       project_member_id=str(member['project_member_id']))
    except:
        api.message("uBiome integration: A broken file was deleted",
                    "While processing your uBiome file "
                    "we noticed that your file does not conform "
                    "to the expected specifications and it was "
                    "thus deleted. Email us at [email protected] if "
                    "you think this file should be valid.",
                    access_token, base_url=OH_BASE_URL)
        api.delete_file(access_token, str(member['project_member_id']),
                        file_id=str(dfile['id']), base_url=OH_BASE_URL)
        raise
def add_jawbone_data(oh_member, data, endpoint):
    # delete old file and upload new to open humans
    tmp_directory = tempfile.mkdtemp()
    metadata = {
        'tags': ['Jawbone'],
        'updated_at': str(datetime.utcnow()),
    }
    if endpoint == 'moves':
        metadata['description'] = ('Jawbone "moves" data, including steps, '
                                   'calories, and activity')
        metadata['tags'].append('steps')
    elif endpoint == 'sleeps':
        metadata['description'] = ('Jawbone "sleeps" data, including time, '
                                   'duration, and depth estimates.')
        metadata['tags'].append('sleep')
    elif endpoint == 'heartrates':
        metadata['description'] = ('Jawbone "heartrates" data, including '
                                   'resting heartrates')
        metadata['tags'].append('heartrate')
    out_file = os.path.join(
        tmp_directory, 'jawbone-{}-data.json'.format(endpoint))
    logger.debug('deleting old file for {}'.format(oh_member.oh_id))
    api.delete_file(oh_member.access_token, oh_member.oh_id,
                    file_basename='jawbone-{}-data.json'.format(endpoint))
    with open(out_file, 'w') as json_file:
        json.dump(data, json_file)
        json_file.flush()
    api.upload_aws(out_file, metadata, oh_member.access_token,
                   project_member_id=oh_member.oh_id)
    logger.debug('added new jawbone {} file for {}'.format(
        endpoint, oh_member.oh_id))
def upload_summaries(oh_user, summaries, file_name, month, existing_file_id):
    fn = write_json_data_to_tmp_file(
        f'garmin-health-api-{file_name}-{month}.json', summaries)
    api.upload_aws(fn, create_metadata(file_name, month),
                   oh_user.get_access_token(),
                   project_member_id=oh_user.oh_id,
                   max_bytes=MAX_FILE_BYTES)
    if existing_file_id:
        api.delete_file(oh_user.get_access_token(),
                        file_id=existing_file_id)
def process_file(dfile, access_token, member, metadata):
    try:
        vcf_metadata = verify_vcf(dfile)
    except:
        api.message("VCF integration: A broken file was deleted",
                    "While processing your VCF file "
                    "we noticed that your file does not conform "
                    "to the expected specifications and it was "
                    "thus deleted. Email us at [email protected] if "
                    "you think this file should be valid.",
                    access_token, base_url=OH_BASE_URL)
        api.delete_file(access_token, str(member['project_member_id']),
                        file_id=str(dfile['id']), base_url=OH_BASE_URL)
        raise
    try:
        tmp_directory = tempfile.mkdtemp()
        base_filename = dfile['basename']
        # Strip any compression suffix before naming the metadata file,
        # then save the extracted VCF metadata to a temp file.
        if base_filename.endswith('.gz'):
            base_filename = base_filename[0:-3]
        elif base_filename.endswith('.bz2'):
            base_filename = base_filename[0:-4]
        meta_filename = base_filename + '.metadata.json'
        raw_filename = temp_join(tmp_directory, meta_filename)
        metadata = {'description': 'VCF file metadata',
                    'tags': ['vcf']}
        with open(raw_filename, 'w') as raw_file:
            json.dump(vcf_metadata, raw_file)
            raw_file.flush()
        api.upload_aws(raw_filename, metadata, access_token,
                       base_url=OH_BASE_URL,
                       project_member_id=str(member['project_member_id']))
    except:
        api.message("VCF integration: File could not be uploaded",
                    "Something went wrong when processing your "
                    "file. Please try to upload it again. "
                    "Please email us at [email protected] if "
                    "this keeps happening.",
                    access_token, base_url=OH_BASE_URL)
        api.delete_file(access_token, str(member['project_member_id']),
                        file_id=str(dfile['id']), base_url=OH_BASE_URL)
        raise
def replace_fitbit(oh_member, fitbit_data):
    print("replace function started")
    # delete old file and upload new to open humans
    tmp_directory = tempfile.mkdtemp()
    metadata = {
        'description': 'Fitbit data.',
        'tags': ['Fitbit', 'activity', 'steps'],
        'updated_at': str(datetime.utcnow()),
    }
    out_file = os.path.join(tmp_directory, 'fitbit-data.json')
    logger.debug('deleting old file for {}'.format(oh_member.oh_id))
    deleter = api.delete_file(oh_member.access_token, oh_member.oh_id,
                              file_basename="fitbit-data.json")
    print("delete response")
    print(deleter)
    print("trying to write to file")
    with open(out_file, 'w') as json_file:
        print("inside open file")
        json_file.write(json.dumps(fitbit_data))
        print("dumped, trying to flush")
        json_file.flush()
    print("attempting add response")
    addr = api.upload_aws(out_file, metadata, oh_member.access_token,
                          project_member_id=oh_member.oh_id)
    print("add response")
    print(addr)
    logger.debug('uploaded new file for {}'.format(oh_member.oh_id))
def test_upload_aws_invalid_access_token(self):
    with self.assertRaises(Exception):
        response = upload_aws(
            target_filepath='foo',
            metadata=FILE_METADATA,
            access_token=ACCESS_TOKEN_INVALID,
            project_member_id=VALID_PMI1)
        assert response.json() == {"detail": "Invalid token."}
def fetch_googlefit_data(oh_id, send_email=False):
    '''
    Fetches all of the googlefit data for a given user
    '''
    print("Started googlefit update task")
    try:
        current_dt = datetime.utcnow()
        oh_member = OpenHumansMember.objects.get(oh_id=oh_id)
        if not hasattr(oh_member, 'googlefit_member'):
            print("No googlefit connection exists for member")
            return
        gf_member = oh_member.googlefit_member
        oh_access_token = oh_member.get_access_token()
        gf_access_token = gf_member.get_access_token()
        basenames_to_ids = get_existing_basenames_to_ids(oh_member)
        filesmonth = get_googlefit_data(oh_access_token, gf_access_token,
                                        current_dt)
        for fn, month in filesmonth:
            api.upload_aws(fn, create_metadata(month), oh_access_token,
                           project_member_id=oh_id,
                           max_bytes=MAX_FILE_BYTES)
            basename = os.path.basename(fn)
            if basename in basenames_to_ids:
                file_id_to_delete = basenames_to_ids[basename]
                api.delete_file(oh_access_token, file_id=file_id_to_delete)
        gf_member.last_updated = arrow.now().format()
        gf_member.save()
        if send_email and len(filesmonth) > 0:
            send_first_success_email(oh_id, oh_access_token)
        elif send_email and len(filesmonth) == 0:
            send_first_no_data_email(oh_id, oh_access_token)
    except Exception as e:
        import traceback
        print("Fetching googlefit data failed: {}".format(e))
        print(traceback.format_exc())
        # queue to retry later
        fetch_googlefit_data.apply_async(args=[oh_id], countdown=3600)
        raise
def test_upload_aws_expired_access_token(self):
    with self.assertRaises(Exception):
        with patch('ohapi.api.open', mock_open(), create=True):
            response = upload_aws(
                target_filepath='foo',
                metadata=FILE_METADATA,
                access_token=ACCESS_TOKEN_EXPIRED,
                project_member_id=VALID_PMI1)
            assert response.json() == {"detail": "Expired token."}
def upload_user_dailies(garmin_user_id, user_map, existing_file_id):
    min_date = earliest_date(user_map)
    fn = write_jsonfile_to_tmp_dir('garmin-dailies.json', user_map)
    oh_user = get_oh_user_from_garmin_id(garmin_user_id)
    api.upload_aws(fn, create_metadata(), oh_user.get_access_token(),
                   project_member_id=oh_user.oh_id,
                   max_bytes=MAX_FILE_BYTES)
    oh_user.garmin_member.last_updated = datetime.now()
    if not oh_user.garmin_member.earliest_available_data or \
            min_date < oh_user.garmin_member.earliest_available_data.replace(
                tzinfo=None):
        oh_user.garmin_member.earliest_available_data = min_date
    oh_user.garmin_member.save()
    if existing_file_id:
        api.delete_file(oh_user.get_access_token(),
                        file_id=existing_file_id)
def process_file(dfile, access_token, member, metadata):
    infile_suffix = dfile['basename'].split(".")[-1]
    tf_in = tempfile.NamedTemporaryFile(suffix="." + infile_suffix)
    tf_in.write(requests.get(dfile['download_url']).content)
    tf_in.flush()
    tmp_directory = tempfile.mkdtemp()
    filename_base = 'Location History.json'
    location_data = get_json(tf_in)
    if location_data:
        location_json = json.loads(location_data)
        output_file = tmp_directory + '/' + filename_base
        with open(output_file, 'w') as raw_file:
            json.dump(location_json, raw_file)
        metadata = {
            'description': 'Google Location History JSON',
            'tags': ['google location history', 'gps'],
            'creation_date': arrow.get().format(),
        }
        api.upload_aws(output_file, metadata, access_token,
                       base_url=OH_BASE_URL,
                       project_member_id=str(member['project_member_id']))
        get_semantic_data(tf_in, tmp_directory, member, access_token)
    else:
        api.message("Google Location History: A broken file was deleted",
                    "While processing your Google Location History file "
                    "we noticed that your file does not conform "
                    "to the expected specifications and it was "
                    "thus deleted. Please make sure you upload "
                    "the right file:\nWe expect the file to be a "
                    "single json file or a .zip file as downloaded "
                    "from Google Takeout. Please do not alter the "
                    "original file, as unexpected additions can "
                    "invalidate the file.",
                    access_token, base_url=OH_BASE_URL)
        api.delete_file(access_token, str(member['project_member_id']),
                        file_id=str(dfile['id']), base_url=OH_BASE_URL)
def replace_datasource(oh_member, source_data):
    # delete old file and upload new to open humans
    tmp_directory = tempfile.mkdtemp()
    metadata = {
        'description': 'Dummy data for demo.',
        'tags': ['demo', 'dummy', 'test'],
        'updated_at': str(datetime.utcnow()),
    }
    out_file = os.path.join(tmp_directory, 'dummy-data.json')
    logger.debug('deleting old file for {}'.format(oh_member.oh_id))
    api.delete_file(oh_member.access_token, oh_member.oh_id,
                    file_basename="dummy-data.json")
    with open(out_file, 'w') as json_file:
        json.dump(source_data, json_file)
        json_file.flush()
    api.upload_aws(out_file, metadata, oh_member.access_token,
                   project_member_id=oh_member.oh_id)
    logger.debug('uploaded new file for {}'.format(oh_member.oh_id))
def test_upload_aws_invalid_metadata_without_description(self):
    with self.assertRaises(Exception):
        with patch('ohapi.api.open', mock_open(), create=True):
            response = upload_aws(
                target_filepath='foo',
                metadata=FILE_METADATA_INVALID_WITHOUT_DESC,
                access_token=ACCESS_TOKEN,
                project_member_id=VALID_PMI1)
            assert response.json() == {
                "metadata": ["\"description\" is a "
                             "required field of the metadata"]}
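# For context: a sketch of a metadata dict that would pass the check the
# test above exercises. Illustrative only -- EXAMPLE_METADATA is not a name
# from these examples. Per the test, 'description' is required, and every
# upload example in this listing also supplies a 'tags' list.
EXAMPLE_METADATA = {
    'description': 'Example data file.',
    'tags': ['example'],
}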
def replace_moves(oh_member, moves_data):
    # delete old file and upload new to open humans
    tmp_directory = tempfile.mkdtemp()
    metadata = {
        'description': 'Moves GPS maps, locations, and steps data.',
        'tags': ['GPS', 'Moves', 'steps'],
        'updated_at': str(datetime.utcnow()),
    }
    out_file = os.path.join(tmp_directory, 'moves-storyline-data.json')
    logger.debug('deleting old file for {}'.format(oh_member.oh_id))
    api.delete_file(oh_member.access_token, oh_member.oh_id,
                    file_basename="moves-storyline-data.json")
    with open(out_file, 'w') as json_file:
        json.dump(moves_data, json_file)
        json_file.flush()
    api.upload_aws(out_file, metadata, oh_member.access_token,
                   project_member_id=oh_member.oh_id)
    logger.debug('uploaded new file for {}'.format(oh_member.oh_id))
def replace_rescuetime(oh_member, rescuetime_data):
    # delete old file and upload new to open humans
    tmp_directory = tempfile.mkdtemp()
    metadata = {
        'description': 'RescueTime productivity data.',
        'tags': ['Rescuetime', 'productivity'],
        'updated_at': str(datetime.utcnow()),
    }
    out_file = os.path.join(tmp_directory, 'rescuetime.json')
    logger.debug('deleting old file for {}'.format(oh_member.oh_id))
    api.delete_file(oh_member.access_token, oh_member.oh_id,
                    file_basename="rescuetime.json")
    with open(out_file, 'w') as json_file:
        json.dump(rescuetime_data, json_file)
        json_file.flush()
    api.upload_aws(out_file, metadata, oh_member.access_token,
                   project_member_id=oh_member.oh_id)
    logger.debug('uploaded new file for {}'.format(oh_member.oh_id))
def process_github(oh_id):
    """
    Update the github file for a given OH user
    """
    try:
        logger.debug('Starting github processing for {}'.format(oh_id))
        oh_member = OpenHumansMember.objects.get(oh_id=oh_id)
        oh_access_token = oh_member.get_access_token(
            client_id=settings.OPENHUMANS_CLIENT_ID,
            client_secret=settings.OPENHUMANS_CLIENT_SECRET)
        github_member = oh_member.datasourcemember
        github_access_token = github_member.get_access_token(
            client_id=settings.GITHUB_CLIENT_ID,
            client_secret=settings.GITHUB_CLIENT_SECRET)
        gh_file = gh_api.get_github_data(oh_access_token,
                                         github_access_token)
        existing_file_ids = get_existing_file_ids(oh_member)
        print(existing_file_ids)
        # Upload the fresh file first, then delete the stale ones.
        api.upload_aws(gh_file, create_file_metadata(), oh_access_token,
                       project_member_id=oh_id, max_bytes=MAX_FILE_BYTES)
        for file_id in existing_file_ids:
            api.delete_file(oh_access_token, file_id=file_id)
        github_member.last_updated = arrow.now().format()
        github_member.save()
    except Exception as e:
        import traceback
        print("Fetching github data failed: {}".format(e))
        print(traceback.format_exc())
        # queue to retry later
        process_github.apply_async(args=[oh_id], countdown=4 * 3600)
        raise
def replace_twitter(oh_member, twitter_data):
    # delete old file and upload new to open humans
    tmp_directory = tempfile.mkdtemp()
    metadata = {
        'description': 'Twitter activity feed data.',
        'tags': ['demo', 'Twitter', 'test'],
        'updated_at': str(datetime.utcnow()),
    }
    out_file = os.path.join(tmp_directory, 'twitter-data.json')
    logger.debug('deleting old file for {}'.format(oh_member.oh_id))
    api.delete_file(oh_member.access_token, oh_member.oh_id,
                    file_basename="twitter-data.json")
    with open(out_file, 'w') as json_file:
        json.dump(twitter_data, json_file)
        json_file.flush()
    api.upload_aws(out_file, metadata, oh_member.access_token,
                   project_member_id=oh_member.oh_id)
    logger.debug('uploaded new file for {}'.format(oh_member.oh_id))
def replace_nokia(oh_member, nokia_data):
    """
    Delete any old file and upload new
    """
    tmp_directory = tempfile.mkdtemp()
    metadata = {
        'tags': ['nokiahealthdata', 'health', 'measure'],
        'description': 'File with Nokia Health data',
        'updated_at': str(datetime.utcnow()),
    }
    filename = 'nokiahealthdata.json'
    out_file = os.path.join(tmp_directory, filename)
    logger.debug('deleting old file for {}'.format(oh_member.oh_id))
    api.delete_file(oh_member.access_token, oh_member.oh_id,
                    file_basename=filename)
    with open(out_file, 'w') as json_file:
        json.dump(nokia_data, json_file)
        json_file.flush()
    api.upload_aws(out_file, metadata, oh_member.access_token,
                   project_member_id=oh_member.oh_id)
    logger.debug('uploaded new file for {}'.format(oh_member.oh_id))
def process_source(oh_id):
    oh_member = OpenHumansMember.objects.get(oh_id=oh_id)
    OUT_DIR = os.environ.get('OUT_DIR')
    metadata = {
        'description': 'Imputed genotypes from Imputer',
        'tags': ['genomics'],
        'updated_at': str(datetime.utcnow()),
    }
    oh_access_token = oh_member.get_access_token(
        client_id=settings.OPENHUMANS_CLIENT_ID,
        client_secret=settings.OPENHUMANS_CLIENT_SECRET)
    # Delete any previously uploaded imputed VCF before uploading the new one.
    try:
        api.delete_file(oh_member.access_token, oh_member.oh_id,
                        file_basename="member.imputed.vcf.bz2")
    except FileNotFoundError:
        logger.info('New Source File')
    api.upload_aws('{}/{}/member.imputed.vcf.bz2'.format(OUT_DIR, oh_id),
                   metadata, oh_access_token,
                   project_member_id=oh_member.oh_id,
                   max_bytes=256000000)
def upload_notebook(notebook_content, notebook_name, access_token,
                    project_member_id):
    """
    Upload a notebook to the Personal Data Notebook project on Open Humans.
    """
    tmp_directory = tempfile.mkdtemp()
    metadata = {
        'description': 'A Personal Data Notebook',
        'tags': ['personal data notebook', 'notebook', 'jupyter'],
    }
    out_file = os.path.join(tmp_directory, notebook_name)
    with open(out_file, 'wb') as tmp_notebook:
        tmp_notebook.write(notebook_content)
        tmp_notebook.flush()
    print(out_file)
    upload_response = api.upload_aws(out_file, metadata, access_token,
                                     project_member_id=project_member_id)
    return upload_response
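# A minimal, hypothetical invocation of upload_notebook; the filename, token,
# and project member id below are placeholders, not values from these
# examples. upload_aws returns the HTTP response, which the token tests above
# inspect via response.json().
with open('analysis.ipynb', 'rb') as nb:
    response = upload_notebook(nb.read(), 'analysis.ipynb',
                               access_token='ACCESS_TOKEN',
                               project_member_id='01234567')
print(response)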
def process_runkeeper(oh_id):
    """
    Data is split per-year, in JSON format. Each JSON is an object (dict)
    in the following format (pseudocode):

    {
        'background_activities': [
            { key: value for each of BACKGROUND_DATA_KEYS },
            { key: value for each of BACKGROUND_DATA_KEYS },
            ...
        ],
        'fitness_activities': [
            { 'path': { key: value for each of FITNESS_PATH_KEYS },
              and key: value for each of the FITNESS_ACTIVITY_KEYS },
            { 'path': { key: value for each of FITNESS_PATH_KEYS },
              and key: value for each of the FITNESS_ACTIVITY_KEYS },
            ...
        ]
    }

    Notes:
    - items are sorted according to start_time or timestamp
    - The item_uri for fitness_activities matches item_uri in
      fitness_activity_sharing.
    """
    oh_member = OpenHumansMember.objects.get(oh_id=oh_id)
    oh_access_token = oh_member.get_access_token(
        client_id=settings.OPENHUMANS_CLIENT_ID,
        client_secret=settings.OPENHUMANS_CLIENT_SECRET)
    runkeeper_member = oh_member.datasourcemember
    print('start processing data for {}'.format(
        runkeeper_member.runkeeper_id))
    access_token = runkeeper_member.access_token
    user_data = runkeeper_query('/user', access_token)
    runkeeper_member.runkeeper_id = user_data['userID']
    # Get activity data.
    fitness_activity_path = '{}?pageSize={}'.format(
        user_data['fitness_activities'], PAGESIZE)
    fitness_activity_items, complete_fitness_activity_years = yearly_items(
        get_items(path=fitness_activity_path, access_token=access_token))
    # Background activities.
    background_activ_path = '{}?pageSize={}'.format(
        user_data['background_activities'], PAGESIZE)
    background_activ_items, complete_background_activ_years = yearly_items(
        get_items(background_activ_path, access_token))
    all_years = sorted(set(
        list(fitness_activity_items.keys()) +
        list(background_activ_items.keys())))
    all_completed_years = set(complete_fitness_activity_years +
                              complete_background_activ_years)
    for year in all_years:
        outdata = {'fitness_activities': [], 'background_activities': []}
        fitness_items = sorted(
            fitness_activity_items.get(year, []),
            key=lambda item: datetime.strptime(item['start_time'],
                                               '%a, %d %b %Y %H:%M:%S'))
        for item in fitness_items:
            item_data = runkeeper_query(item['uri'], access_token)
            item_data_out = data_for_keys(item_data, FITNESS_SUMMARY_KEYS)
            item_data_out['path'] = [
                data_for_keys(datapoint, FITNESS_PATH_KEYS)
                for datapoint in item_data['path']
            ]
            outdata['fitness_activities'].append(item_data_out)
        background_items = sorted(
            background_activ_items.get(year, []),
            key=lambda item: datetime.strptime(item['timestamp'],
                                               '%a, %d %b %Y %H:%M:%S'))
        for item in background_items:
            outdata['background_activities'].append(
                data_for_keys(item, BACKGROUND_DATA_KEYS))
        filename = 'Runkeeper-activity-data-{}.json'.format(str(year))
        temp_directory = tempfile.mkdtemp()
        filepath = os.path.join(temp_directory, filename)
        with open(filepath, 'w') as f:
            json.dump(outdata, f, indent=2, sort_keys=True)
            f.flush()
        metadata = {
            'description': ('Runkeeper GPS maps and imported '
                            'activity data.'),
            'tags': ['GPS', 'Runkeeper'],
            'dataYear': year,
            'complete': year in all_completed_years,
        }
        api.delete_file(oh_member.access_token, oh_member.oh_id,
                        file_basename=filename)
        api.upload_aws(filepath, metadata, oh_access_token,
                       project_member_id=oh_member.oh_id)
    runkeeper_member.last_updated = arrow.now().format()
    runkeeper_member.save()
    print('finished processing data for {}'.format(
        runkeeper_member.runkeeper_id))
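# Roughly what one year's output file from process_runkeeper might contain,
# per the docstring above. Keys and values here are invented placeholders;
# the real files carry every key in FITNESS_SUMMARY_KEYS, FITNESS_PATH_KEYS,
# and BACKGROUND_DATA_KEYS, which are not shown in this excerpt.
EXAMPLE_YEAR_OUTPUT = {
    'fitness_activities': [
        {'start_time': 'Sat, 06 Jan 2018 09:30:00',  # sort key used above
         'path': [{'latitude': 52.52, 'longitude': 13.40}]},  # hypothetical
    ],
    'background_activities': [
        {'timestamp': 'Sat, 06 Jan 2018 09:30:00'},  # sort key used above
    ],
}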
def process_file(dfile, access_token, member, metadata):
    try:
        infile_suffix = dfile['basename'].split(".")[-1]
        tf_in = tempfile.NamedTemporaryFile(suffix="." + infile_suffix)
        tf_in.write(requests.get(dfile['download_url']).content)
        tf_in.flush()
        tmp_directory = tempfile.mkdtemp()
        filename_base = 'AncestryDNA-genotyping'
        raw_ancestry, chr_sex = clean_raw_ancestrydna(tf_in)
        raw_ancestry.seek(0)
        vcf_ancestry_unsort = vcf_from_raw_ancestrydna(raw_ancestry, chr_sex)
        # Save raw Ancestry genotyping to temp file.
        raw_filename = filename_base + '.txt'
        raw_filename = temp_join(tmp_directory, raw_filename)
        metadata = {
            'description': 'AncestryDNA full genotyping data, '
                           'original format',
            'tags': ['AncestryDNA', 'genotyping'],
            'creation_date': arrow.get().format(),
        }
        with open(raw_filename, 'w') as raw_file:
            raw_ancestry.seek(0)
            shutil.copyfileobj(raw_ancestry, raw_file)
            raw_file.flush()
        api.upload_aws(raw_filename, metadata, access_token,
                       base_url=OH_BASE_URL,
                       project_member_id=str(member['project_member_id']))
        # Save VCF Ancestry genotyping to temp file.
        vcf_filename = filename_base + '.vcf.bz2'
        vcf_filename = temp_join(tmp_directory, vcf_filename)
        metadata = {
            'description': 'AncestryDNA full genotyping data, VCF format',
            'tags': ['AncestryDNA', 'genotyping', 'vcf'],
            'creation_date': arrow.get().format(),
        }
        vcf_ancestry_unsort.seek(0)
        vcf_ancestry = sort_vcf(vcf_ancestry_unsort)
        with bz2.BZ2File(vcf_filename, 'w') as vcf_file:
            vcf_ancestry.seek(0)
            for i in vcf_ancestry:
                vcf_file.write(i)
        api.upload_aws(vcf_filename, metadata, access_token,
                       base_url=OH_BASE_URL,
                       project_member_id=str(member['project_member_id']))
    except:
        api.message("AncestryDNA integration: A broken file was deleted",
                    "While processing your AncestryDNA file "
                    "we noticed that your file does not conform "
                    "to the expected specifications and it was "
                    "thus deleted. Please make sure you upload "
                    "the right file:\nWe expect the file to be a "
                    "single txt file (either unzipped, bz2 zipped or "
                    "gzipped) or a .zip file that contains a single txt "
                    "file (this is what you can download from Ancestry "
                    "right away). Please do not alter the original txt "
                    "file, as unexpected additions can invalidate the file.",
                    access_token, base_url=OH_BASE_URL)
        raise
    finally:
        api.delete_file(access_token, str(member['project_member_id']),
                        file_id=str(dfile['id']), base_url=OH_BASE_URL)