def test_temp_file_upload(self):
    """Exercise the temp-file upload endpoint: login required, validation errors,
    successful parse, one-shot reload, and parsedData round-trip for each file format."""
    url = reverse(save_temp_file)
    self.check_require_login(url)

    # More than one file in a single request is rejected
    resp = self.client.post(url, {
        'f': SimpleUploadedFile("test_data.tsv", TSV_DATA),
        'invalid': SimpleUploadedFile("test_data.foo", TSV_DATA),
    })
    self.assertEqual(resp.status_code, 400)
    self.assertDictEqual(resp.json(), {'errors': ['Received 2 files instead of 1']})

    # Unsupported file extension is rejected
    resp = self.client.post(url, {'invalid': SimpleUploadedFile("test_data.foo", TSV_DATA)})
    self.assertEqual(resp.status_code, 400)
    self.assertDictEqual(resp.json(), {'errors': ['Unexpected file type: test_data.foo']})

    # A valid TSV upload succeeds and reports the parsed row count
    resp = self.client.post(url, {'f': SimpleUploadedFile("test_data.tsv", TSV_DATA)})
    self.assertEqual(resp.status_code, 200)
    upload_json = resp.json()
    self.assertDictEqual(upload_json, {
        'info': ['Parsed 3 rows from test_data.tsv'],
        'uploadedFileId': mock.ANY,
    })

    # Loading the uploaded file returns the parsed rows
    file_id = upload_json['uploadedFileId']
    self.assertListEqual(load_uploaded_file(file_id), PARSED_DATA)

    # File should be removed after loading it once
    with self.assertRaises(IOError):
        load_uploaded_file(file_id)

    # Build an xlsx payload for the excel-format cases, then test uploading
    # with returned data across all supported file formats
    workbook = xl.Workbook()
    sheet = workbook[workbook.sheetnames[0]]
    sheet['A1'], sheet['B1'], sheet['C1'] = ['Family ID', 'Individual ID', 'Notes']
    sheet['A2'], sheet['B2'], sheet['C2'] = [1, 'NA19675', 'An affected individual, additional metadata']
    sheet['A3'], sheet['B3'] = [0, 'NA19678']
    sheet['A4'] = ''  # for testing trimming trailing empty rows
    with NamedTemporaryFile() as tmp:
        workbook.save(tmp)
        tmp.seek(0)
        xlsx_data = tmp.read()

    for ext, data in TEST_DATA_TYPES.items():
        if ext in ('xls', 'xlsx'):
            data = xlsx_data
        resp = self.client.post(
            '{}?parsedData=true'.format(url),
            {'f': SimpleUploadedFile("test_data.{}".format(ext), data)})
        self.assertEqual(resp.status_code, 200)
        self.assertDictEqual(resp.json(), {
            'parsedData': PARSED_DATA,
            'uploadedFileId': mock.ANY,
        })
def test_temp_file_upload(self):
    """Exercise the temp-file upload endpoint: login required, validation errors,
    successful parse, one-shot reload, and parsedData round-trip per file format."""
    url = reverse(save_temp_file)
    self.check_require_login(url)

    # More than one file in a single request is rejected
    resp = self.client.post(url, {
        'f': SimpleUploadedFile("test_data.tsv", TSV_DATA),
        'invalid': SimpleUploadedFile("test_data.foo", TSV_DATA),
    })
    self.assertEqual(resp.status_code, 400)
    self.assertDictEqual(resp.json(), {'errors': ['Received 2 files instead of 1']})

    # Unsupported file extension is rejected
    resp = self.client.post(url, {'invalid': SimpleUploadedFile("test_data.foo", TSV_DATA)})
    self.assertEqual(resp.status_code, 400)
    self.assertDictEqual(resp.json(), {'errors': ['Unexpected file type: test_data.foo']})

    # A valid TSV upload succeeds and reports the parsed row count
    resp = self.client.post(url, {'f': SimpleUploadedFile("test_data.tsv", TSV_DATA)})
    self.assertEqual(resp.status_code, 200)
    upload_json = resp.json()
    self.assertDictEqual(upload_json, {
        'info': ['Parsed 3 rows from test_data.tsv'],
        'uploadedFileId': mock.ANY,
    })

    # Loading the uploaded file returns the parsed rows
    file_id = upload_json['uploadedFileId']
    self.assertListEqual(load_uploaded_file(file_id), PARSED_DATA)

    # File should be removed after loading it once
    with self.assertRaises(IOError):
        load_uploaded_file(file_id)

    # Test uploading with returned data across all supported file formats;
    # excel-format cases substitute the fixture xlsx payload
    for ext, data in TEST_DATA_TYPES.items():
        if ext in ('xls', 'xlsx'):
            data = self.xlsx_data
        resp = self.client.post(
            '{}?parsedData=true'.format(url),
            {'f': SimpleUploadedFile("test_data.{}".format(ext), data)})
        self.assertEqual(resp.status_code, 200)
        self.assertDictEqual(resp.json(), {
            'parsedData': PARSED_DATA,
            'uploadedFileId': mock.ANY,
        })
def save_individuals_table_handler(request, project_guid, upload_file_id):
    """Handler for 'save' requests to apply Individual tables previously uploaded
    through receive_individuals_table(..)

    Args:
        request (object): Django request object
        project_guid (string): project GUID
        upload_file_id (string): a token sent to the client by receive_individuals_table(..)
    """
    project = get_project_and_check_permissions(project_guid, request.user)

    records = load_uploaded_file(upload_file_id)
    updated_families, updated_individuals = add_or_update_individuals_and_families(
        project, individual_records=records, user=request.user)

    # Serialize the updated models for the client response
    individual_jsons = _get_json_for_individuals(updated_individuals, request.user, add_sample_guids_field=True)
    family_jsons = _get_json_for_families(updated_families, request.user, add_individual_guids_field=True)

    return create_json_response({
        'individualsByGuid': {indiv['individualGuid']: indiv for indiv in individual_jsons},
        'familiesByGuid': {fam['familyGuid']: fam for fam in family_jsons},
    })
def save_hpo_table_handler(request, project_guid, upload_file_id):
    """
    Handler for 'save' requests to apply HPO terms tables previously uploaded through
    receive_hpo_table_handler
    """
    project = get_project_and_check_permissions(project_guid, request.user)
    records, _ = load_uploaded_file(upload_file_id)

    # Look up all referenced individuals in one query, keyed by GUID
    guids = [row[INDIVIDUAL_GUID_COLUMN] for row in records]
    individuals = {
        individual.guid: individual
        for individual in Individual.objects.filter(family__project=project, guid__in=guids)
    }

    # Overwrite present/absent HPO features per individual
    for row in records:
        target = individuals[row[INDIVIDUAL_GUID_COLUMN]]
        target.features = [{'id': term} for term in row[HPO_TERMS_PRESENT_COLUMN]]
        target.absent_features = [{'id': term} for term in row[HPO_TERMS_ABSENT_COLUMN]]
        target.save()

    return create_json_response({
        'individualsByGuid': {
            indiv['individualGuid']: indiv
            for indiv in _get_json_for_individuals(
                individuals.values(), user=request.user, add_hpo_details=True)
        },
    })
def save_individuals_metadata_table_handler(request, project_guid, upload_file_id):
    """
    Handler for 'save' requests to apply HPO terms tables previously uploaded through
    receive_individuals_metadata_handler
    """
    project = get_project_and_check_permissions(project_guid, request.user)
    records, _ = load_uploaded_file(upload_file_id)

    # Look up all referenced individuals in one query, keyed by GUID
    guids = [row[INDIVIDUAL_GUID_COL] for row in records]
    individuals = {
        individual.guid: individual
        for individual in Individual.objects.filter(family__project=project, guid__in=guids)
    }

    # Apply only the recognized metadata fields present on each record
    for row in records:
        target = individuals[row[INDIVIDUAL_GUID_COL]]
        metadata = {key: row[key] for key in INDIVIDUAL_METADATA_FIELDS.keys() if key in row}
        update_model_from_json(target, metadata, user=request.user)

    return create_json_response({
        'individualsByGuid': {
            indiv['individualGuid']: indiv
            for indiv in _get_json_for_individuals(
                list(individuals.values()), user=request.user, add_hpo_details=True)
        },
    })
def _load_mapping_file(mapping_file_id):
    """Parse a previously uploaded two-column mapping file into an id -> id dict.

    Returns an empty dict when no file id is given; raises ValueError on any
    row that does not have exactly 2 columns.
    """
    if not mapping_file_id:
        return {}
    mapping = {}
    for row in load_uploaded_file(mapping_file_id):
        if len(row) != 2:
            raise ValueError("Must contain 2 columns: " + ', '.join(row))
        mapping[row[0]] = row[1]
    return mapping
def edit_families_handler(request, project_guid):
    """Edit or one or more Family records.

    Args:
        project_guid (string): GUID of project that contains these individuals.
    """
    project = get_project_and_check_pm_permissions(project_guid, request.user)
    request_json = json.loads(request.body)

    # Family updates come either from a previously uploaded file or inline in the body
    file_id = request_json.get('uploadedFileId')
    families_json = load_uploaded_file(file_id) if file_id else request_json.get('families')
    if families_json is None:
        return create_json_response({}, status=400, reason="'families' not specified")

    updated = []
    for fields in families_json:
        # Resolve an existing family by GUID, then by previous family id; otherwise create
        if fields.get('familyGuid'):
            family = Family.objects.get(project=project, guid=fields['familyGuid'])
        elif fields.get(PREVIOUS_FAMILY_ID_FIELD):
            family = Family.objects.get(project=project, family_id=fields[PREVIOUS_FAMILY_ID_FIELD])
        else:
            family, _ = get_or_create_model_from_json(
                Family,
                {'project': project, 'family_id': fields[FAMILY_ID_FIELD]},
                update_json=None, user=request.user)
        update_family_from_json(family, fields, user=request.user, allow_unknown_keys=True)
        updated.append(family)

    return create_json_response({
        'familiesByGuid': {
            family.guid: _get_json_for_family(family, request.user, add_individual_guids_field=True)
            for family in updated
        }
    })
def _load_mapping_file(mapping_file_id, mapping_file_path):
    """Build an id -> id dict from an uploaded mapping file or a mapping file path.

    Returns an empty dict when neither source is given; raises ValueError on any
    row that does not have exactly 2 columns.
    """
    if mapping_file_id:
        rows = load_uploaded_file(mapping_file_id)
    elif mapping_file_path:
        rows = parse_file(mapping_file_path, file_iter(mapping_file_path))
    else:
        rows = []

    mapping = {}
    for row in rows:
        if len(row) != 2:
            raise ValueError("Must contain 2 columns: " + ', '.join(row))
        mapping[row[0]] = row[1]
    return mapping
def edit_families_handler(request, project_guid):
    """Edit or one or more Family records.

    Args:
        project_guid (string): GUID of project that contains these individuals.
    """
    request_json = json.loads(request.body)

    # Family updates come either from a previously uploaded file or inline in the body
    file_id = request_json.get('uploadedFileId')
    families_json = load_uploaded_file(file_id) if file_id else request_json.get('families')
    if families_json is None:
        return create_json_response({}, status=400, reason="'families' not specified")

    project = get_project_and_check_permissions(project_guid, request.user, CAN_EDIT)

    updated = []
    for fields in families_json:
        # Resolve an existing family by GUID, then by previous family id; otherwise create
        if fields.get('familyGuid'):
            family = Family.objects.get(project=project, guid=fields['familyGuid'])
        elif fields.get(PREVIOUS_FAMILY_ID_FIELD):
            family = Family.objects.get(project=project, family_id=fields[PREVIOUS_FAMILY_ID_FIELD])
        else:
            family, _ = get_or_create_seqr_model(Family, project=project, family_id=fields[FAMILY_ID_FIELD])
        update_family_from_json(family, fields, user=request.user, allow_unknown_keys=True)
        updated.append(family)

    return create_json_response({
        'familiesByGuid': {
            family.guid: _get_json_for_family(family, request.user, add_individual_guids_field=True)
            for family in updated
        }
    })
def create_project_from_workspace(request, namespace, name):
    """
    Create a project when a cooperator requests to load data from an AnVIL workspace.

    :param request: Django request object
    :param namespace: The namespace (or the billing account) of the workspace
    :param name: The name of the workspace. It will also be used as the project name
    :return the projectsByGuid with the new project json
    """
    # Validate that the current user has logged in through google and has sufficient permissions
    workspace_meta = check_workspace_perm(request.user, CAN_EDIT, namespace, name, can_share=True, meta_fields=['workspace.bucketName'])

    # Reject duplicate projects for the same workspace
    projects = Project.objects.filter(workspace_namespace=namespace, workspace_name=name)
    if projects:
        error = 'Project "{}" for workspace "{}/{}" exists.'.format(projects.first().name, namespace, name)
        return create_json_response({'error': error}, status=400, reason=error)

    # Validate all the user inputs from the post body
    request_json = json.loads(request.body)

    missing_fields = [field for field in ['genomeVersion', 'uploadedFileId', 'dataPath'] if not request_json.get(field)]
    if missing_fields:
        error = 'Field(s) "{}" are required'.format(', '.join(missing_fields))
        return create_json_response({'error': error}, status=400, reason=error)

    if not request_json.get('agreeSeqrAccess'):
        error = 'Must agree to grant seqr access to the data in the associated workspace.'
        return create_json_response({'error': error}, status=400, reason=error)

    # Add the seqr service account to the corresponding AnVIL workspace;
    # only wait for propagation when the account was newly added
    added_account_to_workspace = add_service_account(request.user, namespace, name)
    if added_account_to_workspace:
        _wait_for_service_account_access(request.user, namespace, name)

    # Validate the data path exists in the workspace bucket
    bucket_name = workspace_meta['workspace']['bucketName']
    data_path = 'gs://{bucket}/{path}'.format(bucket=bucket_name.rstrip('/'), path=request_json['dataPath'].lstrip('/'))
    if not does_file_exist(data_path):
        error = 'Data file or path {} is not found.'.format(request_json['dataPath'])
        return create_json_response({'error': error}, status=400, reason=error)

    # Parse families/individuals in the uploaded pedigree file; warnings are
    # treated as errors for this endpoint
    json_records = load_uploaded_file(request_json['uploadedFileId'])
    pedigree_records, errors, ped_warnings = parse_pedigree_table(json_records, 'uploaded pedigree file', user=request.user)
    errors += ped_warnings
    if errors:
        return create_json_response({'errors': errors}, status=400)

    # Create a new Project in seqr
    project_args = {
        'name': name,
        'genome_version': request_json['genomeVersion'],
        'description': request_json.get('description', ''),
        'workspace_namespace': namespace,
        'workspace_name': name,
    }
    project = create_model_from_json(Project, project_args, user=request.user)

    # add families and individuals according to the uploaded individual records
    _, updated_individuals = add_or_update_individuals_and_families(
        project, individual_records=pedigree_records, user=request.user
    )

    # Send an email to all seqr data managers; email failure is non-fatal —
    # the project has already been created, so just log and continue
    try:
        _send_load_data_email(project, updated_individuals, data_path, request.user)
    except Exception as ee:
        message = 'Exception while sending email to user {}. {}'.format(request.user, str(ee))
        logger.error(message)

    return create_json_response({'projectGuid': project.guid})
def load_uploaded_mapping_file(mapping_file_id):
    """Load a previously uploaded mapping file and parse it into an id-mapping dict."""
    return _load_mapping_file(load_uploaded_file(mapping_file_id))
def load_uploaded_mapping_file(mapping_file_id):
    """Read the uploaded file identified by mapping_file_id and convert it to a mapping dict."""
    rows = load_uploaded_file(mapping_file_id)
    return _load_mapping_file(rows)