def test_import_global_json(
    tmp_path: Path,
    user,
    project_factory,
    samples_dir: Path,
    sample_scans,
    user_api_client,
):
    """Global JSON import: superusers populate matching projects; others get 403."""
    json_file = str(tmp_path / 'import.json')
    contents = json.dumps(generate_import_json(samples_dir, sample_scans))
    with open(json_file, 'w') as fd:
        fd.write(contents)

    settings = GlobalSettings.load()
    settings.import_path = json_file
    settings.save()

    # Create projects targeted by the import
    projects = [
        project_factory(import_path=json_file, name=name) for name in ('ohsu', 'ucsd')
    ]

    resp = user_api_client().post('/api/v1/global/import')
    if not user.is_superuser:
        assert resp.status_code == 403
        return

    assert resp.status_code == 204
    # The import should update the correctly named projects,
    # but not the original import project
    for proj in projects:
        proj.refresh_from_db()
        assert proj.experiments.count() == 1
        assert proj.experiments.get().scans.count() == 1
def validate_import_dict(import_dict, project: Optional[Project]):
    """Validate the structure and file references of a parsed import.

    Args:
        import_dict: parsed import data (from a CSV or JSON import file).
        project: the project being imported into, or None for a global import.

    Returns:
        The validated import dict, with file locations resolved.

    Raises:
        APIException: if the dict does not match the expected schema, a
            referenced file cannot be located, or (for a global import) a
            named project does not exist.
    """
    # Expected nesting: projects -> experiments -> scans -> frames.
    import_schema = Schema({
        'projects': {
            And(Use(str)): {
                'experiments': {
                    And(Use(str)): {
                        'scans': {
                            And(Use(str)): {
                                'type': And(Use(str)),
                                'frames': {
                                    And(Use(int)): {'file_location': And(Use(str))}
                                },
                            }
                        }
                    }
                }
            }
        }
    })
    try:
        import_schema.validate(import_dict)
        import_dict = validate_file_locations(import_dict, project)
    except SchemaError as e:
        import_path = (
            GlobalSettings.load().import_path if project is None else project.import_path
        )
        # Chain the schema error so the underlying cause is not lost (PEP 3134).
        raise APIException(f'Invalid format of import file {import_path}') from e
    if not project:
        # A global import may only target projects that already exist.
        for project_name in import_dict['projects']:
            if not Project.objects.filter(name=project_name).exists():
                raise APIException(f'Project {project_name} does not exist')
    return import_dict
def test_import_global_csv(tmp_path, user, project_factory, sample_scans, user_api_client):
    """Global CSV import: superusers populate matching projects; others get 403."""
    # Generate an import CSV for two projects with the same data
    csv_file = str(tmp_path / 'import.csv')
    output, _writer = generate_import_csv(sample_scans)
    with open(csv_file, 'w') as fd:
        fd.write(output.getvalue())

    settings = GlobalSettings.load()
    settings.import_path = csv_file
    settings.save()

    # Create projects targeted by the import
    projects = [
        project_factory(import_path=csv_file, name=name) for name in ('ohsu', 'ucsd')
    ]

    resp = user_api_client().post('/api/v1/global/import')
    if not user.is_superuser:
        assert resp.status_code == 403
        return

    assert resp.status_code == 204
    # The import should update the correctly named projects,
    # but not the original import project
    for proj in projects:
        proj.refresh_from_db()
        assert proj.experiments.count() == 1
        assert proj.experiments.get().scans.count() == 1
def import_data(project_id: Optional[str]):
    """Read, validate, and queue an import for one project (or globally).

    Supports CSV and JSON import files, stored locally or on S3; the
    resolved dict is handed off to the ``perform_import`` task.
    """
    if project_id is None:
        project = None
        import_path = GlobalSettings.load().import_path
    else:
        project = Project.objects.get(id=project_id)
        import_path = project.import_path

    remote = import_path.startswith('s3://')

    if import_path.endswith('.csv'):
        if remote:
            csv_text = _download_from_s3(import_path).decode('utf-8')
        else:
            with open(import_path) as fd:
                csv_text = fd.read()
        import_dict = import_dataframe_to_dict(pandas.read_csv(StringIO(csv_text)))
    elif import_path.endswith('.json'):
        if remote:
            import_dict = json.loads(_download_from_s3(import_path))
        else:
            with open(import_path) as fd:
                import_dict = json.load(fd)
    else:
        raise APIException(
            f'Invalid import file {import_path}. Must be CSV or JSON.')

    import_dict = validate_import_dict(import_dict, project)
    perform_import.delay(import_dict, project_id)
def perform_export(project_id: Optional[str]):
    """Write a CSV manifest of every frame in the targeted project(s).

    Args:
        project_id: the project to export, or None to export all projects
            to the globally configured export path.
    """
    if project_id is None:
        # A global export should export all projects
        projects = Project.objects.all()
        export_path = GlobalSettings.load().export_path
    else:
        # A normal export should only export the current project
        project = Project.objects.get(id=project_id)
        projects = [project]
        export_path = project.export_path

    data = []
    for project_object in projects:
        # select_related avoids one query per frame when dereferencing
        # frame.scan and frame.scan.experiment below (N+1 fix).
        frames = Frame.objects.filter(
            scan__experiment__project=project_object
        ).select_related('scan__experiment')
        for frame_object in frames:
            data.append([
                project_object.name,
                frame_object.scan.experiment.name,
                frame_object.scan.name,
                frame_object.scan.scan_type,
                frame_object.frame_number,
                frame_object.raw_path,
            ])

    export_df = pandas.DataFrame(data, columns=IMPORT_CSV_COLUMNS)
    export_df.to_csv(export_path, index=False)
def export_data(project_id: Optional[str]):
    """Validate the export destination, then queue the export task.

    Raises:
        APIException: if the export path's parent directory does not exist.
    """
    # Use `is None` for consistency with import_data/perform_export: an
    # empty-string id should not silently fall back to the global export path.
    if project_id is None:
        export_path = GlobalSettings.load().export_path
    else:
        project = Project.objects.get(id=project_id)
        export_path = project.export_path

    parent_location = Path(export_path).parent
    if not parent_location.exists():
        raise APIException(
            f'No such location {parent_location} to create export file.')

    perform_export.delay(project_id)
def validate_file_locations(input_dict, project):
    """Recursively resolve and validate every 'file_location' value in a dict.

    Relative paths are resolved against the grandparent directory of the
    import file; s3:// locations are kept verbatim and not checked.

    Args:
        input_dict: arbitrarily nested import data (non-dicts pass through).
        project: the project whose import_path anchors relative paths, or
            None to use the global import path.

    Returns:
        The same structure with file locations normalized to absolute paths.

    Raises:
        APIException: if a local file location does not exist.
    """
    if not isinstance(input_dict, dict):
        return input_dict
    # Load the import path once up front; the previous version re-loaded
    # GlobalSettings (a DB query) at every recursion level.
    import_path = (
        GlobalSettings.load().import_path if project is None else project.import_path
    )
    base_dir = Path(import_path).parent.parent

    def _resolve(node):
        # Walk nested dicts, rewriting 'file_location' entries in place.
        if not isinstance(node, dict):
            return node
        for key, value in node.items():
            if key != 'file_location':
                node[key] = _resolve(value)
                continue
            if value.startswith('s3://'):
                # Remote object: keep verbatim, no existence check possible here.
                continue
            raw_path = Path(value)
            if not raw_path.is_absolute():
                # Relative path: anchor at the import file's grandparent directory
                raw_path = base_dir / raw_path
            if not raw_path.exists():
                raise APIException(f'Could not locate file "{raw_path}".')
            node[key] = str(raw_path)
        return node

    return _resolve(input_dict)
def get_object(self):
    # Always operate on the GlobalSettings singleton, regardless of URL kwargs.
    # NOTE(review): presumably a DRF view's get_object override and .load() a
    # singleton-model accessor — confirm against the enclosing class.
    return GlobalSettings.load()