def createLinkVersion(syn, genie_version, caseListEntities,
                      genePanelEntities, databaseSynIdMappingDf):
    versioning = genie_version.split(".")
    logger.info(genie_version)
    main = versioning[0]
    releaseSynId = databaseSynIdMappingDf['Id'][
        databaseSynIdMappingDf['Database'] == 'release'].values[0]
    publicSynId = databaseSynIdMappingDf['Id'][
        databaseSynIdMappingDf['Database'] == 'public'].values[0]
    # second = ".".join(versioning[1:])
    releases = synapseutils.walk(syn, releaseSynId)
    mainReleaseFolders = next(releases)[1]
    releaseFolderSynId = [synId for folderName, synId in mainReleaseFolders
                          if folderName == "Release %s" % main]
    if len(releaseFolderSynId) > 0:
        secondRelease = synapseutils.walk(syn, releaseFolderSynId[0])
        secondReleaseFolders = next(secondRelease)[1]
        secondReleaseFolderSynIdList = [
            synId for folderName, synId in secondReleaseFolders
            if folderName == genie_version]
        if len(secondReleaseFolderSynIdList) > 0:
            secondReleaseFolderSynId = secondReleaseFolderSynIdList[0]
        else:
            secondReleaseFolderSynId = syn.store(synapseclient.Folder(
                genie_version, parent=releaseFolderSynId[0])).id
    else:
        mainReleaseFolderId = syn.store(synapseclient.Folder(
            "Release %s" % main, parent=releaseSynId)).id
        secondReleaseFolderSynId = syn.store(synapseclient.Folder(
            genie_version, parent=mainReleaseFolderId)).id

    caselistId = db_to_staging.find_caselistid(syn, secondReleaseFolderSynId)
    publicRelease = syn.getChildren(publicSynId)
    for ents in publicRelease:
        if (ents['type'] != "org.sagebionetworks.repo.model.Folder"
                and ents['name'] != "data_clinical.txt"
                and not ents['name'].startswith("data_gene_panel")):
            syn.store(synapseclient.Link(ents['id'],
                                         parent=secondReleaseFolderSynId,
                                         targetVersion=ents['versionNumber']))
    for ents in caseListEntities:
        syn.store(synapseclient.Link(ents.id,
                                     parent=caselistId,
                                     targetVersion=ents.versionNumber))
    # Store gene panels
    for ents in genePanelEntities:
        syn.store(synapseclient.Link(ents.id,
                                     parent=secondReleaseFolderSynId,
                                     targetVersion=ents.versionNumber))
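# Hypothetical usage sketch for createLinkVersion, assuming a logged-in
# client, the module-level imports the function relies on (logger,
# synapseutils, db_to_staging), and previously stored case-list and
# gene-panel entities. The version string and Synapse IDs are placeholders.
import pandas
import synapseclient

syn = synapseclient.login()  # assumes cached credentials
databaseSynIdMappingDf = pandas.DataFrame(
    {'Database': ['release', 'public'], 'Id': ['syn111', 'syn222']})
createLinkVersion(syn, "5.3-consortium",
                  caseListEntities=[], genePanelEntities=[],
                  databaseSynIdMappingDf=databaseSynIdMappingDf)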
def test_data_pull_non_data_folder(syn_test_helper, mk_tempfile,
                                   mk_uniq_string, mk_kiproject):
    syn_project = syn_test_helper.create_project()

    syn_folder1 = syn_test_helper.client().store(
        synapseclient.Folder(name='Folder1', parent=syn_project))
    syn_test_helper.client().store(
        synapseclient.File(path=mk_tempfile(), parent=syn_folder1))

    syn_folder2 = syn_test_helper.client().store(
        synapseclient.Folder(name='Folder2', parent=syn_folder1))
    syn_test_helper.client().store(
        synapseclient.File(path=mk_tempfile(), parent=syn_folder2))

    syn_folder3 = syn_test_helper.client().store(
        synapseclient.Folder(name='Folder3', parent=syn_folder2))
    syn_test_helper.client().store(
        synapseclient.File(path=mk_tempfile(), parent=syn_folder3))

    syn_folder4 = syn_test_helper.client().store(
        synapseclient.Folder(name='Folder4', parent=syn_folder3))
    syn_test_helper.client().store(
        synapseclient.File(path=mk_tempfile(), parent=syn_folder4))

    syn_folder5 = syn_test_helper.client().store(
        synapseclient.Folder(name='Folder5', parent=syn_folder4))
    syn_test_helper.client().store(
        synapseclient.File(path=mk_tempfile(), parent=syn_folder5))

    kiproject = mk_kiproject()
    kiproject.data_add(DataUri('syn', syn_folder1.id).uri,
                       data_type=kiproject.data_types[0])
    kiproject.data_pull()
def create_folders(root, folder_list):
    """Create a hierarchy of Synapse folders.

    Args:
        root: Synapse ID of a container.
        folder_list: list of folders in the same format as os.walk.

    Returns:
        A dictionary mapping each local path to its Synapse Folder entity
        (the root path '.' maps to the given container ID).
    """
    syn = Synapse().client()
    dirlookup = {'.': root}

    for directory, subdirectories, _ in folder_list:
        folder = dirlookup.get(directory, None)
        if not folder:
            # Look up the parent directory's folder and use the basename as
            # the folder name; the original indexed dirlookup[directory],
            # which always raises KeyError on this branch.
            parent = dirlookup[os.path.dirname(directory) or '.']
            folder = synapseclient.Folder(os.path.basename(directory),
                                          parent=parent)
            folder = syn.store(folder)
            dirlookup[directory] = folder
        for subdir in subdirectories:
            curr = os.path.join(directory, subdir)
            subfolder = dirlookup.get(
                curr, syn.store(synapseclient.Folder(subdir, parent=folder)))
            dirlookup[curr] = subfolder

    return dirlookup
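# Hypothetical usage of create_folders, assuming the Synapse() wrapper used
# above is configured with credentials and a local ./data tree exists;
# "syn123" is a placeholder container ID.
import os

folder_list = os.walk("data")  # yields (dirpath, dirnames, filenames) tuples
dirlookup = create_folders("syn123", folder_list)
# dirlookup maps each local path (e.g. os.path.join("data", "raw")) to the
# Folder entity that was created for it.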
def _getSynapseDir(syn, synapse_id, local_root, dir_list):
    """
    1. Walks through the Synapse parent location hierarchy.
    2. Updates folders in Synapse to match the local dir.
    3. Gets key-value pairs of dirname and Synapse ID.

    :param syn: a logged-in synapseclient.Synapse instance
    :param synapse_id: Synapse ID of the parent container to walk
    :param local_root: local directory corresponding to synapse_id
    :param dir_list: local directories that must exist in Synapse
    :return: dict mapping local directory paths to Synapse IDs
    """
    synapse_dir = {}
    synapse_root = syn.get(synapse_id)

    for (dirpath, dirpath_id), _, _ in synapseutils.walk(syn, synapse_id):
        dirpath = dirpath.replace(synapse_root.name,
                                  os.path.abspath(local_root))
        synapse_dir[dirpath] = dirpath_id

    for directory in dir_list:
        if directory not in synapse_dir:  # dict.has_key() is Python 2 only
            new_folder = synapseclient.Folder(
                os.path.basename(directory),
                parent=synapse_dir[os.path.dirname(directory)])
            new_folder = syn.store(new_folder)
            synapse_dir[directory] = new_folder.id

    return synapse_dir
def _find_or_create_syn_folder(self, syn_parent, folder_name):
    """Finds or creates a folder in Synapse.

    Args:
        syn_parent: The Synapse entity to find or create the folder under.
        folder_name: The name of the folder to find or create.

    Returns:
        synapseclient.Folder
    """
    # TODO: can any of this be cached?
    syn_entity_id = SynapseAdapter.client().findEntityId(folder_name,
                                                         parent=syn_parent)
    if syn_entity_id:
        syn_entity = SynapseAdapter.client().get(syn_entity_id)
        if self._is_folder(syn_entity):
            return syn_entity
        else:
            raise Exception(
                'Cannot create folder, name: {0} already taken by another entity: {1}'
                .format(folder_name, syn_entity.id))

    return SynapseAdapter.client().store(
        synapseclient.Folder(name=folder_name, parent=syn_parent))
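# A minimal standalone sketch of the same find-or-create pattern, without
# the SynapseAdapter wrapper; assumes cached credentials, and "syn123" and
# "raw_data" are placeholders. Unlike the method above, this sketch does not
# verify that a matching entity is actually a Folder.
import synapseclient

syn = synapseclient.login()
parent_id = "syn123"
entity_id = syn.findEntityId("raw_data", parent=parent_id)
folder = (syn.get(entity_id) if entity_id else
          syn.store(synapseclient.Folder(name="raw_data", parent=parent_id)))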
def test_get_or_create_folder__call():
    """Makes sure correct parameters are called"""
    folder_name = str(uuid.uuid1())
    parentid = str(uuid.uuid1())
    folder = synapseclient.Folder(name=folder_name, parentId=parentid)
    returned = synapseclient.Folder(name=folder_name,
                                    id=str(uuid.uuid1()),
                                    parentId=parentid)
    with patch.object(CREATE_CLS, "_find_by_obj_or_create",
                      return_value=returned) as patch_find_or_create:
        new_folder = CREATE_CLS.get_or_create_folder(name=folder_name,
                                                     parentId=parentid)
        assert new_folder == returned
        patch_find_or_create.assert_called_once_with(folder)
def test_valid__check_parentid_permission_container():
    """Test that the specified parentid is a container and that we have
    permission to access it"""
    parentid = "syn123"
    folder_ent = synapseclient.Folder("foo", parentId=parentid)
    with patch.object(syn, "get", return_value=folder_ent):
        validate._check_parentid_permission_container(syn, parentid)
def test_command_copy():
    """Tests the 'synapse cp' function"""

    # Create a Project
    project_entity = syn.store(synapseclient.Project(name=str(uuid.uuid4())))
    schedule_for_cleanup(project_entity.id)

    # Create a Folder in Project
    folder_entity = syn.store(synapseclient.Folder(name=str(uuid.uuid4()),
                                                   parent=project_entity))
    schedule_for_cleanup(folder_entity.id)

    # Create and upload a file in Folder
    repo_url = 'https://github.com/Sage-Bionetworks/synapsePythonClient'
    annots = {'test': ['hello_world']}

    # Create, upload, and set annotations on a file in Folder
    filename = utils.make_bogus_data_file()
    schedule_for_cleanup(filename)
    file_entity = syn.store(synapseclient.File(filename, parent=folder_entity))
    externalURL_entity = syn.store(synapseclient.File(repo_url,
                                                      name='rand',
                                                      parent=folder_entity,
                                                      synapseStore=False))
    syn.setAnnotations(file_entity, annots)
    syn.setAnnotations(externalURL_entity, annots)
    schedule_for_cleanup(file_entity.id)
    schedule_for_cleanup(externalURL_entity.id)

    # Test cp function
    output = run('synapse', '--skip-checks', 'cp', file_entity.id,
                 '--destinationId', project_entity.id)
    output_URL = run('synapse', '--skip-checks', 'cp', externalURL_entity.id,
                     '--destinationId', project_entity.id)

    copied_id = parse(r'Copied syn\d+ to (syn\d+)', output)
    copied_URL_id = parse(r'Copied syn\d+ to (syn\d+)', output_URL)

    # Verify that our copied files are identical
    copied_ent = syn.get(copied_id)
    copied_URL_ent = syn.get(copied_URL_id, downloadFile=False)
    schedule_for_cleanup(copied_id)
    schedule_for_cleanup(copied_URL_id)
    copied_ent_annot = syn.getAnnotations(copied_id)
    copied_url_annot = syn.getAnnotations(copied_URL_id)

    copied_prov = syn.getProvenance(copied_id)['used'][0]['reference']['targetId']
    copied_url_prov = syn.getProvenance(copied_URL_id)['used'][0]['reference']['targetId']

    # Make sure copied files are the same
    assert_equals(copied_prov, file_entity.id)
    assert_equals(copied_ent_annot, annots)
    assert_equals(copied_ent.properties.dataFileHandleId,
                  file_entity.properties.dataFileHandleId)

    # Make sure copied URLs are the same
    assert_equals(copied_url_prov, externalURL_entity.id)
    assert_equals(copied_url_annot, annots)
    assert_equals(copied_URL_ent.externalURL, repo_url)
    assert_equals(copied_URL_ent.name, 'rand')
    assert_equals(copied_URL_ent.properties.dataFileHandleId,
                  externalURL_entity.properties.dataFileHandleId)

    # Verify that errors are being thrown when a
    # file is copied to a folder/project that has a file with the same filename
    assert_raises(ValueError, run, 'synapse', '--debug', '--skip-checks',
                  'cp', file_entity.id, '--destinationId', project_entity.id)
def _mk(syn_parent, count=2, suffix=''):
    syn_folders = []
    for folder_count in range(1, count + 1):
        folder_name = 'Folder{0}{1}'.format(folder_count, suffix)
        syn_folder = syn_test_helper.client().store(
            synapseclient.Folder(name=folder_name, parent=syn_parent))
        syn_folders.append(syn_folder)
    return syn_folders
def test_table_file_view_csv_update_annotations__includeEntityEtag():
    folder = syn.store(
        synapseclient.Folder(name="updateAnnoFolder" + str(uuid.uuid4()),
                             parent=project))
    anno1_name = "annotationColumn1"
    anno2_name = "annotationColumn2"
    initial_annotations = {anno1_name: "initial_value1",
                           anno2_name: "initial_value2"}
    file_entity = syn.store(
        File(name="test_table_file_view_csv_update_annotations__includeEntityEtag",
             path="~/fakepath",
             synapseStore=False,
             parent=folder,
             annotations=initial_annotations))

    annotation_columns = [Column(name=anno1_name, columnType='STRING'),
                          Column(name=anno2_name, columnType='STRING')]
    entity_view = syn.store(
        EntityViewSchema(name="TestEntityViewSchemaUpdateAnnotation" + str(uuid.uuid4()),
                         parent=project,
                         scopes=[folder],
                         columns=annotation_columns))

    query_str = "SELECT {anno1}, {anno2} FROM {proj_id}".format(
        anno1=anno1_name, anno2=anno2_name, proj_id=utils.id_of(entity_view))

    # Modify the first annotation using a rowset
    rowset_query_result = syn.tableQuery(query_str, resultsAs="rowset")
    rowset = rowset_query_result.asRowSet()
    rowset_changed_anno_value = "rowset_value_change"
    rowset.rows[0].values[0] = rowset_changed_anno_value
    syn.store(rowset)

    # Modify the second annotation using a csv
    csv_query_result = syn.tableQuery(query_str, resultsAs="csv")
    dataframe = csv_query_result.asDataFrame()
    csv_changed_anno_value = "csv_value_change"
    # DataFrame.ix was removed from pandas; use a positional row index with
    # the labeled column instead
    dataframe.iloc[0, dataframe.columns.get_loc(anno2_name)] = csv_changed_anno_value
    syn.store(Table(utils.id_of(entity_view), dataframe))

    # Check annotations on the file entity. Annotations may not be updated
    # immediately, so we wait in a while loop.
    expected_annotations = {anno1_name: [rowset_changed_anno_value],
                            anno2_name: [csv_changed_anno_value]}
    start_time = time.time()
    while expected_annotations != file_entity.annotations:
        assert_less(time.time() - start_time, QUERY_TIMEOUT_SEC)
        time.sleep(2)
        file_entity = syn.get(file_entity, downloadFile=False)
def test_command_get_recursive_and_query():
    """Tests the 'synapse get -r' and 'synapse get -q' functions"""
    # Create a Project
    project_entity = syn.store(synapseclient.Project(name=str(uuid.uuid4())))
    schedule_for_cleanup(project_entity.id)

    # Create a Folder in Project
    folder_entity = syn.store(
        synapseclient.Folder(name=str(uuid.uuid4()), parent=project_entity))

    # Create and upload two files in Folder
    uploaded_paths = []
    for i in range(2):
        f = utils.make_bogus_data_file()
        uploaded_paths.append(f)
        schedule_for_cleanup(f)
        file_entity = synapseclient.File(f, parent=folder_entity)
        file_entity.location = 'folder'
        file_entity = syn.store(file_entity)

    # Add a file at the project level as well
    f = utils.make_bogus_data_file()
    uploaded_paths.append(f)
    schedule_for_cleanup(f)
    file_entity = synapseclient.File(f, parent=project_entity)
    file_entity.location = 'project'
    file_entity = syn.store(file_entity)

    ### Test recursive get
    output = run('synapse', '--skip-checks', 'get', '-r', project_entity.id)
    # Verify that we downloaded files:
    new_paths = [os.path.join('.', folder_entity.name, os.path.basename(f))
                 for f in uploaded_paths[:-1]]
    new_paths.append(os.path.join('.', os.path.basename(uploaded_paths[-1])))
    schedule_for_cleanup(folder_entity.name)
    for downloaded, uploaded in zip(new_paths, uploaded_paths):
        print(uploaded, downloaded)  # print() for Python 3 compatibility
        assert os.path.exists(downloaded)
        assert filecmp.cmp(downloaded, uploaded)
    schedule_for_cleanup(new_paths[0])

    ### Test query get
    output = run('synapse', '--skip-checks', 'get', '-q',
                 "select id from file where parentId=='%s' and location=='folder'"
                 % folder_entity.id)
    # Verify that we downloaded files:
    new_paths = [os.path.join('.', os.path.basename(f))
                 for f in uploaded_paths[:-1]]
    for downloaded, uploaded in zip(new_paths, uploaded_paths[:-1]):
        print(uploaded, downloaded)
        assert os.path.exists(downloaded)
        assert filecmp.cmp(downloaded, uploaded)
        schedule_for_cleanup(downloaded)
def _remote_folder(dirpath, remotes, syn):
    """Retrieve the remote folder for files, creating if necessary.
    """
    if dirpath in remotes:
        return remotes[dirpath], remotes
    else:
        parent_dir, cur_dir = os.path.split(dirpath)
        parent_folder, remotes = _remote_folder(parent_dir, remotes, syn)
        s_cur_dir = syn.store(synapseclient.Folder(cur_dir,
                                                   parent=parent_folder))
        remotes[dirpath] = s_cur_dir.id
        return s_cur_dir.id, remotes
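# Hypothetical usage of _remote_folder, assuming a logged-in client. The
# cache must be pre-seeded with a known root so the recursion terminates;
# "syn123" is a placeholder project or folder ID.
import synapseclient

syn = synapseclient.login()
remotes = {"data": "syn123"}  # seed the root path
folder_id, remotes = _remote_folder("data/batch1/raw", remotes, syn)
# Creates data/batch1 and data/batch1/raw on the first call; later calls
# for the same paths are answered from the remotes cache.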
def get_parent_folder(project_id, key):
    parent_id = project_id
    folders = key.split('/')
    fn = folders.pop(-1)  # drop the file name; only folders are created
    for f in folders:
        folder_id = syn.findEntityId(f, parent_id)
        if folder_id is None:
            # create folder:
            folder_id = syn.store(synapseclient.Folder(name=f,
                                                       parent=parent_id),
                                  forceVersion=False)['id']
        parent_id = folder_id
    return parent_id
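# Hypothetical usage of get_parent_folder: mirrors an S3-style object key
# into nested Synapse folders, assuming the module-level `syn` client used
# above; "syn123" and the key are placeholders.
parent_id = get_parent_folder("syn123", "2021/site_a/sample1/reads.fastq")
# parent_id now points at .../2021/site_a/sample1; the trailing file name
# is popped off and never created as a folder.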
def test_command_copy():
    """Tests the 'synapse cp' function"""

    # Create a Project
    project_entity = syn.store(synapseclient.Project(name=str(uuid.uuid4())))
    schedule_for_cleanup(project_entity.id)

    # Create a Folder in Project
    folder_entity = syn.store(
        synapseclient.Folder(name=str(uuid.uuid4()), parent=project_entity))

    # Create and upload a file in Folder
    dummy = utils.make_bogus_data_file()
    schedule_for_cleanup(dummy)
    dummy_entity = syn.store(synapseclient.File(dummy, parent=folder_entity))

    repo_url = 'https://github.com/Sage-Bionetworks/synapsePythonClient'
    annots = {'test': 'hello_world'}

    # Create, upload, and set annotations on a file in Folder
    filename = utils.make_bogus_data_file()
    schedule_for_cleanup(filename)
    file_entity = syn.store(synapseclient.File(filename, parent=folder_entity),
                            used=dummy_entity.id,
                            executed=repo_url)
    syn.setAnnotations(file_entity, annots)

    ### Test cp function
    output = run('synapse', '--skip-checks', 'cp', '--id', file_entity.id,
                 '--parentid', project_entity.id)
    copied_id = parse(r'Copied syn\d+ to (syn\d+)', output)

    # Verify that our copied files are identical
    copied_ent = syn.get(copied_id)
    schedule_for_cleanup(copied_id)
    copied_ent_annot = syn.getAnnotations(copied_ent)
    copied_annot = dict((key, copied_ent_annot[key].pop())
                        for key in copied_ent_annot
                        if key not in ('uri', 'id', 'creationDate', 'etag'))
    copied_prov = syn.getProvenance(copied_ent)['used'][0]['reference']['targetId']

    assert copied_prov == file_entity.id
    assert copied_annot == annots

    # Verify that errors are thrown when folders/projects are copied,
    # or when a file is copied to a folder/project that already has a file
    # with the same filename
    assert_raises(ValueError, run, 'synapse', '--debug', '--skip-checks', 'cp',
                  '--id', folder_entity.id, '--parentid', project_entity.id)
    assert_raises(ValueError, run, 'synapse', '--debug', '--skip-checks', 'cp',
                  '--id', project_entity.id, '--parentid', project_entity.id)
    assert_raises(ValueError, run, 'synapse', '--debug', '--skip-checks', 'cp',
                  '--id', file_entity.id, '--parentid', project_entity.id)
def syn_data(mk_syn_project, syn_test_helper, mk_syn_folders_files):
    """
    Creates this:

    data
      /core
        file1
        file2
        folder1/
          file1_1
          file2_1
          Folder1_1/
            file1_2
            file2_2
          Folder2_1/
            file1_2
            file2_2
        folder2/
          file1_1
          file2_1
          Folder1_1/
            file1_2
            file2_2
          Folder2_1/
            file1_2
            file2_2
      /auxiliary
        <same as core...>
      /results
        <same as core...>

    This method will return the root files/folders under data/core,
    data/auxiliary, and data/results. The data and data_type folders are
    NOT returned.
    """
    syn_project = mk_syn_project()
    root_folders = []
    root_files = []

    for template_path in DataTypeTemplate.default().paths:
        parent = syn_project
        for name in SysPath(template_path.rel_path).rel_parts:
            parent = syn_test_helper.client().store(
                synapseclient.Folder(name=name, parent=parent))

        folder, folders, files = mk_syn_folders_files(parent)
        root_folders += folders
        root_files += files

    return syn_project, root_folders, root_files
def test_entity_type_display_name():
    assert SynapseProxy.entity_type_display_name(
        'org.sagebionetworks.repo.model.Project') == SynapseProxy.PROJECT_TYPE_DISPLAY_NAME
    assert SynapseProxy.entity_type_display_name(
        syn.Project()) == SynapseProxy.PROJECT_TYPE_DISPLAY_NAME
    assert SynapseProxy.entity_type_display_name(
        {'concreteType': 'org.sagebionetworks.repo.model.Project'}) == SynapseProxy.PROJECT_TYPE_DISPLAY_NAME

    assert SynapseProxy.entity_type_display_name(
        'org.sagebionetworks.repo.model.Folder') == SynapseProxy.FOLDER_TYPE_DISPLAY_NAME
    assert SynapseProxy.entity_type_display_name(
        syn.Folder(parentId='syn0')) == SynapseProxy.FOLDER_TYPE_DISPLAY_NAME
    assert SynapseProxy.entity_type_display_name(
        {'concreteType': 'org.sagebionetworks.repo.model.Folder'}) == SynapseProxy.FOLDER_TYPE_DISPLAY_NAME

    assert SynapseProxy.entity_type_display_name(
        'org.sagebionetworks.repo.model.FileEntity') == SynapseProxy.FILE_TYPE_DISPLAY_NAME
    assert SynapseProxy.entity_type_display_name(
        syn.File(parentId='syn0')) == SynapseProxy.FILE_TYPE_DISPLAY_NAME
    assert SynapseProxy.entity_type_display_name(
        {'concreteType': 'org.sagebionetworks.repo.model.FileEntity'}) == SynapseProxy.FILE_TYPE_DISPLAY_NAME

    assert SynapseProxy.entity_type_display_name(
        'org.sagebionetworks.repo.model.Link') == SynapseProxy.LINK_TYPE_DISPLAY_NAME
    assert SynapseProxy.entity_type_display_name(
        syn.Link(parentId='syn0', targetId='syn0')) == SynapseProxy.LINK_TYPE_DISPLAY_NAME
    assert SynapseProxy.entity_type_display_name(
        {'concreteType': 'org.sagebionetworks.repo.model.Link'}) == SynapseProxy.LINK_TYPE_DISPLAY_NAME

    assert SynapseProxy.entity_type_display_name(
        'org.sagebionetworks.repo.model.table.TableEntity') == SynapseProxy.TABLE_TYPE_DISPLAY_NAME
    assert SynapseProxy.entity_type_display_name(
        {'concreteType': 'org.sagebionetworks.repo.model.table.TableEntity'}) == SynapseProxy.TABLE_TYPE_DISPLAY_NAME
def test_it_does_not_push_a_file_unless_the_local_file_changed(
        mk_kiproject, mk_syn_files, syn_client, mocker):
    kiproject = mk_kiproject()

    # Get the Synapse project for the KiProject
    syn_project = syn_client.get(DataUri.parse(kiproject.project_uri).id)

    syn_data_folder = syn_client.store(
        synapseclient.Folder(name='data', parent=syn_project))
    syn_core_folder = syn_client.store(
        synapseclient.Folder(name='core', parent=syn_data_folder))

    # Create a Synapse file to add/pull/push
    syn_file = mk_syn_files(syn_core_folder, file_num=1, versions=1,
                            suffix='')[0]
    syn_file_uri = DataUri('syn', syn_file.id).uri

    kiproject.data_add(syn_file_uri, data_type=kiproject.data_types[0])
    kiproject.data_pull()

    # The file exists in the Synapse project and has been pulled locally.
    # Pushing again should NOT upload the file again.
    mocker.spy(synapseclient.client, 'upload_file_handle')
    kiproject.data_push(syn_file_uri)
def test__create_synapse_resources_folder(self):
    """Test folders get created"""
    folder_config = [
        {'name': 'Test 1', 'type': 'Folder'},
        {'name': 'Test 2', 'type': 'Folder'}
    ]
    expected_config = [
        {'name': 'Test 1', 'type': 'Folder', 'id': 'syn33333'},
        {'name': 'Test 2', 'type': 'Folder', 'id': 'syn22222'}
    ]
    folder_ent_1 = synapseclient.Folder(id="syn33333", parentId="syn5555")
    folder_ent_2 = synapseclient.Folder(id="syn22222", parentId="syn5555")
    call_1 = mock.call(name="Test 1", parentId="syn5555")
    call_2 = mock.call(name="Test 2", parentId="syn5555")
    with patch.object(self.create_cls, "get_or_create_folder",
                      side_effect=[folder_ent_1, folder_ent_2]) as patch_create:
        client._create_synapse_resources(config_list=folder_config,
                                         creation_cls=self.create_cls,
                                         parentid="syn5555")
        patch_create.assert_has_calls([call_1, call_2])
        assert folder_config == expected_config
def test__create_synapse_resources_recursive(self):
    """Test recursive calls are made"""
    project_ent = synapseclient.Project(id="syn5555")
    folder_ent = synapseclient.Folder(id="syn33333", parentId="syn5555")
    call_1 = mock.call(name="Genes", parentId="syn5555")
    call_2 = mock.call(name="testing", parentId="syn33333")
    with patch.object(self.create_cls, "get_or_create_project",
                      return_value=project_ent) as patch_create_proj,\
         patch.object(self.create_cls, "get_or_create_folder",
                      return_value=folder_ent) as patch_create_folder:
        client._create_synapse_resources(config_list=self.config,
                                         creation_cls=self.create_cls)
        patch_create_proj.assert_called_once_with(name="Test Configuration")
        patch_create_folder.assert_has_calls([call_1, call_2])
def follow_path_to_folder(self, path, origin=None, create=False):
    """Return terminal folder's synID after traversing the defined path."""
    if origin is None:
        origin = self.project_id

    try:
        name = path.popleft()
    except AttributeError:
        path = deque(path)
        name = path.popleft()

    try:
        is_folder_named_x_partial = partial(is_folder_named_x, name=name)
        next_node_id = self.check_children(node_id=origin,
                                           func=is_folder_named_x_partial)[0]
    except IndexError:
        # If no child is found:
        if create:
            # create synapse folder object if we were told to
            parent_obj = self.node[origin].obj
            new_folder = synapse.Folder(name, parent=parent_obj)
            new_folder = self.syn.store(new_folder)
            new_folder_id = new_folder['id']

            # add new edge to DAG and mark for update
            self.add_edge(u=origin, v=new_folder_id, attr_dict=None)
            entity_dict = {k: v for k, v in new_folder.items()}
            self.node[new_folder_id] = SynNode(entity_dict=entity_dict,
                                               synapse_session=self.syn,
                                               is_root=False)

            # send the final result back up the chain.
            return new_folder_id
        else:
            # raise an error otherwise
            raise e.NoResult()

    # send next_node_id along to next level
    # or send the final result back up the chain.
    if path:
        return self.follow_path_to_folder(path=path, origin=next_node_id,
                                          create=create)
    else:
        return next_node_id
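# Hypothetical usage of follow_path_to_folder, assuming `dag` is an instance
# of the surrounding DAG class with project_id set and its node table built.
# The method accepts any iterable; a non-deque is converted internally.
from collections import deque

# Walks raw -> 2020 -> site_a from the project root, creating a missing
# folder along the way when create=True.
terminal_id = dag.follow_path_to_folder(deque(["raw", "2020", "site_a"]),
                                        create=True)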
def mk_local_files_and_folders(start_path,
                               prefix='',
                               depth=3,
                               file_count=3,
                               folder_count=3,
                               current_depth=0,
                               syn_client=None,
                               syn_parent=None):
    current_depth += 1
    local_results = []
    remote_results = []

    for _ in range(0, file_count):
        filename = '{0}test_file_{1}_{2}.dat'.format(prefix, current_depth,
                                                     gen_id())
        file_path = os.path.join(start_path, filename)
        # Fill the file with random data.
        write_random_data_to_file(file_path)
        local_results.append(file_path)

        # Store the file in Synapse
        if syn_parent:
            syn_file = syn_client.store(syn.File(path=file_path,
                                                 parent=syn_parent))
            remote_results.append(syn_file)

    if current_depth < depth:
        # Create the folders.
        for _ in range(0, folder_count):
            foldername = '{0}test_folder_{1}_{2}'.format(prefix, current_depth,
                                                         gen_id())
            folder_path = mk_dirs(start_path, foldername)
            local_results.append(folder_path)

            # Create the folder in Synapse
            syn_folder = None  # stays None when there is no Synapse parent
            if syn_parent:
                syn_folder = syn_client.store(syn.Folder(name=foldername,
                                                         parent=syn_parent))
                remote_results.append(syn_folder)

            more_locals, more_remotes = mk_local_files_and_folders(
                folder_path,
                prefix=prefix,
                depth=depth,
                current_depth=current_depth,
                syn_client=syn_client,
                syn_parent=syn_folder)
            local_results += more_locals
            remote_results += more_remotes

    return local_results, remote_results
def setUp(self):
    print("Creating private Project...")
    test_project = sc.Project("Test" + uuid.uuid4().hex)
    self.project_id = syn.store(test_project).id

    print("Creating Folder...")
    folder = sc.Folder("folder", parent=self.project_id)
    self.folder_id = syn.store(folder).id

    print("Creating File within Folder...")
    with tempfile.NamedTemporaryFile() as temp:
        temp.write(b"123testingfolder")  # bytes: the temp file is opened in binary mode
        temp.flush()
        temp_file = sc.File(temp.name, parent=self.folder_id)
        self.folder_fileId = syn.store(temp_file).id

    print("Creating File within Project...")
    with tempfile.NamedTemporaryFile() as temp:
        temp.write(b"123testingproject")
        temp.flush()
        temp_file = sc.File(temp.name, parent=self.project_id)
        self.project_fileId = syn.store(temp_file).id
def _create_folder_in_synapse(self, path, synapse_parent):
    synapse_folder = None

    if not synapse_parent:
        self.has_errors = True
        logging.error('Parent not found, cannot create folder: {0}'.format(path))
        return synapse_folder

    folder_name = os.path.basename(path)
    full_synapse_path = self._get_synapse_path(folder_name, synapse_parent)

    max_attempts = 5
    attempt_number = 0
    exception = None

    while attempt_number < max_attempts and not synapse_folder:
        try:
            attempt_number += 1
            exception = None
            synapse_folder = self._synapse_client.store(
                syn.Folder(name=folder_name, parent=synapse_parent),
                forceVersion=False)
        except Exception as ex:
            exception = ex
            logging.error('[Folder ERROR] {0} -> {1} : {2}'.format(
                path, full_synapse_path, str(ex)))
            if attempt_number < max_attempts:
                sleep_time = random.randint(1, 5)
                logging.info('[Folder RETRY in {0}s] {1} -> {2}'.format(
                    sleep_time, path, full_synapse_path))
                time.sleep(sleep_time)

    if exception:
        self.has_errors = True
        logging.error('[Folder FAILED] {0} -> {1} : {2}'.format(
            path, full_synapse_path, str(exception)))
    else:
        logging.info('[Folder] {0} -> {1}'.format(path, full_synapse_path))
        self._set_synapse_parent(synapse_folder)

    return synapse_folder
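# The bounded-retry-with-random-sleep pattern above, factored into a generic
# helper as a sketch; the helper name and signature are illustrative, not
# part of the original module.
import logging
import random
import time

def store_with_retries(store_fn, max_attempts=5):
    """Call store_fn() up to max_attempts times, sleeping 1-5s between tries."""
    last_exception = None
    for attempt in range(1, max_attempts + 1):
        try:
            return store_fn()
        except Exception as ex:  # broad catch mirrors the method above
            last_exception = ex
            logging.error('attempt %d failed: %s', attempt, ex)
            if attempt < max_attempts:
                time.sleep(random.randint(1, 5))
    raise last_exception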
def main(syn):
    # Basic setup of the project
    project_name = "Testing Synapse Genie"

    # Determine the short and long names of the centers.
    center_abbreviations = ['AAA', 'BBB', 'CCC']
    center_names = center_abbreviations

    # Create the project
    project = synapseclient.Project(project_name)
    project = syn.store(project)

    # Create a folder for log files generated by the GENIE processes
    # of validation and updating the database tables
    logs_folder = synapseclient.Folder(name='Logs', parent=project)
    logs_folder = syn.store(logs_folder)

    # Folder for individual center folders
    root_center_folder = synapseclient.Folder(name='Centers', parent=project)
    root_center_folder = syn.store(root_center_folder)

    # The folders for each center where they will upload files for validation
    # and submission. There is one folder per center.
    # This currently deviates from the original GENIE setup of having an
    # 'Input' and 'Staging' folder for each center.
    center_folders = [synapseclient.Folder(name=name, parent=root_center_folder)
                      for name in center_abbreviations]
    center_folders = [syn.store(folder) for folder in center_folders]

    # Make some fake data that only contains basic text to check
    # for validation.
    n_files = 5  # number of files per center to create
    for folder in center_folders:
        for idx in range(n_files):
            tmp = tempfile.NamedTemporaryFile(prefix=f'TEST-{folder.name}',
                                              suffix='.txt')
            with open(tmp.name, mode='w') as fh:
                fh.write(random.choice(['ERROR', 'VALID', 'NOPE']))
            synfile = syn.store(synapseclient.File(tmp.name, parent=folder))

    # Set up the table that holds the validation status of all submitted files.
    status_schema = create_status_table(syn, project)

    # Set up the table that maps the center abbreviation to the folder where
    # their data is uploaded. This is used by the GENIE framework to find the
    # files to validate for a center.
    center_map_table_defs = [
        {'name': 'name',
         'columnType': 'STRING',
         'maximumSize': 250},
        {'name': 'center',
         'columnType': 'STRING',
         'maximumSize': 50},
        {'name': 'inputSynId',
         'columnType': 'ENTITYID'},
        # {'name': 'stagingSynId',
        #  'columnType': 'ENTITYID'},
        {'name': 'release',
         'defaultValue': 'false',
         'columnType': 'BOOLEAN'}
        # {'id': '68438',
        #  'name': 'mutationInCisFilter',
        #  'defaultValue': 'true',
        #  'columnType': 'BOOLEAN',
        #  'concreteType': 'org.sagebionetworks.repo.model.table.ColumnModel'}
    ]

    center_map_cols = [synapseclient.Column(**col)
                       for col in center_map_table_defs]

    center_schema = synapseclient.Schema(name='Center Table',
                                         columns=center_map_cols,
                                         parent=project)
    center_schema = syn.store(center_schema)

    # Add the center folders created above to this table.
    center_folder_ids = [folder.id for folder in center_folders]
    center_df = pandas.DataFrame(dict(name=center_names,
                                      center=center_abbreviations,
                                      inputSynId=center_folder_ids))

    tbl = synapseclient.Table(schema=center_schema, values=center_df)
    tbl = syn.store(tbl)

    # Create a table that stores the error logs for each submitted file.
    error_col_defs = [
        {'name': 'id',
         'columnType': 'ENTITYID'},
        {'name': 'center',
         'columnType': 'STRING',
         'maximumSize': 50,
         'facetType': 'enumeration'},
        {'name': 'errors',
         'columnType': 'LARGETEXT'},
        {'name': 'name',
         'columnType': 'STRING',
         'maximumSize': 500},
        # {'name': 'versionNumber',
        #  'columnType': 'STRING',
        #  'maximumSize': 50},
        {'name': 'fileType',
         'columnType': 'STRING',
         'maximumSize': 50}
    ]
    error_map_cols = [synapseclient.Column(**col) for col in error_col_defs]

    error_schema = synapseclient.Schema(name='Error Table',
                                        columns=error_map_cols,
                                        parent=project)
    error_schema = syn.store(error_schema)

    # Create a table that maps the various database tables to a short name.
    # This table is used in many GENIE functions to find the correct table
    # to update or get the state of something from.
    db_map_col_defs = [
        {'name': 'Database',
         'columnType': 'STRING',
         'maximumSize': 50},
        {'name': 'Id',
         'columnType': 'ENTITYID'}
    ]
    db_map_cols = [synapseclient.Column(**col) for col in db_map_col_defs]

    db_map_schema = synapseclient.Schema(name='DB Mapping Table',
                                         columns=db_map_cols,
                                         parent=project)
    db_map_schema = syn.store(db_map_schema)

    # Add dbMapping annotation
    project.annotations.dbMapping = db_map_schema.tableId
    project = syn.store(project)

    # Add the tables we already created to the mapping table.
    dbmap_df = pandas.DataFrame(
        dict(Database=['centerMapping', 'validationStatus', 'errorTracker',
                       'dbMapping', 'logs'],
             Id=[center_schema.id, status_schema.id, error_schema.id,
                 db_map_schema.id, logs_folder.id]))
    db_map_tbl = synapseclient.Table(schema=db_map_schema, values=dbmap_df)
    db_map_tbl = syn.store(db_map_tbl)

    # Make a top level folder for output. Some processing for
    # file types copy a file from one place to another.
    output_folder = synapseclient.Folder(name='Output', parent=project)
    output_folder = syn.store(output_folder)

    output_folder_map = []

    # default_table_col_defs = status_table_col_defs = [
    #     {'name': 'PRIMARY_KEY',
    #      'columnType': 'STRING'}
    # ]
    # default_table_cols = [synapseclient.Column(**col)
    #                       for col in default_table_col_defs]

    default_primary_key = 'PRIMARY_KEY'

    # For each file type format in the format registry, create an output
    # folder and a table. Some GENIE file types copy a file to a new place,
    # and some update a table. Having both means that both of these
    # operations will be available at the beginning.
    # The mapping between the file type and the folder or table has a
    # consistent naming. The key ('Database' value) is {file_type}_folder
    # or {file_type}_table.

    # Determine which file formats are going to be used.
    format_registry = config.collect_format_types(['example_registry'])

    for file_type, obj in format_registry.items():
        file_type_folder = synapseclient.Folder(name=file_type,
                                                parent=output_folder)
        file_type_folder = syn.store(file_type_folder)
        output_folder_map.append(dict(Database=f"{file_type}_folder",
                                      Id=file_type_folder.id))

        file_type_schema = synapseclient.Schema(name=file_type, parent=project)
        file_type_schema.annotations.primaryKey = default_primary_key
        file_type_schema = syn.store(file_type_schema)
        output_folder_map.append(dict(Database=f"{file_type}_table",
                                      Id=file_type_schema.id))

    # Add the folders and tables created to the mapping table.
    db_map_tbl = synapseclient.Table(
        schema=db_map_schema, values=pandas.DataFrame(output_folder_map))
    db_map_tbl = syn.store(db_map_tbl)
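# Hypothetical entry point for the setup script above, assuming cached
# Synapse credentials.
if __name__ == "__main__":
    main(synapseclient.login())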
from unittest.mock import create_autospec

import pytest
import synapseclient

from challengeutils import permissions

SYN = create_autospec(synapseclient.Synapse)

SET_PERMS = {"set"}


@pytest.mark.parametrize(
    "entity,principalid,permission_level,mapped",
    [
        # tuple with (input, expectedOutput)
        (synapseclient.Project(), None, "view",
         permissions.ENTITY_PERMS_MAPPINGS['view']),
        (synapseclient.Folder(parentId="syn123"), None, "download",
         permissions.ENTITY_PERMS_MAPPINGS['download']),
        (synapseclient.Entity(), None, "edit",
         permissions.ENTITY_PERMS_MAPPINGS['edit']),
        (synapseclient.Schema(parentId="syn123"), None, "edit_and_delete",
         permissions.ENTITY_PERMS_MAPPINGS['edit_and_delete']),
        (synapseclient.File(parentId="syn123"), None, "admin",
         permissions.ENTITY_PERMS_MAPPINGS['admin']),
        (synapseclient.Entity(), None, "remove",
         permissions.ENTITY_PERMS_MAPPINGS['remove']),
        (synapseclient.Evaluation(contentSource="syn123"), None, "view",
         permissions.EVALUATION_PERMS_MAPPINGS['view']),
        (synapseclient.Evaluation(contentSource="syn123"), None, "submit",
         permissions.EVALUATION_PERMS_MAPPINGS['submit']),
        (synapseclient.Evaluation(contentSource="syn123"), None, "score",
         permissions.EVALUATION_PERMS_MAPPINGS['score']),
def test_command_get_recursive_and_query():
    """Tests the 'synapse get -r' and 'synapse get -q' functions"""
    project_entity = project

    # Create Folders in Project
    folder_entity = syn.store(
        synapseclient.Folder(name=str(uuid.uuid4()), parent=project_entity))
    folder_entity2 = syn.store(
        synapseclient.Folder(name=str(uuid.uuid4()), parent=folder_entity))

    # Create and upload two files in the sub-Folder
    uploaded_paths = []
    file_entities = []
    for i in range(2):
        f = utils.make_bogus_data_file()
        uploaded_paths.append(f)
        schedule_for_cleanup(f)
        file_entity = synapseclient.File(f, parent=folder_entity2)
        file_entity = syn.store(file_entity)
        file_entities.append(file_entity)
        schedule_for_cleanup(f)

    # Add a file in the Folder as well
    f = utils.make_bogus_data_file()
    uploaded_paths.append(f)
    schedule_for_cleanup(f)
    file_entity = synapseclient.File(f, parent=folder_entity)
    file_entity = syn.store(file_entity)
    file_entities.append(file_entity)

    # get -r uses syncFromSynapse() which uses getChildren(), which is not
    # immediately consistent, but faster than chunked queries.
    time.sleep(2)
    # Test recursive get
    run('synapse', '--skip-checks', 'get', '-r', folder_entity.id)
    # Verify that we downloaded files:
    new_paths = [os.path.join('.', folder_entity2.name, os.path.basename(f))
                 for f in uploaded_paths[:-1]]
    new_paths.append(os.path.join('.', os.path.basename(uploaded_paths[-1])))
    schedule_for_cleanup(folder_entity.name)
    for downloaded, uploaded in zip(new_paths, uploaded_paths):
        assert_true(os.path.exists(downloaded))
        assert_true(filecmp.cmp(downloaded, uploaded))
        schedule_for_cleanup(downloaded)

    # Test query get using a Table with an entity column
    # This should be replaced when Table File Views are implemented in the client
    cols = [synapseclient.Column(name='id', columnType='ENTITYID')]
    schema1 = syn.store(synapseclient.Schema(name='Foo Table',
                                             columns=cols,
                                             parent=project_entity))
    schedule_for_cleanup(schema1.id)
    data1 = [[x.id] for x in file_entities]
    syn.store(synapseclient.RowSet(schema=schema1,
                                   rows=[synapseclient.Row(r) for r in data1]))

    time.sleep(3)  # get -q is eventually consistent
    # Test Table/View query get
    output = run('synapse', '--skip-checks', 'get', '-q',
                 "select id from %s" % schema1.id)
    # Verify that we downloaded files:
    new_paths = [os.path.join('.', os.path.basename(f))
                 for f in uploaded_paths[:-1]]
    new_paths.append(os.path.join('.', os.path.basename(uploaded_paths[-1])))
    schedule_for_cleanup(folder_entity.name)
    for downloaded, uploaded in zip(new_paths, uploaded_paths):
        assert_true(os.path.exists(downloaded))
        assert_true(filecmp.cmp(downloaded, uploaded))
        schedule_for_cleanup(downloaded)

    schedule_for_cleanup(new_paths[0])
global synapse
synapse = synapseclient.Synapse()
synapse = synapseclient.login(args.synapseUser, args.synapsePassword,
                              rememberMe=False)

MAX_FILES_PER_FOLDER = 10

# create 'n' files in sourceProject (say, 10 to a folder)
folder = None
folderCount = 0
filesInFolder = 0
for i in range(args.numberOfFiles):
    if folder is None or filesInFolder >= MAX_FILES_PER_FOLDER:
        # create folder
        folder = synapseclient.Folder(str(folderCount),
                                      parent=args.sourceProject)
        folder = synapse.store(folder)
        filesInFolder = 0
        folderCount += 1
    # create file, upload to folder
    # just make an arbitrary string
    s = randomword(1000)
    filePath = os.path.join(tempfile.gettempdir(), 'file_' + str(i) + '.txt')
    with open(filePath, 'w') as myfile:
        myfile.write(s)
    file = synapseclient.File(filePath,
                              parent=folder,
                              annotations={'someAnnotName': 'someAnnotValue'})
    file = synapse.store(file)
    filesInFolder += 1  # count the file against the current folder's batch
def test_command_line_using_paths():
    # Create a Project
    project_entity = syn.store(synapseclient.Project(name=str(uuid.uuid4())))
    schedule_for_cleanup(project_entity.id)

    # Create a Folder in Project
    folder_entity = syn.store(
        synapseclient.Folder(name=str(uuid.uuid4()), parent=project_entity))

    # Create and upload a file in Folder
    filename = utils.make_bogus_data_file()
    schedule_for_cleanup(filename)
    file_entity = syn.store(synapseclient.File(filename, parent=folder_entity))

    # Verify that we can use show with a filename
    output = run('synapse', '--skip-checks', 'show', filename)
    id = parse(r'File: %s\s+\((syn\d+)\)\s+' % os.path.split(filename)[1],
               output)
    assert_equals(file_entity.id, id)

    # Verify that limitSearch works by making sure we get the file entity
    # that's inside the folder
    file_entity2 = syn.store(synapseclient.File(filename,
                                                parent=project_entity))
    output = run('synapse', '--skip-checks', 'get',
                 '--limitSearch', folder_entity.id,
                 filename)
    id = parse(r'Associated file: .* with synapse ID (syn\d+)', output)
    name = parse(r'Associated file: (.*) with synapse ID syn\d+', output)
    assert_equals(file_entity.id, id)
    assert_true(utils.equal_paths(name, filename))

    # Verify that set-provenance works with a filepath
    repo_url = 'https://github.com/Sage-Bionetworks/synapsePythonClient'
    output = run('synapse', '--skip-checks', 'set-provenance',
                 '-id', file_entity2.id,
                 '-name', 'TestActivity',
                 '-description', 'A very excellent provenance',
                 '-used', filename,
                 '-executed', repo_url,
                 '-limitSearch', folder_entity.id)
    activity_id = parse(r'Set provenance record (\d+) on entity syn\d+',
                        output)

    output = run('synapse', '--skip-checks', 'get-provenance',
                 '-id', file_entity2.id)
    activity = json.loads(output)
    assert_equals(activity['name'], 'TestActivity')
    assert_equals(activity['description'], 'A very excellent provenance')

    # Verify that store works with provenance specified with a filepath
    repo_url = 'https://github.com/Sage-Bionetworks/synapsePythonClient'
    filename2 = utils.make_bogus_data_file()
    schedule_for_cleanup(filename2)
    output = run('synapse', '--skip-checks', 'add', filename2,
                 '-parentid', project_entity.id,
                 '-used', filename,
                 '-executed', '%s %s' % (repo_url, filename))
    entity_id = parse(r'Created/Updated entity:\s+(syn\d+)\s+', output)
    output = run('synapse', '--skip-checks', 'get-provenance',
                 '-id', entity_id)
    activity = json.loads(output)
    a = [a for a in activity['used'] if not a['wasExecuted']]
    assert_in(a[0]['reference']['targetId'],
              [file_entity.id, file_entity2.id])

    # Test the associate command:
    # we already have two files, filename and filename2, in Synapse
    path = tempfile.mkdtemp()
    schedule_for_cleanup(path)
    shutil.copy(filename, path)
    shutil.copy(filename2, path)
    run('synapse', '--skip-checks', 'associate', path, '-r')
    run('synapse', '--skip-checks', 'show', filename)
def test_migrate_project(request, syn, schedule_for_cleanup,
                         storage_location_id):
    test_name = request.node.name
    project_name = "{}-{}".format(test_name, uuid.uuid4())
    project = synapseclient.Project(name=project_name)
    project_entity = syn.store(project)

    file_0_path = _create_temp_file()
    schedule_for_cleanup(file_0_path)
    file_0_name = "{}-{}".format(test_name, 1)
    file_0 = synapseclient.File(name=file_0_name, path=file_0_path,
                                parent=project_entity)
    file_0_entity = syn.store(file_0)
    default_storage_location_id = file_0_entity._file_handle['storageLocationId']

    folder_1_name = "{}-{}-{}".format(test_name, 1, uuid.uuid4())
    folder_1 = synapseclient.Folder(parent=project_entity, name=folder_1_name)
    folder_1_entity = syn.store(folder_1)

    file_1_path = _create_temp_file()
    schedule_for_cleanup(file_1_path)
    file_1_name = "{}-{}".format(test_name, 1)
    file_1 = synapseclient.File(name=file_1_name, path=file_1_path,
                                parent=folder_1_entity)
    file_1_entity = syn.store(file_1)

    file_2_path = _create_temp_file()
    schedule_for_cleanup(file_2_path)
    file_2_name = "{}-{}".format(test_name, 2)
    file_2 = synapseclient.File(name=file_2_name, path=file_2_path,
                                parent=folder_1_entity)
    file_2_entity = syn.store(file_2)

    # file 3 shares the same file handle id as file 1
    file_3_path = file_1_path
    file_3_name = "{}-{}".format(test_name, 3)
    file_3 = synapseclient.File(name=file_3_name, path=file_3_path,
                                parent=folder_1_entity)
    file_3.dataFileHandleId = file_1_entity.dataFileHandleId
    file_3_entity = syn.store(file_3)

    table_1_cols = [
        synapseclient.Column(name='file_col_1', columnType='FILEHANDLEID'),
        synapseclient.Column(name='num', columnType='INTEGER'),
        synapseclient.Column(name='file_col_2', columnType='FILEHANDLEID'),
    ]
    table_1 = syn.store(synapseclient.Schema(name=test_name,
                                             columns=table_1_cols,
                                             parent=folder_1_entity))
    table_1_file_col_1_1 = _create_temp_file()
    table_1_file_handle_1 = syn.uploadFileHandle(table_1_file_col_1_1, table_1)
    table_1_file_col_1_2 = _create_temp_file()
    table_1_file_handle_2 = syn.uploadFileHandle(table_1_file_col_1_2, table_1)
    table_1_file_col_2_1 = _create_temp_file()
    table_1_file_handle_3 = syn.uploadFileHandle(table_1_file_col_2_1, table_1)
    table_1_file_col_2_2 = _create_temp_file()
    table_1_file_handle_4 = syn.uploadFileHandle(table_1_file_col_2_2, table_1)

    data = [
        [table_1_file_handle_1['id'], 1, table_1_file_handle_2['id']],
        [table_1_file_handle_3['id'], 2, table_1_file_handle_4['id']],
    ]
    table_1_entity = syn.store(
        synapseclient.RowSet(schema=table_1,
                             rows=[synapseclient.Row(r) for r in data]))

    db_path = tempfile.NamedTemporaryFile(delete=False).name
    schedule_for_cleanup(db_path)

    index_result = synapseutils.index_files_for_migration(
        syn,
        project_entity,
        storage_location_id,
        db_path,
        file_version_strategy='new',
        include_table_files=True,
    )

    counts_by_status = index_result.get_counts_by_status()
    assert counts_by_status['INDEXED'] == 8
    assert counts_by_status['ERRORED'] == 0

    migration_result = synapseutils.migrate_indexed_files(syn, db_path,
                                                          force=True)

    file_0_entity_updated = syn.get(utils.id_of(file_0_entity),
                                    downloadFile=False)
    file_1_entity_updated = syn.get(utils.id_of(file_1_entity),
                                    downloadFile=False)
    file_2_entity_updated = syn.get(utils.id_of(file_2_entity),
                                    downloadFile=False)
    file_3_entity_updated = syn.get(utils.id_of(file_3_entity),
                                    downloadFile=False)
    file_handles = [f['_file_handle'] for f in (
        file_0_entity_updated,
        file_1_entity_updated,
        file_2_entity_updated,
        file_3_entity_updated,
    )]

    table_1_id = utils.id_of(table_1_entity)
    results = syn.tableQuery("select file_col_1, file_col_2 from {}".format(
        utils.id_of(table_1_entity)))
    table_file_handles = []
    for row in results:
        for file_handle_id in row[2:]:
            file_handle = syn._getFileHandleDownload(
                file_handle_id, table_1_id,
                objectType='TableEntity')['fileHandle']
            table_file_handles.append(file_handle)
    file_handles.extend(table_file_handles)

    _assert_storage_location(file_handles, storage_location_id)
    assert storage_location_id != default_storage_location_id

    with sqlite3.connect(db_path) as conn:
        cursor = conn.cursor()
        query_result = cursor.execute(
            "select status, count(*) from migrations where type in (?, ?) group by status",
            (_MigrationType.FILE.value,
             _MigrationType.TABLE_ATTACHED_FILE.value)).fetchall()

        counts = {r[0]: r[1] for r in query_result}

        # should only be one status and they should all be migrated:
        # 4 migrated file entities + 4 migrated table attached files
        assert len(counts) == 1
        assert counts[_MigrationStatus.MIGRATED.value] == 8

    csv_file = tempfile.NamedTemporaryFile(delete=False)
    schedule_for_cleanup(csv_file.name)
    migration_result.as_csv(csv_file.name)
    with open(csv_file.name, 'r') as csv_file_in:
        csv_contents = csv_file_in.read()

    table_1_id = table_1_entity['tableId']

    # assert the content of the csv. we don't assert any particular order of
    # the lines, just the presence of the expected lines and the correct
    # number of lines
    csv_lines = csv_contents.split('\n')
    assert "id,type,version,row_id,col_name,from_storage_location_id,from_file_handle_id,to_file_handle_id,status,exception" in csv_lines  # noqa
    assert f"{file_0_entity.id},file,,,,{default_storage_location_id},{file_0_entity.dataFileHandleId},{file_0_entity_updated.dataFileHandleId},MIGRATED," in csv_lines  # noqa
    assert f"{file_1_entity.id},file,,,,{default_storage_location_id},{file_1_entity.dataFileHandleId},{file_1_entity_updated.dataFileHandleId},MIGRATED," in csv_lines  # noqa
    assert f"{file_2_entity.id},file,,,,{default_storage_location_id},{file_2_entity.dataFileHandleId},{file_2_entity_updated.dataFileHandleId},MIGRATED," in csv_lines  # noqa
    assert f"{file_3_entity.id},file,,,,{default_storage_location_id},{file_3_entity.dataFileHandleId},{file_3_entity_updated.dataFileHandleId},MIGRATED," in csv_lines  # noqa
    assert f"{table_1_id},table,1,1,file_col_1,{default_storage_location_id},{table_1_file_handle_1['id']},{table_file_handles[0]['id']},MIGRATED," in csv_lines  # noqa
    assert f"{table_1_id},table,1,1,file_col_2,{default_storage_location_id},{table_1_file_handle_2['id']},{table_file_handles[1]['id']},MIGRATED," in csv_lines  # noqa
    assert f"{table_1_id},table,1,2,file_col_1,{default_storage_location_id},{table_1_file_handle_3['id']},{table_file_handles[2]['id']},MIGRATED," in csv_lines  # noqa
    assert f"{table_1_id},table,1,2,file_col_2,{default_storage_location_id},{table_1_file_handle_4['id']},{table_file_handles[3]['id']},MIGRATED," in csv_lines  # noqa
    assert "" in csv_lines  # expect a trailing newline in the csv
def test_command_line_client():
    print("TESTING CMD LINE CLIENT")

    # Create a Project
    output = run('synapse', '--skip-checks', 'create',
                 '-name', str(uuid.uuid4()),
                 '-description', 'test of command line client',
                 'Project')
    project_id = parse(r'Created entity:\s+(syn\d+)\s+', output)
    schedule_for_cleanup(project_id)

    # Create a File
    filename = utils.make_bogus_data_file()
    schedule_for_cleanup(filename)
    output = run('synapse', '--skip-checks', 'add',
                 '-name', 'BogusFileEntity',
                 '-description', 'Bogus data to test file upload',
                 '-parentid', project_id,
                 filename)
    file_entity_id = parse(r'Created/Updated entity:\s+(syn\d+)\s+', output)

    # Verify that we stored the file in Synapse
    f1 = syn.get(file_entity_id)
    fh = syn._getFileHandle(f1.dataFileHandleId)
    assert_equals(fh['concreteType'],
                  'org.sagebionetworks.repo.model.file.S3FileHandle')

    # Get the File from the command line
    output = run('synapse', '--skip-checks', 'get', file_entity_id)
    downloaded_filename = parse(r'Downloaded file:\s+(.*)', output)
    schedule_for_cleanup(downloaded_filename)
    assert_true(os.path.exists(downloaded_filename))
    assert_true(filecmp.cmp(filename, downloaded_filename))

    # Update the File
    filename = utils.make_bogus_data_file()
    schedule_for_cleanup(filename)
    output = run('synapse', '--skip-checks', 'store',
                 '--id', file_entity_id,
                 filename)

    # Get the File again
    output = run('synapse', '--skip-checks', 'get', file_entity_id)
    downloaded_filename = parse(r'Downloaded file:\s+(.*)', output)
    schedule_for_cleanup(downloaded_filename)
    assert_true(os.path.exists(downloaded_filename))
    assert_true(filecmp.cmp(filename, downloaded_filename))

    # Store the same file and don't force a new version.
    # Get the existing file to determine its current version
    current_file = syn.get(file_entity_id, downloadFile=False)
    current_version = current_file.versionNumber

    # Store it without forcing a new version
    output = run('synapse', '--skip-checks', 'store', '--noForceVersion',
                 '--id', file_entity_id,
                 filename)

    # Get the File again and check that the version did not change
    new_file = syn.get(file_entity_id, downloadFile=False)
    new_version = new_file.versionNumber
    assert_equals(current_version, new_version)

    # Move the file to a new folder
    folder = syn.store(synapseclient.Folder(parentId=project_id))
    output = run('synapse', 'mv',
                 '--id', file_entity_id,
                 '--parentid', folder.id)
    movedFile = syn.get(file_entity_id, downloadFile=False)
    assert_equals(movedFile.parentId, folder.id)

    # Test Provenance
    repo_url = 'https://github.com/Sage-Bionetworks/synapsePythonClient'
    output = run('synapse', '--skip-checks', 'set-provenance',
                 '-id', file_entity_id,
                 '-name', 'TestActivity',
                 '-description', 'A very excellent provenance',
                 '-used', file_entity_id,
                 '-executed', repo_url)

    output = run('synapse', '--skip-checks', 'get-provenance',
                 '--id', file_entity_id)
    activity = json.loads(output)
    assert_equals(activity['name'], 'TestActivity')
    assert_equals(activity['description'], 'A very excellent provenance')

    used = utils._find_used(activity, lambda used: 'reference' in used)
    assert_equals(used['reference']['targetId'], file_entity_id)

    used = utils._find_used(activity, lambda used: 'url' in used)
    assert_equals(used['url'], repo_url)
    assert_true(used['wasExecuted'])

    # Note: Tests shouldn't have external dependencies,
    # but this is a pretty picture of Singapore
    singapore_url = 'http://upload.wikimedia.org/wikipedia/commons/' \
                    'thumb/3/3e/1_singapore_city_skyline_dusk_panorama_2011.jpg' \
                    '/1280px-1_singapore_city_skyline_dusk_panorama_2011.jpg'

    # Test an external file handle
    output = run('synapse', '--skip-checks', 'add',
                 '-name', 'Singapore',
                 '-description', 'A nice picture of Singapore',
                 '-parentid', project_id,
                 singapore_url)
    external_entity_id = parse(r'Created/Updated entity:\s+(syn\d+)\s+', output)

    # Verify that we created an external file handle
    f2 = syn.get(external_entity_id)
    fh = syn._getFileHandle(f2.dataFileHandleId)
    assert_equals(fh['concreteType'],
                  'org.sagebionetworks.repo.model.file.ExternalFileHandle')

    output = run('synapse', '--skip-checks', 'get', external_entity_id)
    downloaded_filename = parse(r'Downloaded file:\s+(.*)', output)
    schedule_for_cleanup(downloaded_filename)
    assert_true(os.path.exists(downloaded_filename))

    # Delete the Project
    run('synapse', '--skip-checks', 'delete', project_id)