def get(args, syn):
    """Handle the `synapse get` CLI command.

    Dispatches on the parsed command-line arguments:
      * args.recursive     -> bulk download via synapseutils.syncFromSynapse
      * args.queryString   -> download every entity id returned by the query
      * otherwise          -> fetch a single entity; if args.id is a local
                              file path, look up its Synapse entity instead

    :param args: argparse namespace (id, version, recursive, queryString,
                 downloadLocation, limitSearch, followLink)
    :param syn:  a logged-in synapseclient.Synapse instance
    :raises ValueError: when --version is combined with --recursive, or when
                        --version/--id is combined with a query download
    """
    if args.recursive:
        if args.version is not None:
            raise ValueError('You cannot specify a version making a recursive download.')
        synapseutils.syncFromSynapse(syn, args.id, args.downloadLocation, followLink=args.followLink)
    elif args.queryString is not None:
        if args.version is not None or args.id is not None:
            raise ValueError('You cannot specify a version or id when you are downloading a query.')
        ids = _getIdsFromQuery(args.queryString, syn)
        for id in ids:
            syn.get(id, downloadLocation=args.downloadLocation)
    else:
        # search by MD5
        if isinstance(args.id, six.string_types) and os.path.isfile(args.id):
            # args.id is a local file path: resolve which Synapse entity it
            # belongs to (downloadFile=False -- no content transfer needed).
            entity = syn.get(args.id, version=args.version, limitSearch=args.limitSearch, downloadFile=False)
            if "path" in entity and entity.path is not None and os.path.exists(entity.path):
                print("Associated file: %s with synapse ID %s" % (entity.path, entity.id))
        # normal syn.get operation
        else:
            entity = syn.get(args.id, version=args.version,
                             # limitSearch=args.limitSearch,
                             followLink=args.followLink,
                             downloadLocation=args.downloadLocation)
            if "path" in entity and entity.path is not None and os.path.exists(entity.path):
                print("Downloaded file: %s" % os.path.basename(entity.path))
            else:
                print('WARNING: No files associated with entity %s\n' % entity.id)
                print(entity)
                # NOTE(review): the placement of this print relative to the
                # if/else was ambiguous in the flattened source -- confirm
                # against the upstream synapseclient CLI.
                print('Creating %s' % entity.path)
def test_syncFromSynapse__downloadFile_is_false(syn):
    """
    Verify when passing the argument downloadFile is equal to False, syncFromSynapse
    won't download the file to clients' local end.
    """
    project = Project(name="the project", parent="whatever", id="syn123")
    file_entity = File(name="a file", parent=project, id="syn456")
    folder_entity = Folder(name="a folder", parent=project, id="syn789")

    # Local id -> entity index stands in for the Synapse backend.
    lookup = {file_entity.id: file_entity, folder_entity.id: folder_entity}

    def fake_get(entity, *args, **kwargs):
        return lookup[id_of(entity)]

    child_listings = [[folder_entity, file_entity], []]
    with patch.object(syn, "getChildren", side_effect=child_listings), \
            patch.object(syn, "get", side_effect=fake_get) as patch_syn_get:
        synapseutils.syncFromSynapse(syn, project, downloadFile=False)

    # downloadFile=False must be forwarded verbatim to syn.get.
    patch_syn_get.assert_called_once_with(
        file_entity['id'],
        downloadLocation=None,
        ifcollision='overwrite.local',
        followLink=False,
        downloadFile=False,
    )
def get(args, syn):
    """Handle the `synapse get` CLI command.

    Dispatches on the parsed command-line arguments:
      * args.recursive     -> bulk download via synapseutils.syncFromSynapse
      * args.queryString   -> download every entity id returned by the query
      * otherwise          -> fetch a single entity; if args.id is a local
                              file path, look up its Synapse entity instead

    :param args: argparse namespace (id, version, recursive, queryString,
                 downloadLocation, limitSearch, followLink)
    :param syn:  a logged-in synapseclient.Synapse instance
    :raises ValueError: when --version is combined with --recursive, or when
                        --version/--id is combined with a query download
    """
    if args.recursive:
        if args.version is not None:
            raise ValueError('You cannot specify a version making a recursive download.')
        synapseutils.syncFromSynapse(syn, args.id, args.downloadLocation, followLink=args.followLink)
    elif args.queryString is not None:
        if args.version is not None or args.id is not None:
            # FIX: corrected 'dowloading' -> 'downloading' in the error message.
            raise ValueError('You cannot specify a version or id when you are downloading a query.')
        ids = _getIdsFromQuery(args.queryString, syn)
        for id in ids:
            syn.get(id, downloadLocation=args.downloadLocation)
    else:
        # search by MD5
        if isinstance(args.id, six.string_types) and os.path.isfile(args.id):
            # args.id is a local file path: resolve which Synapse entity it
            # belongs to (downloadFile=False -- no content transfer needed).
            entity = syn.get(args.id, version=args.version, limitSearch=args.limitSearch, downloadFile=False)
            if "path" in entity and entity.path is not None and os.path.exists(entity.path):
                print("Associated file: %s with synapse ID %s" % (entity.path, entity.id))
        # normal syn.get operation
        else:
            entity = syn.get(args.id, version=args.version,
                             # limitSearch=args.limitSearch,
                             followLink=args.followLink,
                             downloadLocation=args.downloadLocation)
            if "path" in entity and entity.path is not None and os.path.exists(entity.path):
                print("Downloaded file: %s" % os.path.basename(entity.path))
            else:
                print('WARNING: No files associated with entity %s\n' % entity.id)
                print(entity)
                # NOTE(review): placement relative to the if/else was ambiguous
                # in the flattened source -- confirm against upstream.
                print('Creating %s' % entity.path)
def test_syncFromSynapse__manifest_is_root(
        mock__get_file_entity_provenance_dict, mock_generateManifest, syn):
    """
    Verify manifest argument equal to "root" that pass in to syncFromSynapse, it will create root_manifest file only.
    """
    project = Project(name="the project", parent="whatever", id="syn123")
    file1 = File(name="a file", parent=project, id="syn456")
    folder = Folder(name="a folder", parent=project, id="syn789")
    file2 = File(name="a file2", parent=folder, id="syn789123")
    # Structure of nested project
    # project
    #     |---> file1
    #     |---> folder
    #             |---> file2
    entities = {
        file1.id: file1,
        folder.id: folder,
        file2.id: file2,
    }

    def syn_get_side_effect(entity, *args, **kwargs):
        # Serve entities from the in-memory index instead of the server.
        return entities[id_of(entity)]

    # Provenance lookups are stubbed out; this test only cares about manifests.
    mock__get_file_entity_provenance_dict.return_value = {}

    with patch.object(syn, "getChildren", side_effect=[[folder, file1], [file2]]),\
            patch.object(syn, "get", side_effect=syn_get_side_effect) as patch_syn_get:

        synapseutils.syncFromSynapse(syn, project, path="./", downloadFile=False, manifest="root")
        # Both files are fetched (metadata only), each into its own directory.
        assert patch_syn_get.call_args_list == [
            call(
                file1['id'],
                downloadLocation="./",
                ifcollision='overwrite.local',
                followLink=False,
                downloadFile=False,
            ),
            call(
                file2['id'],
                downloadLocation="./a folder",
                ifcollision='overwrite.local',
                followLink=False,
                downloadFile=False,
            )
        ]

    # With manifest="root", exactly one manifest is generated, covering both files.
    assert mock_generateManifest.call_count == 1

    call_files = mock_generateManifest.call_args_list[0][0][1]
    assert len(call_files) == 2
    assert call_files[0].id == "syn456"
    assert call_files[1].id == "syn789123"
def fetch_release(self, release):
    """Sync a release folder from Synapse into /app/releases/<release>.

    If the release does not ship the 'PHS-TRISEQ-V2' gene panel file, a
    reference copy is placed alongside the downloaded data.
    """
    path = f"/app/releases/{release}"
    synapseutils.syncFromSynapse(self.syn, release, followLink=True, path=path)

    # Check for 'PHS-TRISEQ-V2'
    panel_file = Path(f"{path}/data_gene_panel_PHS-TRISEQ-V2.txt")
    if panel_file.is_file():
        return
    copyfile("/app/references/data_gene_panel_PHS-TRISEQ-V2.txt",
             f"{path}/data_gene_panel_PHS-TRISEQ-V2.txt")
def test_syncFromSynapse__manifest_value_is_invalid(syn):
    """An unrecognized manifest option must raise ValueError with a clear message."""
    project = Project(name="the project", parent="whatever", id="syn123")
    with pytest.raises(ValueError) as excinfo:
        synapseutils.syncFromSynapse(
            syn, project, path="./", downloadFile=False, manifest="invalid_str")
    expected = 'Value of manifest option should be one of the ("all", "root", "suppress")'
    assert str(excinfo.value) == expected
def test_syncFromSynapse__project_contains_empty_folder(syn):
    """An empty sub-folder is traversed but contributes no files to the result."""
    project = Project(name="the project", parent="whatever", id="syn123")
    file_entity = File(name="a file", parent=project, id="syn456")
    folder_entity = Folder(name="a folder", parent=project, id="syn789")

    lookup = {file_entity.id: file_entity, folder_entity.id: folder_entity}

    def fake_get(entity, *args, **kwargs):
        return lookup[id_of(entity)]

    listings = [[folder_entity, file_entity], []]
    with patch.object(syn, "getChildren", side_effect=listings) as patch_syn_get_children, \
            patch.object(syn, "get", side_effect=fake_get) as patch_syn_get:
        assert [file_entity] == synapseutils.syncFromSynapse(syn, project)

    # Children are listed for the project, then for the (empty) folder.
    assert [call(project['id']), call(folder_entity['id'])] == patch_syn_get_children.call_args_list
    # Only the file is fetched; the empty folder never reaches syn.get.
    patch_syn_get.assert_called_once_with(
        file_entity['id'],
        downloadLocation=None,
        ifcollision='overwrite.local',
        followLink=False,
        downloadFile=True,
    )
def test_syncFromSynapse__folder_contains_one_file(syn):
    """Syncing a folder that holds a single file returns exactly that file."""
    folder = Folder(name="the folder", parent="whatever", id="syn123")
    file_entity = File(name="a file", parent=folder, id="syn456")
    with patch.object(syn, "getChildren", return_value=[file_entity]) as patch_syn_get_children, \
            patch.object(syn, "get", return_value=file_entity):
        result = synapseutils.syncFromSynapse(syn, folder)
    assert result == [file_entity]
    # NOTE(review): `called_with` is not a real mock assertion (it silently
    # creates a child mock); kept to preserve behavior -- consider
    # `assert_called_with` upstream.
    patch_syn_get_children.called_with(folder['id'])
def test_syncFromSynapse__given_file_id(test_state):
    """Passing a bare file id to syncFromSynapse returns exactly that file."""
    file_path = utils.make_bogus_data_file()
    test_state.schedule_for_cleanup(file_path)
    stored = test_state.syn.store(
        File(file_path, name=str(uuid.uuid4()), parent=test_state.project, synapseStore=False))
    synced = synapseutils.syncFromSynapse(test_state.syn, stored.id)
    assert len(synced) == 1
    assert synced[0] == stored
def test_syncFromSynapse(): """This function tests recursive download as defined in syncFromSynapse most of the functionality of this function are already tested in the tests/integration/test_command_line_client::test_command_get_recursive_and_query which means that the only test if for path=None """ # Create a Project project_entity = syn.store(synapseclient.Project(name=str(uuid.uuid4()))) schedule_for_cleanup(project_entity.id) # Create a Folder in Project folder_entity = syn.store( Folder(name=str(uuid.uuid4()), parent=project_entity)) # Create and upload two files in Folder uploaded_paths = [] for i in range(2): f = utils.make_bogus_data_file() uploaded_paths.append(f) schedule_for_cleanup(f) syn.store(File(f, parent=folder_entity)) # Add a file in the project level as well f = utils.make_bogus_data_file() uploaded_paths.append(f) schedule_for_cleanup(f) syn.store(File(f, parent=project_entity)) # Test recursive get output = synapseutils.syncFromSynapse(syn, project_entity) assert_equals(len(output), len(uploaded_paths)) for f in output: assert_in(f.path, uploaded_paths)
def test_syncFromSynapse__folder_contains_one_file():
    """Syncing a folder that holds a single file returns exactly that file."""
    folder = Folder(name="the folder", parent="whatever", id="syn123")
    file_entity = File(name="a file", parent=folder, id="syn456")
    with patch.object(syn, "getChildren", return_value=[file_entity]) as patch_syn_get_children, \
            patch.object(syn, "get", return_value=file_entity):
        assert_equals([file_entity], synapseutils.syncFromSynapse(syn, folder))
    # NOTE(review): `called_with` performs no assertion; kept to preserve
    # behavior -- consider `assert_called_with` upstream.
    patch_syn_get_children.called_with(folder['id'])
def test_syncFromSynapse__given_file_id():
    """Passing a bare file id to syncFromSynapse returns exactly that file."""
    file_path = utils.make_bogus_data_file()
    schedule_for_cleanup(file_path)
    stored = syn.store(
        File(file_path, name=str(uuid.uuid4()), parent=project, synapseStore=False))
    synced = synapseutils.syncFromSynapse(syn, stored.id)
    assert_equals(1, len(synced))
    assert_equals(stored, synced[0])
def test_syncFromSynapse(): """This function tests recursive download as defined in syncFromSynapse most of the functionality of this function are already tested in the tests/integration/test_command_line_client::test_command_get_recursive_and_query which means that the only test if for path=None """ # Create a Project project_entity = syn.store(synapseclient.Project(name=str(uuid.uuid4()))) schedule_for_cleanup(project_entity.id) # Create a Folder in Project folder_entity = syn.store(Folder(name=str(uuid.uuid4()), parent=project_entity)) # Create and upload two files in Folder uploaded_paths = [] for i in range(2): f = utils.make_bogus_data_file() uploaded_paths.append(f) schedule_for_cleanup(f) syn.store(File(f, parent=folder_entity)) # Add a file in the project level as well f = utils.make_bogus_data_file() uploaded_paths.append(f) schedule_for_cleanup(f) syn.store(File(f, parent=project_entity)) # Test recursive get output = synapseutils.syncFromSynapse(syn, project_entity) assert_equals(len(output), len(uploaded_paths)) for f in output: assert_in(f.path, uploaded_paths)
def test_syncToSynapse(test_state):
    """Round-trip test: upload a manifest with syncToSynapse, re-download with
    syncFromSynapse, and verify locations, annotations, and provenance match.
    """
    # Test upload of accurate manifest
    manifest = _makeManifest(
        test_state.header + test_state.row1 + test_state.row2 + test_state.row3,
        test_state.schedule_for_cleanup
    )
    synapseutils.syncToSynapse(test_state.syn, manifest, sendMessages=False, retries=2)

    # syn.getChildren() used by syncFromSynapse() may intermittently have timing issues
    time.sleep(3)

    # Download using syncFromSynapse
    tmpdir = tempfile.mkdtemp()
    test_state.schedule_for_cleanup(tmpdir)
    synapseutils.syncFromSynapse(test_state.syn, test_state.project, path=tmpdir)

    # Index both manifests by file basename so rows line up regardless of order.
    orig_df = pd.read_csv(manifest, sep='\t')
    orig_df.index = [os.path.basename(p) for p in orig_df.path]
    new_df = pd.read_csv(os.path.join(tmpdir, synapseutils.sync.MANIFEST_FILENAME), sep='\t')
    new_df.index = [os.path.basename(p) for p in new_df.path]

    assert len(orig_df) == len(new_df)
    new_df = new_df.loc[orig_df.index]

    # Validate what was uploaded is in right location
    assert new_df.parent.equals(orig_df.parent), 'Downloaded files not stored in same location'

    # Validate that annotations were set: drop the structural manifest columns
    # so only user annotations remain.
    cols = synapseutils.sync.REQUIRED_FIELDS + synapseutils.sync.FILE_CONSTRUCTOR_FIELDS\
        + synapseutils.sync.STORE_FUNCTION_FIELDS + synapseutils.sync.PROVENANCE_FIELDS
    orig_anots = orig_df.drop(cols, axis=1, errors='ignore')
    new_anots = new_df.drop(cols, axis=1, errors='ignore')
    assert orig_anots.shape[1] == new_anots.shape[1]  # Verify that we have the same number of cols
    assert new_anots.equals(orig_anots.loc[:, new_anots.columns]), 'Annotations different'

    # Validate that provenance is correct
    for provenanceType in ['executed', 'used']:
        # Go through each row
        for orig, new in zip(orig_df[provenanceType], new_df[provenanceType]):
            if not pd.isnull(orig) and not pd.isnull(new):
                # Convert local file paths into synId.versionNumber strings
                orig_list = ['%s.%s' % (i.id, i.versionNumber) if isinstance(i, Entity) else i
                             for i in test_state.syn._convertProvenanceList(orig.split(';'))]
                new_list = ['%s.%s' % (i.id, i.versionNumber) if isinstance(i, Entity) else i
                            for i in test_state.syn._convertProvenanceList(new.split(';'))]
                assert set(orig_list) == set(new_list)
def test_syncFromSynapse__project_contains_empty_folder():
    """An empty sub-folder is traversed (and fetched) but yields no files."""
    project = Project(name="the project", parent="whatever", id="syn123")
    file_entity = File(name="a file", parent=project, id="syn456")
    folder_entity = Folder(name="a folder", parent=project, id="syn789")

    listings = [[folder_entity, file_entity], []]
    with patch.object(syn, "getChildren", side_effect=listings) as patch_syn_get_children, \
            patch.object(syn, "get", side_effect=[folder_entity, file_entity]) as patch_syn_get:
        assert_equals([file_entity], synapseutils.syncFromSynapse(syn, project))

        # Children are listed for the project, then for the (empty) folder.
        assert_list_equal([call(project['id']), call(folder_entity['id'])],
                          patch_syn_get_children.call_args_list)

        # Both the folder and the file are retrieved via syn.get with the
        # same default download options.
        common_kwargs = dict(downloadLocation=None, ifcollision='overwrite.local', followLink=False)
        assert_list_equal(
            [call(folder_entity['id'], **common_kwargs), call(file_entity['id'], **common_kwargs)],
            patch_syn_get.call_args_list)
def test_syncFromSynapse__children_contain_non_file():
    """Non-file children (e.g. a table Schema) must be skipped by syncFromSynapse."""
    proj = syn.store(Project(name="test_syncFromSynapse_children_non_file" + str(uuid.uuid4())))
    schedule_for_cleanup(proj)

    temp_file = utils.make_bogus_data_file()
    schedule_for_cleanup(temp_file)
    file_entity = syn.store(
        File(temp_file,
             name="temp_file_test_syncFromSynapse_children_non_file" + str(uuid.uuid4()),
             parent=proj))
    # The Schema only needs to exist as a non-file child of the project;
    # dropped the unused `table_schema` binding.
    syn.store(Schema(name="table_test_syncFromSynapse", parent=proj))

    temp_folder = tempfile.mkdtemp()
    schedule_for_cleanup(temp_folder)

    # Only the file comes back; the table Schema is ignored.
    files_list = synapseutils.syncFromSynapse(syn, proj, temp_folder)
    assert_equals(1, len(files_list))
    assert_equals(file_entity, files_list[0])
def test_syncFromSynapse__children_contain_non_file():
    """A project holding a table Schema plus one file syncs only the file."""
    project_entity = syn.store(
        Project(name="test_syncFromSynapse_children_non_file" + str(uuid.uuid4())))
    schedule_for_cleanup(project_entity)

    data_path = utils.make_bogus_data_file()
    schedule_for_cleanup(data_path)
    stored_file = syn.store(
        File(data_path,
             name="temp_file_test_syncFromSynapse_children_non_file" + str(uuid.uuid4()),
             parent=project_entity))
    # A non-file child that syncFromSynapse must skip.
    syn.store(Schema(name="table_test_syncFromSynapse", parent=project_entity))

    download_dir = tempfile.mkdtemp()
    schedule_for_cleanup(download_dir)

    synced = synapseutils.syncFromSynapse(syn, project_entity, download_dir)
    assert_equals(1, len(synced))
    assert_equals(stored_file, synced[0])
def test_syncFromSynapse_Links(): """This function tests recursive download of links as defined in syncFromSynapse most of the functionality of this function are already tested in the tests/integration/test_command_line_client::test_command_get_recursive_and_query which means that the only test if for path=None """ # Create a Project project_entity = syn.store(synapseclient.Project(name=str(uuid.uuid4()))) schedule_for_cleanup(project_entity.id) # Create a Folder in Project folder_entity = syn.store( Folder(name=str(uuid.uuid4()), parent=project_entity)) # Create a Folder hiearchy in folder_entity inner_folder_entity = syn.store( Folder(name=str(uuid.uuid4()), parent=folder_entity)) second_folder_entity = syn.store( Folder(name=str(uuid.uuid4()), parent=project_entity)) # Create and upload two files in Folder uploaded_paths = [] for i in range(2): f = utils.make_bogus_data_file() uploaded_paths.append(f) schedule_for_cleanup(f) file_entity = syn.store(File(f, parent=project_entity)) # Create links to inner folder syn.store(Link(file_entity.id, parent=folder_entity)) #Add a file in the project level as well f = utils.make_bogus_data_file() uploaded_paths.append(f) schedule_for_cleanup(f) file_entity = syn.store(File(f, parent=second_folder_entity)) # Create link to inner folder syn.store(Link(file_entity.id, parent=inner_folder_entity)) ### Test recursive get output = synapseutils.syncFromSynapse(syn, folder_entity, followLink=True) assert len(output) == len(uploaded_paths) for f in output: assert f.path in uploaded_paths
def test_syncToSynapse():
    """Round-trip test: upload a manifest with syncToSynapse, re-download with
    syncFromSynapse, and verify locations, annotations, and provenance match.
    """
    # Test upload of accurate manifest
    manifest = _makeManifest(header+row1+row2+row3)
    synapseutils.syncToSynapse(syn, manifest, sendMessages=False, retries=2)

    # syn.getChildren() used by syncFromSynapse() may intermittently have timing issues
    time.sleep(3)

    # Download using syncFromSynapse
    tmpdir = tempfile.mkdtemp()
    schedule_for_cleanup(tmpdir)
    entities = synapseutils.syncFromSynapse(syn, project, path=tmpdir)

    # Index both manifests by file basename so rows line up regardless of order.
    orig_df = pd.read_csv(manifest, sep='\t')
    orig_df.index = [os.path.basename(p) for p in orig_df.path]
    new_df = pd.read_csv(os.path.join(tmpdir, synapseutils.sync.MANIFEST_FILENAME), sep='\t')
    new_df.index = [os.path.basename(p) for p in new_df.path]

    assert_equals(len(orig_df), len(new_df))
    new_df = new_df.loc[orig_df.index]

    # Validate what was uploaded is in right location
    assert_true(new_df.parent.equals(orig_df.parent), 'Downloaded files not stored in same location')

    # Validate that annotations were set: drop the structural manifest columns
    # so only user annotations remain.
    cols = synapseutils.sync.REQUIRED_FIELDS + synapseutils.sync.FILE_CONSTRUCTOR_FIELDS\
        + synapseutils.sync.STORE_FUNCTION_FIELDS
    orig_anots = orig_df.drop(cols, axis=1, errors='ignore')
    new_anots = new_df.drop(cols, axis=1, errors='ignore')
    assert_equals(orig_anots.shape[1], new_anots.shape[1])  # Verify that we have the same number of cols
    assert_true(new_anots.equals(orig_anots.loc[:, new_anots.columns]), 'Annotations different')

    # Validate that provenance is correct
    for provenanceType in ['executed', 'used']:
        # Go through each row
        for orig, new in zip(orig_df[provenanceType], new_df[provenanceType]):
            if not pd.isnull(orig) and not pd.isnull(new):
                # Convert local file paths into synId.versionNumber strings
                orig_list = ['%s.%s' % (i.id, i.versionNumber) if isinstance(i, Entity) else i
                             for i in syn._convertProvenanceList(orig.split(';'))]
                new_list = ['%s.%s' % (i.id, i.versionNumber) if isinstance(i, Entity) else i
                            for i in syn._convertProvenanceList(new.split(';'))]
                assert_equals(set(orig_list), set(new_list))
def test_syncFromSynapse(): """This function tests recursive download as defined in syncFromSynapse most of the functionality of this function are already tested in the tests/integration/test_command_line_client::test_command_get_recursive_and_query which means that the only test if for path=None """ # Create a Project project_entity = syn.store(synapseclient.Project(name=str(uuid.uuid4()))) schedule_for_cleanup(project_entity.id) # Create a Folder in Project folder_entity = syn.store( Folder(name=str(uuid.uuid4()), parent=project_entity)) # Create and upload two files in Folder uploaded_paths = [] for i in range(2): f = utils.make_bogus_data_file() uploaded_paths.append(f) schedule_for_cleanup(f) syn.store(File(f, parent=folder_entity)) # Add a file in the project level as well f = utils.make_bogus_data_file() uploaded_paths.append(f) schedule_for_cleanup(f) syn.store(File(f, parent=project_entity)) # syncFromSynapse() uses chunkedQuery() which will return results that are eventually consistent # but not always right after the entity is created. start_time = time.time() while len(list(syn.getChildren(project_entity))) != 2: assert_less(time.time() - start_time, QUERY_TIMEOUT_SEC) time.sleep(2) # Test recursive get output = synapseutils.syncFromSynapse(syn, project_entity) assert_equals(len(output), len(uploaded_paths)) for f in output: assert_in(f.path, uploaded_paths)
def test_syncFromSynapse(): """This function tests recursive download as defined in syncFromSynapse most of the functionality of this function are already tested in the tests/integration/test_command_line_client::test_command_get_recursive_and_query which means that the only test if for path=None """ # Create a Project project_entity = syn.store(synapseclient.Project(name=str(uuid.uuid4()))) schedule_for_cleanup(project_entity.id) # Create a Folder in Project folder_entity = syn.store(Folder(name=str(uuid.uuid4()), parent=project_entity)) # Create and upload two files in Folder uploaded_paths = [] for i in range(2): f = utils.make_bogus_data_file() uploaded_paths.append(f) schedule_for_cleanup(f) syn.store(File(f, parent=folder_entity)) # Add a file in the project level as well f = utils.make_bogus_data_file() uploaded_paths.append(f) schedule_for_cleanup(f) syn.store(File(f, parent=project_entity)) # syncFromSynapse() uses chunkedQuery() which will return results that are eventually consistent # but not always right after the entity is created. start_time = time.time() while len(list(syn.getChildren(project_entity))) != 2: assert_less(time.time() - start_time, QUERY_TIMEOUT_SEC) time.sleep(2) # Test recursive get output = synapseutils.syncFromSynapse(syn, project_entity) assert_equals(len(output), len(uploaded_paths)) for f in output: assert_in(f.path, uploaded_paths)
# if fname.endswith('sortedByCoord.out.bam'): # reversed_list_len += 1 #index = 0 #for line in reversed_list: # for line in input_file: # line = line.strip().split('\t') # syn_id = line[0] # fname = line[1] # if fname.endswith('sortedByCoord.out.bam'): # index += 1 # print(str(index)+'/'+str(reversed_list_len)) # if os.path.isfile('RNAseq/CMC_HBCC/bam/'+fname): # print('RNAseq/CMC_HBCC/bam/'+fname+' exists') # continue # files = synapseutils.syncFromSynapse(syn, syn_id, path = 'RNAseq/CMC_HBCC/bam/') files = synapseutils.syncFromSynapse(syn, 'syn10476936', path = 'RNAseq/CMC_HBCC/bam/') # Download metadata for Brainseq(LIBD_szControl), CMC, and CMC_HBCC to check if we already have all those samples #results = syn.tableQuery('select * from syn8466658 where "study" = \'CMC\' AND "assay" = \'rnaSeq\'') #CMC_meta_dir = 'metadata/CMC/' #if not os.path.exists(CMC_meta_dir): # os.makedirs(CMC_meta_dir) #with open(CMC_meta_dir+'/CMC_RNAseq_metadata.txt','w') as out: # for row in results: # row = [str(x) for x in row] # out.write('\t'.join(row)+'\n') #results = syn.tableQuery('select * from syn8466658 where "study" = \'CMC_HBCC\' AND "assay" = \'rnaSeq\'') #CMC_HBCC_meta_dir = 'metadata/CMC_HBCC' #if not os.path.exists(CMC_HBCC_meta_dir):
# Log in with the credentials collected earlier in the script.
syn.login(user, password)
print('Sync AMP-AD')

# Per-dataset download toggles.
download_MSBB = True
download_MAYOTCX = True
download_MAYOCBE = True
download_ROSMAP = False
download_all = True

# STAR MSBB
if download_MSBB:
    if not download_all:
        # Only download the samples whose name starts with 'hB'.
        for file in syn.getChildren('syn8540822'):
            if file['name'].startswith('hB'):
                synapseutils.syncFromSynapse(syn, file['id'], path='BAMs/MSBB/')
                print(file['id'], file['name'])
    else:
        # aligned BAM files MSBB
        files = synapseutils.syncFromSynapse(syn, 'syn8540822', path='BAMs/MSBB/')
        # STAR MSBB
        files = synapseutils.syncFromSynapse(syn, 'syn12104381', path='MSBB/STAR')
        # metadata MSBB
        files = synapseutils.syncFromSynapse(syn, 'syn7392158', path='metadata/')
def test_syncFromSynapse__empty_folder():
    """Syncing an empty folder yields an empty list."""
    folder = Folder(name="the folder", parent="whatever", id="syn123")
    fetched_stub = Folder(name="asssdfa", parent="whatever")
    with patch.object(syn, "getChildren", return_value=[]), \
            patch.object(syn, "get", return_value=fetched_stub):
        assert_equals(list(), synapseutils.syncFromSynapse(syn, folder))
def test_syncFromSynapse__file_entity():
    """Syncing a single file entity returns it without listing children."""
    file_entity = File(name="a file", parent="some parent", id="syn456")
    with patch.object(syn, "getChildren", return_value=[file_entity]) as patch_syn_get_children, \
            patch.object(syn, "get", return_value=file_entity):
        assert_equals([file_entity], synapseutils.syncFromSynapse(syn, file_entity))
    patch_syn_get_children.assert_not_called()
import os
import synapseclient
import synapseutils
import argparse
import getpass

parser = argparse.ArgumentParser(
    description='Download RNAseq and genotypes of CMC.')
parser.add_argument('RNAseq_directory',
                    help='Directory to download RNAseq data to')
args = parser.parse_args()

user = input("Synapse username:")
# FIX(review): the credential/login lines were corrupted (redacted with ******)
# in the source; reconstructed from the sibling download scripts -- confirm
# against the original before relying on this.
password = getpass.getpass('Synapse password:')
syn = synapseclient.login(user, password)
print('Sync Brainseq')

# RNAseq
#files = synapseutils.syncFromSynapse(syn, 'syn8227833', path = 'RNAseq/')
# Phenotype file
files = synapseutils.syncFromSynapse(syn, 'syn12299752', path='phenotype_data/')
files = synapseutils.syncFromSynapse(syn, 'syn7203084', path='phenotype_data/')
files = synapseutils.syncFromSynapse(syn, 'syn7203089', path='phenotype_data/')
files = synapseutils.syncFromSynapse(syn, 'syn8017780', path='phenotype_data/')
# Load required libraries import synapseclient import synapseutils # login to Synapse syn = synapseclient.login( email='', # your synapse email id password='' # your password ) files = synapseutils.syncFromSynapse(syn, entity='syn20564743', path='./data/')
def test_syncFromSynapse__file_entity(syn):
    """Syncing a single file entity returns it without listing children."""
    file_entity = File(name="a file", parent="some parent", id="syn456")
    with patch.object(syn, "getChildren", return_value=[file_entity]) as patch_syn_get_children, \
            patch.object(syn, "get", return_value=file_entity):
        result = synapseutils.syncFromSynapse(syn, file_entity)
    assert result == [file_entity]
    patch_syn_get_children.assert_not_called()
import synapseclient
import getpass
import synapseutils
from datetime import datetime

# Date-stamped output directory, e.g. 2020-01-31-Sieberts-eQTLs/
today = datetime.today().strftime('%Y-%m-%d')

user = input("Synapse username:")
# FIX(review): the credential/login lines were corrupted (redacted with ******)
# in the source; reconstructed from the sibling download scripts -- confirm
# against the original before relying on this.
password = getpass.getpass('Synapse password:')
syn = synapseclient.login(user, password)

synapseutils.syncFromSynapse(syn, 'syn16984409',
                             path=today + '-Sieberts-eQTLs/')
# Mayo TCX eQTL
synapseutils.syncFromSynapse(syn, 'syn16984410',
                             path=today + '-Sieberts-eQTLs/')
# Mayo CER eQTL
synapseutils.syncFromSynapse(syn, 'syn16984411',
                             path=today + '-Sieberts-eQTLs/')
# meta-analysis eQTL
# TODO(review): this call was truncated in the source; the path argument is
# assumed to match the calls above.
synapseutils.syncFromSynapse(syn, 'syn16984815',
                             path=today + '-Sieberts-eQTLs/')
def test_syncFromSynase__manifest(syn):
    """Verify that we generate manifest files when syncing to a location outside of the cache."""
    project = Project(name="the project", parent="whatever", id="syn123")
    path1 = '/tmp/foo'
    file1 = File(name="file1", parent=project, id="syn456", path=path1)
    path2 = '/tmp/afolder/bar'
    file2 = File(name="file2", parent=project, id="syn789", parentId='syn098', path=path2)
    folder = Folder(name="afolder", parent=project, id="syn098")
    entities = {
        file1.id: file1,
        file2.id: file2,
        folder.id: folder,
    }

    def syn_get_side_effect(entity, *args, **kwargs):
        # Serve entities from the in-memory index instead of the server.
        return entities[id_of(entity)]

    # file1 has empty provenance; file2 carries a named/described Activity.
    file_1_provenance = Activity(data={
        'used': '',
        'executed': '',
    })
    file_2_provenance = Activity(data={
        'used': '',
        'executed': '',
        'name': 'foo',
        'description': 'bar',
    })

    provenance = {
        file1.id: file_1_provenance,
        file2.id: file_2_provenance,
    }

    def getProvenance_side_effect(entity, *args, **kwargs):
        return provenance[id_of(entity)]

    # Expected tab-separated manifests: one for the project root, one for the
    # sub folder (which lists only the file that lives inside it).
    expected_project_manifest = \
        f"""path\tparent\tname\tsynapseStore\tcontentType\tused\texecuted\tactivityName\tactivityDescription
{path1}\tsyn123\tfile1\tTrue\t\t\t\t\t
{path2}\tsyn098\tfile2\tTrue\t\t\t\tfoo\tbar
"""
    expected_folder_manifest = \
        f"""path\tparent\tname\tsynapseStore\tcontentType\tused\texecuted\tactivityName\tactivityDescription
{path2}\tsyn098\tfile2\tTrue\t\t\t\tfoo\tbar
"""
    expected_synced_files = [file2, file1]

    with tempfile.TemporaryDirectory() as sync_dir:
        with patch.object(syn, "getChildren", side_effect=[[folder, file1], [file2]]),\
                patch.object(syn, "get", side_effect=syn_get_side_effect),\
                patch.object(syn, "getProvenance") as patch_syn_get_provenance:
            patch_syn_get_provenance.side_effect = getProvenance_side_effect

            synced_files = synapseutils.syncFromSynapse(syn, project, path=sync_dir)
            assert sorted([id_of(e) for e in expected_synced_files]) == \
                sorted([id_of(e) for e in synced_files])

            # we only expect two calls to provenance even though there are three rows
            # of provenance data in the manifests (two in the outer project, one in
            # the folder) since one of the files is repeated in both manifests we
            # expect only the single get provenance call
            assert len(expected_synced_files) == patch_syn_get_provenance.call_count

            # we should have two manifest files, one rooted at the project and one
            # rooted in the sub folder
            _compareCsv(
                expected_project_manifest,
                os.path.join(sync_dir, synapseutils.sync.MANIFEST_FILENAME))
            _compareCsv(
                expected_folder_manifest,
                os.path.join(sync_dir, folder.name, synapseutils.sync.MANIFEST_FILENAME))
import getpass

parser = argparse.ArgumentParser(
    description='Download RNAseq and genotypes of CMC.')
parser.add_argument('RNAseq_directory',
                    help='Directory to download RNAseq data to')
parser.add_argument('Genotype_directory',
                    help='Directory to download genotypes to')
args = parser.parse_args()

user = input("Synapse username:")
# FIX(review): the credential/login lines were corrupted (redacted with ******)
# in the source; reconstructed from the sibling download scripts -- confirm
# against the original before relying on this.
password = getpass.getpass('Synapse password:')
syn = synapseclient.login(user, password)
print('Sync CMC')

# RNAseq
#files = synapseutils.syncFromSynapse(syn, 'syn3280440', path = args.RNAseq_directory)
# Genotypes
#files = synapseutils.syncFromSynapse(syn, 'syn3275211', path = args.Genotype_directory)
# Metadata
files = synapseutils.syncFromSynapse(syn, 'syn3354385', path='metadata/')
files = synapseutils.syncFromSynapse(syn, 'syn3346807', path='metadata/')
files = synapseutils.syncFromSynapse(syn, 'syn18358379', path='metadata/')
files = synapseutils.syncFromSynapse(syn, 'syn18403963', path='metadata/')
files = synapseutils.syncFromSynapse(syn, 'syn3346441', path='metadata/')
files = synapseutils.syncFromSynapse(syn, 'syn18358480', path='metadata/')
import synapseclient
import synapseutils

syn = synapseclient.Synapse()
# SECURITY(review): hard-coded credentials are checked into source -- rotate
# this password and switch to a Synapse config file or a credential prompt.
syn.login('vinyesm', 'M02p4sse?')

# Earlier experiments with other DREAM5 folders, kept for reference:
#files = synapseutils.syncFromSynapse(syn, 'syn2820442')
#files = synapseutils.syncFromSynapse(syn,'syn2820442','/home/marina/Marina/learning-gm/DREAM5/sub_challenge1')
#files = synapseutils.syncFromSynapse(syn,'syn2867578','/home/marina/Marina/learning-gm/DREAM5/sub_challenge2')

# Download the Network-Inference challenge data.
files = synapseutils.syncFromSynapse(
    syn, 'syn2787211', '/home/marina/Marina/learning-gm/DREAM5/Network-Inference')

#entity = syn.get('syn2820442')
#print(entity)
args = parser.parse_args()

user = input("Synapse username:")
# FIX(review): the credential/login lines were corrupted (redacted with ******)
# in the source; reconstructed from the sibling download scripts -- confirm
# against the original before relying on this.
password = getpass.getpass('Synapse password:')
syn = synapseclient.login(user, password)
print('Sync AMP-AD')

# STAR MSBB
# files = synapseutils.syncFromSynapse(syn, 'syn12104381', path = 'MSBB/STAR')#, exclude='.bam')
# metadata MSBB
files = synapseutils.syncFromSynapse(syn, 'syn7392158', path = 'metadata/')#, exclude='.bam')
# STAR MAYO
#files = synapseutils.syncFromSynapse(syn, 'syn12104376', path = 'MAYO/STAR/')#, exclude='.bam')
# metadata MAYO
files = synapseutils.syncFromSynapse(syn, 'syn11384571', path = 'metadata/')#, exclude='.bam')
files = synapseutils.syncFromSynapse(syn, 'syn5223705', path = 'metadata/')#, exclude='.bam')
files = synapseutils.syncFromSynapse(syn, 'syn3817650', path = 'metadata/')#, exclude='.bam')
# STAR ROSMAP
#files = synapseutils.syncFromSynapse(syn, 'syn12104384', path = 'ROSMAP/STAR/')#, exclude='.bam')
# ROSMAP metadata
files = synapseutils.syncFromSynapse(syn, 'syn3157322', path = 'metadata')#, exclude='.bam')
files = synapseutils.syncFromSynapse(syn, 'syn11958660', path = 'metadata')#, exclude='.bam')
files = synapseutils.syncFromSynapse(syn, 'syn11384589', path = 'metadata')#, exclude='.bam')
def get_dream_data(syn, cohort, omic_type, source=None):
    """Retrieves a particular -omic dataset used in the challenge.

    Args:
        syn (synapseclient.Synapse): A logged-into Synapse instance.
        cohort (str): A TCGA cohort included in the challenge.
        omic_type (str): A type of -omics used in the challenge.
            Note that multiple -omic types can be downloaded by listing
            the -omic types in a single string, separated by a '+'.

    Examples:
        >>> import synapseclient
        >>> syn = synapseclient.Synapse()
        >>> syn.login()
        >>>
        >>> get_dream_data(syn, "BRCA", "rna")
        >>> get_dream_data(syn, "OV", "cna", "PNNL")
        >>> get_dream_data(syn, "BRCA", "rna+cna")

    """
    # Refresh the local copy of the challenge data; the return value was
    # previously bound to an unused local (`syn_manifest`), now dropped.
    synapseutils.syncFromSynapse(syn, "syn10139523", ifcollision='overwrite.local')

    # if we want to use multiple -omic datasets, get Synapse ids
    # for all of them...
    if '+' in omic_type:
        omic_type = omic_type.split('+')
        dream_ids = [syn_ids[cohort][omic_tp] for omic_tp in omic_type]

    # ...otherwise, get the Synapse id for the one dataset
    else:
        dream_ids = syn_ids[cohort][omic_type]
        omic_type = [omic_type]
        dream_ids = [dream_ids]

    # some datasets have multiple sources; pick the requested one.
    dream_ids = [
        dream_id if isinstance(dream_id, str) else dream_id[source]
        for dream_id in dream_ids
    ]

    # read in the -omic dataset(s) and merge them according to sample ID
    omic_list = {
        omic_tp: pd.read_csv(syn.get("syn{}".format(dream_id)).path,
                             sep='\t', index_col=0).transpose()
        for dream_id, omic_tp in zip(dream_ids, omic_type)
    }

    # replaces missing values according to the -omic type
    omic_list = {
        omic_tp: (omic_tbl.fillna(0.0) if omic_tp == 'cna'
                  else omic_tbl.fillna(np.min(np.min(omic_tbl)) - 1))
        for omic_tp, omic_tbl in omic_list.items()
    }

    # merges datasets into one table, converts the pandas MultiIndex of this
    # table into a flat list of strings
    dream_data = pd.concat(omic_list, join='inner', axis=1)
    dream_data.columns = ["__".join(col) for col in dream_data.columns.values]

    return dream_data
# Load required libraries import synapseclient import synapseutils # login to Synapse syn = synapseclient.login( email='', # your synapse email id password='' # your password ) files = synapseutils.syncFromSynapse(syn, entity='syn18507661', path='./data/') files = synapseutils.syncFromSynapse(syn, entity='syn20632048', path='./data/')
def test_syncFromSynapse__empty_folder(syn):
    """Syncing an empty folder yields an empty list."""
    folder = Folder(name="the folder", parent="whatever", id="syn123")
    fetched_stub = Folder(name="asssdfa", parent="whatever")
    with patch.object(syn, "getChildren", return_value=[]), \
            patch.object(syn, "get", return_value=fetched_stub):
        assert synapseutils.syncFromSynapse(syn, folder) == []
help='Directory to download RNAseq data to') args = parser.parse_args() user = input("Synapse username:") password = getpass.getpass('Synapse password:'******'Sync psychEncode') # BipSeq #files = synapseutils.syncFromSynapse(syn, 'syn8403872', path = 'RNAseq/BipSeq/') #files = synapseutils.syncFromSynapse(syn, 'syn5845180', path = 'metadata/BipSeq/') # BrainGVEX #files = synapseutils.syncFromSynapse(syn, 'syn7062404', path = 'RNAseq/BrainGVEX/') #files = synapseutils.syncFromSynapse(syn, 'syn3270014', path = 'metadata/BrainGVEX/') # EpiGABA #files = synapseutils.syncFromSynapse(syn, 'syn4588490', path = 'RNAseq/EpiGABA/') #files = synapseutils.syncFromSynapse(syn, 'syn4588489', path = 'metadata/EpiGABA/') # UCLA-ASD files = synapseutils.syncFromSynapse(syn, 'syn4587614', path='metadata/UCLA_ASD/') files = synapseutils.syncFromSynapse(syn, 'syn4587615', path='RNAseq/UCLA_ASD/') # LIBD_szControl # NOTE: This is Brainseq, we already have these samples
# Log in with the credentials collected earlier in the script.
syn.login(user,password)
print('Sync AMP-AD')

# Per-dataset download toggles.
download_MSBB = True
download_MAYOTCX = True
download_MAYOCBE = True
download_ROSMAP = True
download_all = True

# STAR MSBB
if download_MSBB:
    if not download_all:
        # Only download the samples whose name starts with 'hB'.
        for file in syn.getChildren('syn8540822'):
            if file['name'].startswith('hB'):
                synapseutils.syncFromSynapse(syn, file['id'], path = 'BAMs/MSBB/')
                print(file['id'], file['name'])
    else:
        # fastq files
        files = synapseutils.syncFromSynapse(syn, 'syn8612191', path = 'fastq/MSBB/')
        # aligned BAM files MSBB
        # files = synapseutils.syncFromSynapse(syn, 'syn8540822', path = 'BAMs/MSBB/')
        # STAR MSBB
        # files = synapseutils.syncFromSynapse(syn, 'syn12104381', path = 'MSBB/STAR')
        # metadata MSBB
        files = synapseutils.syncFromSynapse(syn, 'syn7392158', path = 'metadata/')

if download_MAYOCBE:
    # fastq
    files = synapseutils.syncFromSynapse(syn, 'syn8612213', path = 'fastq/MayoCBE/')