def get(args, syn):
    """Download entities from Synapse according to the parsed CLI ``args``.

    Three modes:
      * recursive   -- sync an entire container via synapseutils.syncFromSynapse
      * queryString -- download every entity id matched by the query
      * default     -- a single syn.get, either by id or by MD5 of a local file path

    :param args: parsed argparse namespace (id, version, recursive, queryString,
                 downloadLocation, limitSearch, followLink)
    :param syn:  an authenticated Synapse client
    :raises ValueError: when --version is combined with --recursive, or
                        version/id is combined with a query download
    """
    if args.recursive:
        if args.version is not None:
            raise ValueError('You cannot specify a version making a recursive download.')
        synapseutils.syncFromSynapse(syn, args.id, args.downloadLocation, followLink=args.followLink)
    elif args.queryString is not None:
        if args.version is not None or args.id is not None:
            raise ValueError('You cannot specify a version or id when you are downloading a query.')
        ids = _getIdsFromQuery(args.queryString, syn)
        # renamed loop variable: `id` shadowed the builtin
        for entity_id in ids:
            syn.get(entity_id, downloadLocation=args.downloadLocation)
    else:
        # an args.id that is an existing local file path means: search by MD5
        if isinstance(args.id, str) and os.path.isfile(args.id):
            entity = syn.get(args.id, version=args.version, limitSearch=args.limitSearch, downloadFile=False)
            if "path" in entity and entity.path is not None and os.path.exists(entity.path):
                print("Associated file: %s with synapse ID %s" % (entity.path, entity.id))
        # normal syn.get operation
        else:
            entity = syn.get(args.id, version=args.version,  # limitSearch=args.limitSearch,
                             followLink=args.followLink,
                             downloadLocation=args.downloadLocation)
            if "path" in entity and entity.path is not None and os.path.exists(entity.path):
                print("Downloaded file: %s" % os.path.basename(entity.path))
            else:
                print('WARNING: No files associated with entity %s\n' % entity.id)
                print(entity)
        # NOTE: removed a stray `print('Creating %s' % entity.path)` -- it reported
        # a misleading "Creating" message after a download and could print None.
def test_syncFromSynapse__downloadFile_is_false(syn):
    """
    Verify when passing the argument downloadFile is equal to False,
    syncFromSynapse won't download the file to clients' local end.
    """

    project = Project(name="the project", parent="whatever", id="syn123")
    file = File(name="a file", parent=project, id="syn456")
    folder = Folder(name="a folder", parent=project, id="syn789")

    # map synapse ids back to entity objects for the mocked syn.get
    lookup = {entity.id: entity for entity in (file, folder)}

    def fake_get(entity, *args, **kwargs):
        return lookup[id_of(entity)]

    children_patch = patch.object(syn, "getChildren", side_effect=[[folder, file], []])
    get_patch = patch.object(syn, "get", side_effect=fake_get)
    with children_patch, get_patch as patch_syn_get:
        synapseutils.syncFromSynapse(syn, project, downloadFile=False)
        # the single file child must be fetched with downloadFile=False
        patch_syn_get.assert_called_once_with(
            file['id'],
            downloadLocation=None,
            ifcollision='overwrite.local',
            followLink=False,
            downloadFile=False,
        )
Exemplo n.º 3
0
def get(args, syn):
    """Download entities from Synapse according to the parsed CLI ``args``.

    Three modes:
      * recursive   -- sync an entire container via synapseutils.syncFromSynapse
      * queryString -- download every entity id matched by the query
      * default     -- a single syn.get, either by id or by MD5 of a local file path

    :raises ValueError: when --version is combined with --recursive, or
                        version/id is combined with a query download
    """
    if args.recursive:
        if args.version is not None:
            raise ValueError('You cannot specify a version making a recursive download.')
        synapseutils.syncFromSynapse(syn, args.id, args.downloadLocation, followLink=args.followLink)
    elif args.queryString is not None:
        if args.version is not None or args.id is not None:
            # fixed typo in the error message: 'dowloading' -> 'downloading'
            raise ValueError('You cannot specify a version or id when you are downloading a query.')
        ids = _getIdsFromQuery(args.queryString, syn)
        # renamed loop variable: `id` shadowed the builtin
        for entity_id in ids:
            syn.get(entity_id, downloadLocation=args.downloadLocation)
    else:
        # an args.id that is an existing local file path means: search by MD5
        if isinstance(args.id, str) and os.path.isfile(args.id):
            entity = syn.get(args.id, version=args.version, limitSearch=args.limitSearch, downloadFile=False)
            if "path" in entity and entity.path is not None and os.path.exists(entity.path):
                print("Associated file: %s with synapse ID %s" % (entity.path, entity.id))
        # normal syn.get operation
        else:
            entity = syn.get(args.id, version=args.version,  # limitSearch=args.limitSearch,
                             followLink=args.followLink,
                             downloadLocation=args.downloadLocation)
            if "path" in entity and entity.path is not None and os.path.exists(entity.path):
                print("Downloaded file: %s" % os.path.basename(entity.path))
            else:
                print('WARNING: No files associated with entity %s\n' % entity.id)
                print(entity)
        # NOTE: removed a stray `print('Creating %s' % entity.path)` -- it reported
        # a misleading "Creating" message after a download and could print None.
def test_syncFromSynapse__manifest_is_root(
        mock__get_file_entity_provenance_dict, mock_generateManifest, syn):
    """
    Verify manifest argument equal to "root" that pass in to syncFromSynapse, it will create root_manifest file only.
    """

    project = Project(name="the project", parent="whatever", id="syn123")
    file1 = File(name="a file", parent=project, id="syn456")
    folder = Folder(name="a folder", parent=project, id="syn789")
    file2 = File(name="a file2", parent=folder, id="syn789123")

    # Nested layout under the project:
    #   project -> file1
    #           -> folder -> file2
    lookup = {entity.id: entity for entity in (file1, folder, file2)}

    def fake_get(entity, *args, **kwargs):
        return lookup[id_of(entity)]

    mock__get_file_entity_provenance_dict.return_value = {}

    children_patch = patch.object(syn, "getChildren", side_effect=[[folder, file1], [file2]])
    get_patch = patch.object(syn, "get", side_effect=fake_get)
    with children_patch, get_patch as patch_syn_get:

        synapseutils.syncFromSynapse(syn,
                                     project,
                                     path="./",
                                     downloadFile=False,
                                     manifest="root")

        expected_get_calls = [
            call(
                file1['id'],
                downloadLocation="./",
                ifcollision='overwrite.local',
                followLink=False,
                downloadFile=False,
            ),
            call(
                file2['id'],
                downloadLocation="./a folder",
                ifcollision='overwrite.local',
                followLink=False,
                downloadFile=False,
            ),
        ]
        assert patch_syn_get.call_args_list == expected_get_calls

        # only the root manifest should have been generated
        assert mock_generateManifest.call_count == 1

        # the root manifest covers both files, in traversal order
        manifest_files = mock_generateManifest.call_args_list[0][0][1]
        assert len(manifest_files) == 2
        assert manifest_files[0].id == "syn456"
        assert manifest_files[1].id == "syn789123"
	def fetch_release(self, release):
		"""Sync a Synapse release into /app/releases/<release>, ensuring the
		PHS-TRISEQ-V2 gene panel file is present (copied from the bundled
		reference if the release itself lacks it)."""
		release_dir = f"/app/releases/{release}"
		synapseutils.syncFromSynapse(self.syn, release, followLink=True, path=release_dir)

		# Fall back to the reference copy when 'PHS-TRISEQ-V2' was not part of the release
		panel_file = f"{release_dir}/data_gene_panel_PHS-TRISEQ-V2.txt"
		if not Path(panel_file).is_file():
			copyfile("/app/references/data_gene_panel_PHS-TRISEQ-V2.txt", panel_file)
def test_syncFromSynapse__manifest_value_is_invalid(syn):
    """An unrecognized manifest option must be rejected with a ValueError."""
    project = Project(name="the project", parent="whatever", id="syn123")
    with pytest.raises(ValueError) as ve:
        synapseutils.syncFromSynapse(
            syn, project, path="./", downloadFile=False, manifest="invalid_str")
    expected = 'Value of manifest option should be one of the ("all", "root", "suppress")'
    assert expected == str(ve.value)
def test_syncFromSynapse__project_contains_empty_folder(syn):
    """A project holding one file and one empty folder syncs only the file,
    while still traversing both children."""
    project = Project(name="the project", parent="whatever", id="syn123")
    file = File(name="a file", parent=project, id="syn456")
    folder = Folder(name="a folder", parent=project, id="syn789")

    lookup = {file.id: file, folder.id: folder}

    def fake_get(entity, *args, **kwargs):
        return lookup[id_of(entity)]

    with patch.object(syn, "getChildren", side_effect=[[folder, file], []]) as patch_syn_get_children,\
            patch.object(syn, "get", side_effect=fake_get) as patch_syn_get:

        assert synapseutils.syncFromSynapse(syn, project) == [file]
        # children listed for the project first, then for the (empty) folder
        assert patch_syn_get_children.call_args_list == [call(project['id']), call(folder['id'])]
        # only the file is actually fetched
        patch_syn_get.assert_called_once_with(
            file['id'],
            downloadLocation=None,
            ifcollision='overwrite.local',
            followLink=False,
            downloadFile=True,
        )
def test_syncFromSynapse__folder_contains_one_file(syn):
    """Syncing a folder with a single file child returns that file.

    Bug fix: the original ended with ``patch_syn_get_children.called_with(...)``,
    which is not a Mock assertion -- it silently creates a child mock and never
    fails.  ``assert_called_once_with`` performs the intended verification.
    """
    folder = Folder(name="the folder", parent="whatever", id="syn123")
    file = File(name="a file", parent=folder, id="syn456")
    with patch.object(syn, "getChildren", return_value=[file]) as patch_syn_get_children,\
            patch.object(syn, "get", return_value=file):
        assert [file] == synapseutils.syncFromSynapse(syn, folder)
        patch_syn_get_children.assert_called_once_with(folder['id'])
Exemplo n.º 9
0
def test_syncFromSynapse__given_file_id(test_state):
    """Syncing a single file id should return exactly that file."""
    local_path = utils.make_bogus_data_file()
    test_state.schedule_for_cleanup(local_path)
    stored = test_state.syn.store(
        File(local_path, name=str(uuid.uuid4()), parent=test_state.project, synapseStore=False))
    synced = synapseutils.syncFromSynapse(test_state.syn, stored.id)
    assert len(synced) == 1
    assert stored == synced[0]
Exemplo n.º 10
0
def test_syncFromSynapse():
    """Integration test for recursive download via syncFromSynapse with path=None.

    Most of the functionality is already covered by
    tests/integration/test_command_line_client::test_command_get_recursive_and_query,
    so this only exercises the default (cache) download location.
    """
    # Create a Project
    project_entity = syn.store(synapseclient.Project(name=str(uuid.uuid4())))
    schedule_for_cleanup(project_entity.id)

    # Create a Folder in Project
    folder_entity = syn.store(
        Folder(name=str(uuid.uuid4()), parent=project_entity))

    # Create and upload two files in Folder
    uploaded_paths = []
    for i in range(2):
        f = utils.make_bogus_data_file()
        uploaded_paths.append(f)
        schedule_for_cleanup(f)
        syn.store(File(f, parent=folder_entity))
    # Add a file in the project level as well
    f = utils.make_bogus_data_file()
    uploaded_paths.append(f)
    schedule_for_cleanup(f)
    syn.store(File(f, parent=project_entity))

    # Recursive get: every uploaded file should come back, each with its local path
    output = synapseutils.syncFromSynapse(syn, project_entity)

    assert_equals(len(output), len(uploaded_paths))
    for f in output:
        assert_in(f.path, uploaded_paths)
def test_syncFromSynapse__folder_contains_one_file():
    """Syncing a folder containing one file returns a list with that file.

    Bug fix: ``Mock.called_with`` does not exist, so the original last line
    silently created a child mock instead of verifying the call.
    ``assert_called_once_with`` performs the intended check.
    """
    folder = Folder(name="the folder", parent="whatever", id="syn123")
    file = File(name="a file", parent=folder, id="syn456")
    with patch.object(syn, "getChildren", return_value=[file]) as patch_syn_get_children,\
         patch.object(syn, "get", return_value=file):
        assert_equals([file], synapseutils.syncFromSynapse(syn, folder))
        patch_syn_get_children.assert_called_once_with(folder['id'])
Exemplo n.º 12
0
def test_syncFromSynapse__given_file_id():
    """A file id passed to syncFromSynapse yields a one-element list with that file."""
    local_path = utils.make_bogus_data_file()
    schedule_for_cleanup(local_path)
    stored = syn.store(File(local_path, name=str(uuid.uuid4()), parent=project, synapseStore=False))
    synced = synapseutils.syncFromSynapse(syn, stored.id)
    assert_equals(len(synced), 1)
    assert_equals(stored, synced[0])
def test_syncFromSynapse__given_file_id():
    """Duplicate scenario: sync by file id returns exactly the stored file."""
    bogus_path = utils.make_bogus_data_file()
    schedule_for_cleanup(bogus_path)
    stored_file = syn.store(
        File(bogus_path, name=str(uuid.uuid4()), parent=project, synapseStore=False))
    result = synapseutils.syncFromSynapse(syn, stored_file.id)
    assert_equals(1, len(result))
    assert_equals(result[0], stored_file)
def test_syncFromSynapse():
    """Integration test for recursive download via syncFromSynapse with path=None.

    Most of the functionality is already covered by
    tests/integration/test_command_line_client::test_command_get_recursive_and_query,
    so this only exercises the default (cache) download location.
    """
    # Create a Project
    project_entity = syn.store(synapseclient.Project(name=str(uuid.uuid4())))
    schedule_for_cleanup(project_entity.id)

    # Create a Folder in Project
    folder_entity = syn.store(Folder(name=str(uuid.uuid4()), parent=project_entity))

    # Create and upload two files in Folder
    uploaded_paths = []
    for i in range(2):
        f = utils.make_bogus_data_file()
        uploaded_paths.append(f)
        schedule_for_cleanup(f)
        syn.store(File(f, parent=folder_entity))
    # Add a file in the project level as well
    f = utils.make_bogus_data_file()
    uploaded_paths.append(f)
    schedule_for_cleanup(f)
    syn.store(File(f, parent=project_entity))

    # Recursive get: every uploaded file should come back, each with its local path
    output = synapseutils.syncFromSynapse(syn, project_entity)

    assert_equals(len(output), len(uploaded_paths))
    for f in output:
        assert_in(f.path, uploaded_paths)
Exemplo n.º 15
0
def test_syncToSynapse(test_state):
    """Round-trip test: upload a manifest with syncToSynapse, download it back
    with syncFromSynapse, and verify locations, annotations, and provenance
    survived the trip.
    """
    # Test upload of accurate manifest
    manifest = _makeManifest(
        test_state.header + test_state.row1 + test_state.row2 + test_state.row3,
        test_state.schedule_for_cleanup
    )
    synapseutils.syncToSynapse(test_state.syn, manifest, sendMessages=False, retries=2)

    # syn.getChildren() used by syncFromSynapse() may intermittently have timing issues
    time.sleep(3)

    # Download using syncFromSynapse
    tmpdir = tempfile.mkdtemp()
    test_state.schedule_for_cleanup(tmpdir)
    synapseutils.syncFromSynapse(test_state.syn, test_state.project, path=tmpdir)

    # Index both manifests by basename so rows can be compared pairwise
    orig_df = pd.read_csv(manifest, sep='\t')
    orig_df.index = [os.path.basename(p) for p in orig_df.path]
    new_df = pd.read_csv(os.path.join(tmpdir, synapseutils.sync.MANIFEST_FILENAME), sep='\t')
    new_df.index = [os.path.basename(p) for p in new_df.path]

    assert len(orig_df) == len(new_df)
    new_df = new_df.loc[orig_df.index]

    # Validate what was uploaded is in right location
    assert new_df.parent.equals(orig_df.parent), 'Downloaded files not stored in same location'

    # Validate that annotations were set: drop the structural manifest columns,
    # whatever remains should be user annotations and must match
    cols = synapseutils.sync.REQUIRED_FIELDS + synapseutils.sync.FILE_CONSTRUCTOR_FIELDS\
        + synapseutils.sync.STORE_FUNCTION_FIELDS + synapseutils.sync.PROVENANCE_FIELDS
    orig_anots = orig_df.drop(cols, axis=1, errors='ignore')
    new_anots = new_df.drop(cols, axis=1, errors='ignore')
    assert orig_anots.shape[1] == new_anots.shape[1]  # Verify that we have the same number of cols
    assert new_anots.equals(orig_anots.loc[:, new_anots.columns]), 'Annotations different'

    # Validate that provenance is correct
    for provenanceType in ['executed', 'used']:
        # Go through each row
        for orig, new in zip(orig_df[provenanceType], new_df[provenanceType]):
            if not pd.isnull(orig) and not pd.isnull(new):
                # Convert local file paths into synId.versionNumber strings
                orig_list = ['%s.%s' % (i.id, i.versionNumber) if isinstance(i, Entity) else i
                             for i in test_state.syn._convertProvenanceList(orig.split(';'))]
                new_list = ['%s.%s' % (i.id, i.versionNumber) if isinstance(i, Entity) else i
                            for i in test_state.syn._convertProvenanceList(new.split(';'))]
                assert set(orig_list) == set(new_list)
def test_syncFromSynapse__project_contains_empty_folder():
    """A file next to an empty folder: only the file is returned, but both
    children are traversed and fetched."""
    project = Project(name="the project", parent="whatever", id="syn123")
    file = File(name="a file", parent=project, id="syn456")
    folder = Folder(name="a folder", parent=project, id="syn789")
    children_patch = patch.object(syn, "getChildren", side_effect=[[folder, file], []])
    get_patch = patch.object(syn, "get", side_effect=[folder, file])
    with children_patch as patch_syn_get_children, get_patch as patch_syn_get:
        assert_equals([file], synapseutils.syncFromSynapse(syn, project))
        # getChildren: first the project, then the (empty) folder
        assert_list_equal(
            [call(project['id']), call(folder['id'])],
            patch_syn_get_children.call_args_list)
        # syn.get: once per child, with the default download options
        assert_list_equal(
            [call(folder['id'], downloadLocation=None, ifcollision='overwrite.local', followLink=False),
             call(file['id'], downloadLocation=None, ifcollision='overwrite.local', followLink=False)],
            patch_syn_get.call_args_list)
def test_syncFromSynapse__project_contains_empty_folder():
    """Duplicate of the empty-folder scenario: verify traversal order and
    the arguments passed to syn.get."""
    project = Project(name="the project", parent="whatever", id="syn123")
    file = File(name="a file", parent=project, id="syn456")
    folder = Folder(name="a folder", parent=project, id="syn789")
    with patch.object(syn, "getChildren", side_effect=[[folder, file], []]) as patch_syn_get_children,\
         patch.object(syn, "get", side_effect=[folder, file]) as patch_syn_get:
        result = synapseutils.syncFromSynapse(syn, project)
        assert_equals([file], result)
        expected_children_calls = [call(project['id']), call(folder['id'])]
        assert_list_equal(expected_children_calls, patch_syn_get_children.call_args_list)
        # both children fetched with identical default options
        common = dict(downloadLocation=None, ifcollision='overwrite.local', followLink=False)
        expected_get_calls = [call(folder['id'], **common), call(file['id'], **common)]
        assert_list_equal(expected_get_calls, patch_syn_get.call_args_list)
Exemplo n.º 18
0
def test_syncFromSynapse__children_contain_non_file():
    """Non-file children (a table Schema) must be skipped by syncFromSynapse."""
    proj = syn.store(Project(name="test_syncFromSynapse_children_non_file" + str(uuid.uuid4())))
    schedule_for_cleanup(proj)

    temp_file = utils.make_bogus_data_file()
    schedule_for_cleanup(temp_file)
    file_entity = syn.store(
        File(temp_file,
             name="temp_file_test_syncFromSynapse_children_non_file" + str(uuid.uuid4()),
             parent=proj))

    # Store a table schema alongside the file; it must not appear in the result.
    # (Dropped the unused `table_schema` binding -- the return value was never read.)
    syn.store(Schema(name="table_test_syncFromSynapse", parent=proj))

    temp_folder = tempfile.mkdtemp()
    schedule_for_cleanup(temp_folder)

    files_list = synapseutils.syncFromSynapse(syn, proj, temp_folder)
    assert_equals(1, len(files_list))
    assert_equals(file_entity, files_list[0])
def test_syncFromSynapse__children_contain_non_file():
    """Only file children are returned; a sibling table schema is ignored."""
    proj = syn.store(Project(name="test_syncFromSynapse_children_non_file" + str(uuid.uuid4())))
    schedule_for_cleanup(proj)

    temp_file = utils.make_bogus_data_file()
    schedule_for_cleanup(temp_file)
    unique_name = "temp_file_test_syncFromSynapse_children_non_file" + str(uuid.uuid4())
    file_entity = syn.store(File(temp_file, name=unique_name, parent=proj))

    # a non-file child that the sync must skip
    syn.store(Schema(name="table_test_syncFromSynapse", parent=proj))

    temp_folder = tempfile.mkdtemp()
    schedule_for_cleanup(temp_folder)

    synced = synapseutils.syncFromSynapse(syn, proj, temp_folder)
    assert_equals(len(synced), 1)
    assert_equals(synced[0], file_entity)
Exemplo n.º 20
0
def test_syncFromSynapse_Links():
    """Integration test for recursive download of Links with followLink=True
    and path=None.

    Most of the functionality is already covered by
    tests/integration/test_command_line_client::test_command_get_recursive_and_query;
    this verifies that links inside the synced folder resolve to the files
    they point at, even when the targets live outside the folder.
    """
    # Create a Project
    project_entity = syn.store(synapseclient.Project(name=str(uuid.uuid4())))
    schedule_for_cleanup(project_entity.id)

    # Create a Folder in Project
    folder_entity = syn.store(
        Folder(name=str(uuid.uuid4()), parent=project_entity))
    # Create a Folder hierarchy in folder_entity
    inner_folder_entity = syn.store(
        Folder(name=str(uuid.uuid4()), parent=folder_entity))

    second_folder_entity = syn.store(
        Folder(name=str(uuid.uuid4()), parent=project_entity))

    # Create and upload two files at project level, linked from folder_entity
    uploaded_paths = []
    for i in range(2):
        f = utils.make_bogus_data_file()
        uploaded_paths.append(f)
        schedule_for_cleanup(f)
        file_entity = syn.store(File(f, parent=project_entity))
        # Create links to inner folder
        syn.store(Link(file_entity.id, parent=folder_entity))
    # Add a file in a sibling folder, linked from the inner folder
    f = utils.make_bogus_data_file()
    uploaded_paths.append(f)
    schedule_for_cleanup(f)
    file_entity = syn.store(File(f, parent=second_folder_entity))
    # Create link to inner folder
    syn.store(Link(file_entity.id, parent=inner_folder_entity))

    # Recursive get with followLink=True: all three link targets must be returned
    output = synapseutils.syncFromSynapse(syn, folder_entity, followLink=True)

    assert len(output) == len(uploaded_paths)
    for f in output:
        assert f.path in uploaded_paths
def test_syncToSynapse():
    """Round-trip test: upload a manifest with syncToSynapse, download it back
    with syncFromSynapse, and verify locations, annotations, and provenance
    survived the trip.
    """
    # Test upload of accurate manifest
    manifest = _makeManifest(header+row1+row2+row3)
    synapseutils.syncToSynapse(syn, manifest, sendMessages=False, retries=2)

    # syn.getChildren() used by syncFromSynapse() may intermittently have timing issues
    time.sleep(3)

    # Download using syncFromSynapse
    tmpdir = tempfile.mkdtemp()
    schedule_for_cleanup(tmpdir)
    entities = synapseutils.syncFromSynapse(syn, project, path=tmpdir)

    # Index both manifests by basename so rows can be compared pairwise
    orig_df = pd.read_csv(manifest, sep='\t')
    orig_df.index = [os.path.basename(p) for p in orig_df.path]
    new_df = pd.read_csv(os.path.join(tmpdir, synapseutils.sync.MANIFEST_FILENAME), sep='\t')
    new_df.index = [os.path.basename(p) for p in new_df.path]

    assert_equals(len(orig_df), len(new_df))
    new_df = new_df.loc[orig_df.index]

    # Validate what was uploaded is in right location
    assert_true(new_df.parent.equals(orig_df.parent), 'Downloaded files not stored in same location')

    # Validate that annotations were set: drop the structural manifest columns,
    # whatever remains should be user annotations and must match
    cols = synapseutils.sync.REQUIRED_FIELDS + synapseutils.sync.FILE_CONSTRUCTOR_FIELDS\
           + synapseutils.sync.STORE_FUNCTION_FIELDS
    orig_anots = orig_df.drop(cols, axis=1, errors='ignore')
    new_anots = new_df.drop(cols, axis=1, errors='ignore')
    assert_equals(orig_anots.shape[1], new_anots.shape[1])  # Verify that we have the same number of cols
    assert_true(new_anots.equals(orig_anots.loc[:, new_anots.columns]), 'Annotations different')

    # Validate that provenance is correct
    for provenanceType in ['executed', 'used']:
        # Go through each row
        for orig, new in zip(orig_df[provenanceType], new_df[provenanceType]):
            if not pd.isnull(orig) and not pd.isnull(new):
                # Convert local file paths into synId.versionNumber strings
                orig_list = ['%s.%s' % (i.id, i.versionNumber) if isinstance(i, Entity) else i
                             for i in syn._convertProvenanceList(orig.split(';'))]
                new_list = ['%s.%s' % (i.id, i.versionNumber) if isinstance(i, Entity) else i
                             for i in syn._convertProvenanceList(new.split(';'))]
                assert_equals(set(orig_list), set(new_list))
Exemplo n.º 22
0
def test_syncFromSynapse():
    """Integration test for recursive download via syncFromSynapse with path=None.

    Most of the functionality is already covered by
    tests/integration/test_command_line_client::test_command_get_recursive_and_query;
    this variant polls getChildren() first to wait out eventual consistency.
    """
    # Create a Project
    project_entity = syn.store(synapseclient.Project(name=str(uuid.uuid4())))
    schedule_for_cleanup(project_entity.id)

    # Create a Folder in Project
    folder_entity = syn.store(
        Folder(name=str(uuid.uuid4()), parent=project_entity))

    # Create and upload two files in Folder
    uploaded_paths = []
    for i in range(2):
        f = utils.make_bogus_data_file()
        uploaded_paths.append(f)
        schedule_for_cleanup(f)
        syn.store(File(f, parent=folder_entity))
    # Add a file in the project level as well
    f = utils.make_bogus_data_file()
    uploaded_paths.append(f)
    schedule_for_cleanup(f)
    syn.store(File(f, parent=project_entity))

    # syncFromSynapse() uses chunkedQuery() which will return results that are eventually consistent
    # but not always right after the entity is created; poll until both children appear
    start_time = time.time()
    while len(list(syn.getChildren(project_entity))) != 2:
        assert_less(time.time() - start_time, QUERY_TIMEOUT_SEC)
        time.sleep(2)

    # Recursive get: every uploaded file should come back, each with its local path
    output = synapseutils.syncFromSynapse(syn, project_entity)

    assert_equals(len(output), len(uploaded_paths))
    for f in output:
        assert_in(f.path, uploaded_paths)
def test_syncFromSynapse():
    """Integration test for recursive download via syncFromSynapse with path=None.

    Most of the functionality is already covered by
    tests/integration/test_command_line_client::test_command_get_recursive_and_query;
    this variant polls getChildren() first to wait out eventual consistency.
    """
    # Create a Project
    project_entity = syn.store(synapseclient.Project(name=str(uuid.uuid4())))
    schedule_for_cleanup(project_entity.id)

    # Create a Folder in Project
    folder_entity = syn.store(Folder(name=str(uuid.uuid4()), parent=project_entity))

    # Create and upload two files in Folder
    uploaded_paths = []
    for i in range(2):
        f = utils.make_bogus_data_file()
        uploaded_paths.append(f)
        schedule_for_cleanup(f)
        syn.store(File(f, parent=folder_entity))
    # Add a file in the project level as well
    f = utils.make_bogus_data_file()
    uploaded_paths.append(f)
    schedule_for_cleanup(f)
    syn.store(File(f, parent=project_entity))

    # syncFromSynapse() uses chunkedQuery() which will return results that are eventually consistent
    # but not always right after the entity is created; poll until both children appear
    start_time = time.time()
    while len(list(syn.getChildren(project_entity))) != 2:
        assert_less(time.time() - start_time, QUERY_TIMEOUT_SEC)
        time.sleep(2)

    # Recursive get: every uploaded file should come back, each with its local path
    output = synapseutils.syncFromSynapse(syn, project_entity)

    assert_equals(len(output), len(uploaded_paths))
    for f in output:
        assert_in(f.path, uploaded_paths)
Exemplo n.º 24
0
# NOTE(review): the commented-out block below is an older per-file download loop
# that filtered a file list for '*sortedByCoord.out.bam' names and skipped files
# already on disk.  It was replaced by the single bulk sync call further down.
#        if fname.endswith('sortedByCoord.out.bam'):
#            reversed_list_len += 1
#index = 0 
#for line in reversed_list:
#   for line in input_file:
#    line = line.strip().split('\t')
#    syn_id = line[0]
#    fname = line[1]
#    if fname.endswith('sortedByCoord.out.bam'):
#        index += 1
#        print(str(index)+'/'+str(reversed_list_len))
#        if os.path.isfile('RNAseq/CMC_HBCC/bam/'+fname):
#            print('RNAseq/CMC_HBCC/bam/'+fname+' exists')
#            continue
#        files = synapseutils.syncFromSynapse(syn, syn_id, path = 'RNAseq/CMC_HBCC/bam/')
# Bulk download of the CMC_HBCC RNAseq BAM container into RNAseq/CMC_HBCC/bam/
files = synapseutils.syncFromSynapse(syn, 'syn10476936', path = 'RNAseq/CMC_HBCC/bam/')


# Download metadata for Brainseq(LIBD_szControl), CMC, and CMC_HBCC to check if we already have all those samples
#results = syn.tableQuery('select * from syn8466658 where "study" = \'CMC\' AND "assay" = \'rnaSeq\'')
#CMC_meta_dir = 'metadata/CMC/'
#if not os.path.exists(CMC_meta_dir):
#    os.makedirs(CMC_meta_dir)
#with open(CMC_meta_dir+'/CMC_RNAseq_metadata.txt','w') as out:
#    for row in results:
#        row = [str(x) for x in row]
#        out.write('\t'.join(row)+'\n')

#results = syn.tableQuery('select * from syn8466658 where "study" = \'CMC_HBCC\' AND "assay" = \'rnaSeq\'')
#CMC_HBCC_meta_dir = 'metadata/CMC_HBCC'
#if not os.path.exists(CMC_HBCC_meta_dir):
# NOTE(review): `user` and `password` are not defined in this excerpt -- presumably
# collected earlier via input()/getpass in the part of the script not shown; verify.
syn.login(user, password)

print('Sync AMP-AD')
# Toggles selecting which AMP-AD cohorts to download
download_MSBB = True
download_MAYOTCX = True
download_MAYOCBE = True
download_ROSMAP = False
download_all = True

# STAR MSBB
if download_MSBB:
    if not download_all:
        # Selective mode: only sync MSBB children whose names start with 'hB'
        for file in syn.getChildren('syn8540822'):
            if file['name'].startswith('hB'):
                synapseutils.syncFromSynapse(syn,
                                             file['id'],
                                             path='BAMs/MSBB/')
                print(file['id'], file['name'])
    else:
        # aligned BAM files MSBB
        files = synapseutils.syncFromSynapse(syn,
                                             'syn8540822',
                                             path='BAMs/MSBB/')
        # STAR MSBB
        files = synapseutils.syncFromSynapse(syn,
                                             'syn12104381',
                                             path='MSBB/STAR')
        # metadata MSBB
        files = synapseutils.syncFromSynapse(syn,
                                             'syn7392158',
                                             path='metadata/')
def test_syncFromSynapse__empty_folder():
    """Syncing a folder with no children yields an empty list."""
    folder = Folder(name="the folder", parent="whatever", id="syn123")
    get_stub = patch.object(syn, "get", return_value=Folder(name="asssdfa", parent="whatever"))
    with patch.object(syn, "getChildren", return_value=[]), get_stub:
        assert_equals(synapseutils.syncFromSynapse(syn, folder), list())
def test_syncFromSynapse__file_entity():
    """Passing a File entity directly returns it without listing children."""
    file = File(name="a file", parent="some parent", id="syn456")
    with patch.object(syn, "get", return_value=file),\
         patch.object(syn, "getChildren", return_value=[file]) as patch_syn_get_children:
        result = synapseutils.syncFromSynapse(syn, file)
        assert_equals([file], result)
        patch_syn_get_children.assert_not_called()
Exemplo n.º 28
0
import os
import synapseclient
import synapseutils
import argparse
import getpass

parser = argparse.ArgumentParser(
    description='Download RNAseq and genotypes of CMC.')
parser.add_argument('RNAseq_directory',
                    help='Directory to download RNAseq data to')

args = parser.parse_args()

# Prompt for credentials interactively rather than hard-coding them.
# NOTE(review): the original line was garbled by scraping
# (`getpass.getpass('Synapse password:'******'Sync Brainseq')`, a syntax
# error); reconstructed as the standard getpass -> login -> print sequence.
user = input("Synapse username:")
password = getpass.getpass('Synapse password:')
syn = synapseclient.login(user, password)

print('Sync Brainseq')
# RNAseq
#files = synapseutils.syncFromSynapse(syn, 'syn8227833', path = 'RNAseq/')

# Phenotype file
files = synapseutils.syncFromSynapse(syn,
                                     'syn12299752',
                                     path='phenotype_data/')
files = synapseutils.syncFromSynapse(syn, 'syn7203084', path='phenotype_data/')
files = synapseutils.syncFromSynapse(syn, 'syn7203089', path='phenotype_data/')
files = synapseutils.syncFromSynapse(syn, 'syn8017780', path='phenotype_data/')
# Load required libraries
import synapseclient
import synapseutils

# login to Synapse
# NOTE(review): credentials are blank placeholders here -- fill in before running,
# or better, use a Synapse config file / auth token instead of hard-coding them.
syn = synapseclient.login(
    email='',  # your synapse email id
    password=''  # your password
)
# Recursively download the syn20564743 container into ./data/
files = synapseutils.syncFromSynapse(syn, entity='syn20564743', path='./data/')
def test_syncFromSynapse__file_entity(syn):
    """A File entity passed directly is returned as-is; no child listing happens."""
    file = File(name="a file", parent="some parent", id="syn456")
    with patch.object(syn, "get", return_value=file),\
            patch.object(syn, "getChildren", return_value=[file]) as patch_syn_get_children:
        result = synapseutils.syncFromSynapse(syn, file)
        assert result == [file]
        patch_syn_get_children.assert_not_called()
Exemplo n.º 31
0
import synapseclient
import getpass
import synapseutils
from datetime import datetime

# Date-stamped output directory for today's eQTL pull
today = datetime.today().strftime('%Y-%m-%d')

# Prompt for credentials interactively rather than hard-coding them.
# NOTE(review): the original login line was garbled by scraping
# (`getpass.getpass('Synapse password:'******'syn16984409', ...)`, a syntax
# error); reconstructed as the standard getpass -> login sequence, and the
# truncated final sync call completed to match the repeated pattern.
user = input("Synapse username:")
password = getpass.getpass('Synapse password:')
syn = synapseclient.login(user, password)

# ROSMAP DLPFC eQTL
synapseutils.syncFromSynapse(syn,
                             'syn16984409',
                             path=today + '-Sieberts-eQTLs/')

# Mayo TCX eQTL
synapseutils.syncFromSynapse(syn,
                             'syn16984410',
                             path=today + '-Sieberts-eQTLs/')

# Mayo CER eQTL
synapseutils.syncFromSynapse(syn,
                             'syn16984411',
                             path=today + '-Sieberts-eQTLs/')

# meta-analysis eQTL
synapseutils.syncFromSynapse(syn,
                             'syn16984815',
                             path=today + '-Sieberts-eQTLs/')
# NOTE(review): function name has a typo ('Synase'); left unchanged so the
# public test name (and any test selection referring to it) stays stable.
def test_syncFromSynase__manifest(syn):
    """Verify that we generate manifest files when syncing to a location outside of the cache."""

    # Fixture hierarchy: project -> {file1, folder -> file2}
    project = Project(name="the project", parent="whatever", id="syn123")
    path1 = '/tmp/foo'
    file1 = File(name="file1", parent=project, id="syn456", path=path1)
    path2 = '/tmp/afolder/bar'
    file2 = File(name="file2",
                 parent=project,
                 id="syn789",
                 parentId='syn098',
                 path=path2)
    folder = Folder(name="afolder", parent=project, id="syn098")
    # lookup table backing the mocked syn.get below
    entities = {
        file1.id: file1,
        file2.id: file2,
        folder.id: folder,
    }

    # syn.get stand-in: resolve whatever identifier form is passed to the fixture
    def syn_get_side_effect(entity, *args, **kwargs):
        return entities[id_of(entity)]

    # file1 has empty provenance; file2 has a named/described activity so the
    # manifest rows below differ in their activityName/activityDescription columns
    file_1_provenance = Activity(data={
        'used': '',
        'executed': '',
    })
    file_2_provenance = Activity(data={
        'used': '',
        'executed': '',
        'name': 'foo',
        'description': 'bar',
    })

    provenance = {
        file1.id: file_1_provenance,
        file2.id: file_2_provenance,
    }

    # syn.getProvenance stand-in keyed the same way as syn_get_side_effect
    def getProvenance_side_effect(entity, *args, **kwargs):
        return provenance[id_of(entity)]

    # Expected tab-separated manifest rooted at the project (both files)...
    expected_project_manifest = \
        f"""path\tparent\tname\tsynapseStore\tcontentType\tused\texecuted\tactivityName\tactivityDescription
{path1}\tsyn123\tfile1\tTrue\t\t\t\t\t
{path2}\tsyn098\tfile2\tTrue\t\t\t\tfoo\tbar
"""

    # ...and the manifest rooted at the sub-folder (file2 only)
    expected_folder_manifest = \
        f"""path\tparent\tname\tsynapseStore\tcontentType\tused\texecuted\tactivityName\tactivityDescription
{path2}\tsyn098\tfile2\tTrue\t\t\t\tfoo\tbar
"""

    expected_synced_files = [file2, file1]

    # sync into a temp dir so we exercise the outside-the-cache manifest path
    with tempfile.TemporaryDirectory() as sync_dir:

        # getChildren returns the project's children first, then the folder's
        with patch.object(syn, "getChildren", side_effect=[[folder, file1], [file2]]),\
                patch.object(syn, "get", side_effect=syn_get_side_effect),\
                patch.object(syn, "getProvenance") as patch_syn_get_provenance:

            patch_syn_get_provenance.side_effect = getProvenance_side_effect

            synced_files = synapseutils.syncFromSynapse(syn,
                                                        project,
                                                        path=sync_dir)
            # order-insensitive comparison by synapse id
            assert sorted([id_of(e) for e in expected_synced_files
                           ]) == sorted([id_of(e) for e in synced_files])

            # we only expect two calls to provenance even though there are three rows of provenance data
            # in the manifests (two in the outer project, one in the folder)
            # since one of the files is repeated in both manifests we expect only the single get provenance call
            assert len(
                expected_synced_files) == patch_syn_get_provenance.call_count

            # we should have two manifest files, one rooted at the project and one rooted in the sub folder

            _compareCsv(
                expected_project_manifest,
                os.path.join(sync_dir, synapseutils.sync.MANIFEST_FILENAME))
            _compareCsv(
                expected_folder_manifest,
                os.path.join(sync_dir, folder.name,
                             synapseutils.sync.MANIFEST_FILENAME))
Exemplo n.º 33
0
import getpass

parser = argparse.ArgumentParser(
    description='Download RNAseq and genotypes of CMC.')
parser.add_argument('RNAseq_directory',
                    help='Directory to download RNAseq data to')
parser.add_argument('Genotype_directory',
                    help='Directory to download genotypes to')

args = parser.parse_args()

user = input("Synapse username:")
# NOTE(review): the original line was mangled by the scraper ('******');
# reconstructed to the prompt/login/announce pattern used elsewhere here.
password = getpass.getpass('Synapse password:')
syn = synapseclient.login(user, password)
print('Sync CMC')

# RNAseq
#files = synapseutils.syncFromSynapse(syn, 'syn3280440', path = args.RNAseq_directory)
# Genotypes
#files = synapseutils.syncFromSynapse(syn, 'syn3275211', path = args.Genotype_directory)
# Metadata
files = synapseutils.syncFromSynapse(syn, 'syn3354385', path='metadata/')
files = synapseutils.syncFromSynapse(syn, 'syn3346807', path='metadata/')
files = synapseutils.syncFromSynapse(syn, 'syn18358379', path='metadata/')
files = synapseutils.syncFromSynapse(syn, 'syn18403963', path='metadata/')
files = synapseutils.syncFromSynapse(syn, 'syn3346441', path='metadata/')
files = synapseutils.syncFromSynapse(syn, 'syn18358480', path='metadata/')
Exemplo n.º 34
0
import synapseclient
import synapseutils
import getpass

syn = synapseclient.Synapse()
# SECURITY: the original hard-coded a real username and password here.
# Never commit credentials to source; prompt for them (or use a Synapse
# config file / personal access token) instead.
syn.login(input("Synapse username:"), getpass.getpass("Synapse password:"))
#files = synapseutils.syncFromSynapse(syn, 'syn2820442')

#files = synapseutils.syncFromSynapse(syn,'syn2820442','/home/marina/Marina/learning-gm/DREAM5/sub_challenge1')
#files = synapseutils.syncFromSynapse(syn,'syn2867578','/home/marina/Marina/learning-gm/DREAM5/sub_challenge2')
files = synapseutils.syncFromSynapse(
    syn, 'syn2787211',
    '/home/marina/Marina/learning-gm/DREAM5/Network-Inference')

#entity = syn.get('syn2820442')
#print(entity)
args = parser.parse_args()


user = input("Synapse username:")
# NOTE(review): the original line was mangled by the scraper ('******');
# reconstructed to match the intact login pattern used elsewhere in this file.
password = getpass.getpass('Synapse password:')
syn = synapseclient.login(user, password)
print('Sync AMP-AD')

# STAR MSBB
# files = synapseutils.syncFromSynapse(syn, 'syn12104381', path = 'MSBB/STAR')#, exclude='.bam')
# metadata MSBB
files = synapseutils.syncFromSynapse(syn, 'syn7392158', path = 'metadata/')#, exclude='.bam')

# STAR MAYO
#files = synapseutils.syncFromSynapse(syn, 'syn12104376', path = 'MAYO/STAR/')#, exclude='.bam')
# metadata MAYO
files = synapseutils.syncFromSynapse(syn, 'syn11384571', path = 'metadata/')#, exclude='.bam')
files = synapseutils.syncFromSynapse(syn, 'syn5223705', path = 'metadata/')#, exclude='.bam')
files = synapseutils.syncFromSynapse(syn, 'syn3817650', path = 'metadata/')#, exclude='.bam')

# STAR ROSMAP
#files = synapseutils.syncFromSynapse(syn, 'syn12104384', path = 'ROSMAP/STAR/')#, exclude='.bam')
# ROSMAP metadata
files = synapseutils.syncFromSynapse(syn, 'syn3157322', path = 'metadata')#, exclude='.bam')
files = synapseutils.syncFromSynapse(syn, 'syn11958660', path = 'metadata')#, exclude='.bam')
files = synapseutils.syncFromSynapse(syn, 'syn11384589', path = 'metadata')#, exclude='.bam')
Exemplo n.º 36
0
def get_dream_data(syn, cohort, omic_type, source=None):
    """Retrieves a particular -omic dataset used in the challenge.

    Args:
        syn (synapseclient.Synapse): A logged-into Synapse instance.
        cohort (str): A TCGA cohort included in the challenge.
        omic_type (str): A type of -omics used in the challenge.
            Note that multiple -omic types can be downloaded by listing
            the -omic types in a single string, separated by a '+'.
        source (str, optional): When the `syn_ids` lookup for a
            cohort/-omic pair maps to multiple sources (a dict of
            source -> synapse id), selects which source's id to use.
            Required in that case; ignored when the lookup is a plain id.

    Returns:
        pd.DataFrame: samples x features table; the requested -omic
            dataset(s) inner-joined on sample ID, with columns
            flattened to '<omic>__<feature>' strings.

    Examples:
        >>> import synapseclient
        >>> syn = synapseclient.Synapse()
        >>> syn.login()
        >>>
        >>> get_dream_data(syn, "BRCA", "rna")
        >>> get_dream_data(syn, "OV", "cna", "PNNL")
        >>> get_dream_data(syn, "BRCA", "rna+cna")

    """
    # Refresh the local copy of the challenge data; only the download side
    # effect matters, so the returned manifest is deliberately discarded.
    synapseutils.syncFromSynapse(syn,
                                 "syn10139523",
                                 ifcollision='overwrite.local')

    # if we want to use multiple -omic datasets, get Synapse ids
    # for all of them...
    if '+' in omic_type:
        omic_type = omic_type.split('+')
        dream_ids = [syn_ids[cohort][omic_tp] for omic_tp in omic_type]

    # ...otherwise, get the Synapse id for the one dataset
    else:
        dream_ids = syn_ids[cohort][omic_type]
        omic_type = [omic_type]
        dream_ids = [dream_ids]

    # resolve multi-source entries (dicts) down to a single id per -omic;
    # NOTE(review): raises KeyError if such an entry exists and `source`
    # was not supplied -- presumably callers know which pairs need it.
    dream_ids = [
        dream_id if isinstance(dream_id, str) else dream_id[source]
        for dream_id in dream_ids
    ]

    # read in the -omic dataset(s) and merge them according to sample ID
    omic_list = {
        omic_tp: pd.read_csv(syn.get("syn{}".format(dream_id)).path,
                             sep='\t',
                             index_col=0).transpose()
        for dream_id, omic_tp in zip(dream_ids, omic_type)
    }

    # replaces missing values according to the -omic type: CNA gaps mean
    # "no alteration" (0.0); other -omics use one below the observed minimum
    omic_list = {
        omic_tp: (omic_tbl.fillna(0.0) if omic_tp == 'cna' else
                  omic_tbl.fillna(np.min(np.min(omic_tbl)) - 1))
        for omic_tp, omic_tbl in omic_list.items()
    }

    # merges datasets into one table, converts the pandas MultiIndex of this
    # table into a flat list of strings
    dream_data = pd.concat(omic_list, join='inner', axis=1)
    dream_data.columns = ["__".join(col) for col in dream_data.columns.values]

    return dream_data
Exemplo n.º 37
0
# Synapse client libraries
import synapseclient
import synapseutils
# Authenticate against Synapse (credentials to be filled in by the user).
syn = synapseclient.login(
    email='',  # your synapse email id
    password=''  # your password
)
# Mirror both entity trees into ./data/
files = synapseutils.syncFromSynapse(syn, entity='syn18507661', path='./data/')

files = synapseutils.syncFromSynapse(syn, entity='syn20632048', path='./data/')
def test_syncFromSynapse__empty_folder(syn):
    """Syncing a folder with no children yields an empty list."""
    folder = Folder(name="the folder", parent="whatever", id="syn123")
    resolved = Folder(name="asssdfa", parent="whatever")
    with patch.object(syn, "get", return_value=resolved), \
            patch.object(syn, "getChildren", return_value=[]):
        result = synapseutils.syncFromSynapse(syn, folder)
        assert result == list()
                    help='Directory to download RNAseq data to')

args = parser.parse_args()

user = input("Synapse username:")
# NOTE(review): the original line was mangled by the scraper ('******');
# reconstructed to match the intact login pattern used elsewhere in this file.
password = getpass.getpass('Synapse password:')
syn = synapseclient.login(user, password)
print('Sync psychEncode')
# BipSeq
#files = synapseutils.syncFromSynapse(syn, 'syn8403872', path = 'RNAseq/BipSeq/')
#files = synapseutils.syncFromSynapse(syn, 'syn5845180', path = 'metadata/BipSeq/')
# BrainGVEX
#files = synapseutils.syncFromSynapse(syn, 'syn7062404', path = 'RNAseq/BrainGVEX/')
#files = synapseutils.syncFromSynapse(syn, 'syn3270014', path = 'metadata/BrainGVEX/')
# EpiGABA
#files = synapseutils.syncFromSynapse(syn, 'syn4588490', path = 'RNAseq/EpiGABA/')
#files = synapseutils.syncFromSynapse(syn, 'syn4588489', path = 'metadata/EpiGABA/')
# UCLA-ASD
files = synapseutils.syncFromSynapse(syn,
                                     'syn4587614',
                                     path='metadata/UCLA_ASD/')
files = synapseutils.syncFromSynapse(syn,
                                     'syn4587615',
                                     path='RNAseq/UCLA_ASD/')

# LIBD_szControl
# NOTE: This is Brainseq, we already have these samples
Exemplo n.º 40
0
syn.login(user,password)


print('Sync AMP-AD')
# Per-cohort toggles for which AMP-AD datasets to pull.
download_MSBB = True
download_MAYOTCX = True
download_MAYOCBE = True
download_ROSMAP = True
# When True, pull whole entity trees; when False, cherry-pick files.
# NOTE(review): with download_all hard-coded True, the 'hB' cherry-pick
# branch below is currently dead code.
download_all = True

# STAR MSBB
if download_MSBB:
    if not download_all:
        # cherry-pick only the BAMs whose names start with 'hB'
        for file in syn.getChildren('syn8540822'):
            if file['name'].startswith('hB'):
                synapseutils.syncFromSynapse(syn, file['id'], path = 'BAMs/MSBB/')
                print(file['id'], file['name'])
    else:
        # fastq files
        files = synapseutils.syncFromSynapse(syn, 'syn8612191', path = 'fastq/MSBB/')

        # aligned BAM files MSBB
#        files = synapseutils.syncFromSynapse(syn, 'syn8540822', path = 'BAMs/MSBB/')
        # STAR MSBB
#        files = synapseutils.syncFromSynapse(syn, 'syn12104381', path = 'MSBB/STAR')
        # metadata MSBB
        files = synapseutils.syncFromSynapse(syn, 'syn7392158', path = 'metadata/')

if download_MAYOCBE:
    # fastq
    files = synapseutils.syncFromSynapse(syn, 'syn8612213', path = 'fastq/MayoCBE/')