def synapse_login(self):
        # Logs in to Synapse and reports whether a client object ended up set.
        print('Logging into Synapse...')
        # Credentials already stored on the instance win; otherwise fall back
        # to the SYNAPSE_USERNAME / SYNAPSE_PASSWORD environment variables.
        self._username = self._username or os.getenv('SYNAPSE_USERNAME')
        self._password = self._password or os.getenv('SYNAPSE_PASSWORD')

        # NOTE(review): the statement below looks mangled -- several original
        # lines (username prompt, password prompt, the login attempt and its
        # error handler) appear to have been fused by credential scrubbing.
        # It is not valid Python as written; restore it from the upstream
        # source before relying on this function.
        if not self._username:
            self._username = input('Synapse username: '******'Synapse password: '******'Synapse login failed: {0}'.format(str(ex)))

        # True only when a client object is present on the instance.
        return self._synapse_client is not None
示例#2
0
def submit(folder, id, password):
    """Upload every file in ``folder`` to Synapse and submit it for scoring.

    Each file is stored under a fixed Synapse project and then submitted to
    one of two evaluation queues: files whose name starts with ``'L'`` go to
    evaluation 7071644, all others to evaluation 7212779.

    Args:
        folder: Directory whose entries are uploaded (as listed by
            ``os.listdir``).
        id: Synapse account e-mail, passed to ``login`` as ``email``.
        password: Synapse account password.
    """
    syn = synapseclient.Synapse()
    syn.login(email=id, password=password)

    project = syn.get(7118431)  # Nabla
    #project = syn.get(7341111)  # Stackd

    for filename in os.listdir(folder):
        if filename.startswith('L'):
            evaluation = syn.getEvaluation(7071644)
        else:
            evaluation = syn.getEvaluation(7212779)
        # %-formatting prints the same text on Python 2 and Python 3; the
        # former ``print "Submitting", filename`` is a syntax error on 3.
        print("Submitting %s" % filename)
        filename = os.path.join(folder, filename)

        f_handler = synapseclient.File(filename, parent=project)
        entity = syn.store(f_handler)
        syn.submit(evaluation, entity, name='test', team='Nabla')
示例#3
0
def main(wf_name, to_upload, synapse_parent_id):
    """Mirror the non-empty files below ``to_upload`` into Synapse.

    Logs in with the SYNAPSE_LOGIN / SYNAPSE_API_KEY environment variables,
    walks ``to_upload`` and stores every file that has a size above zero and
    is not already listed in the remote index. Stored files get their
    ``workflow`` attribute set to ``wf_name`` and ``workflowOption`` set to
    ``"all"``.
    """
    client = synapseclient.Synapse()
    client.login(os.environ["SYNAPSE_LOGIN"],
                 apiKey=os.environ["SYNAPSE_API_KEY"])

    s_base_folder, remotes = _accumulate_remotes(synapse_parent_id, client)

    for dirpath, _, filenames in os.walk(to_upload):
        remote_dirpath = os.path.join(s_base_folder.name, dirpath)
        if not filenames:
            # Nothing to upload from this directory.
            continue

        s_folder, remotes = _remote_folder(remote_dirpath, remotes, client)
        for basename in filenames:
            remote_filename = os.path.join(remote_dirpath, basename)
            if remote_filename in remotes:
                # Already present remotely.
                continue

            local_path = os.path.join(dirpath, basename)
            if not os.path.getsize(local_path) > 0:
                continue

            print("Uploading %s" % local_path)
            upload = synapseclient.File(local_path, parent=s_folder)
            upload.workflow = wf_name
            upload.workflowOption = "all"
            stored = client.store(upload)
            remotes[remote_filename] = stored.id
def main():
    """Command-line entry point: update PCBC annotations from a TSV file.

    Reads tab-delimited rows from ``inputfile`` (standard input when the
    argument is omitted), logs in to Synapse and passes the rows to
    ``UpdatePCBCAnnotations.update_annotations`` together with the
    ``--overwrite``, ``--dryrun`` and ``--verbose`` flags.
    """
    import argparse
    import sys

    parser = argparse.ArgumentParser()

    # The file is opened for reading, so the fallback has to be stdin; the
    # previous default of sys.stdout cannot be read from.
    parser.add_argument('inputfile',
                        nargs='?',
                        type=argparse.FileType('r'),
                        default=sys.stdin)

    parser.add_argument(
        '--overwrite',
        action="store_true",
        default=False,
        help="Overwrite existing annotations [default: %(default)s]")

    parser.add_argument(
        '--dryrun',
        action="store_true",
        default=False,
        help="Run without making changes [default: %(default)s]")

    parser.add_argument('--verbose',
                        action="store_true",
                        default=False,
                        help="Output status [default: %(default)s]")

    args = parser.parse_args()

    syn = synapseclient.Synapse(debug=False, skip_checks=True)
    syn.login(silent=True)

    # Materialise all rows up front so the updater receives a plain list.
    bootstrapped_data = list(csv.DictReader(args.inputfile, delimiter='\t'))

    updater = UpdatePCBCAnnotations(syn, bootstrapped_data)

    updater.update_annotations(overwrite=args.overwrite,
                               dryrun=args.dryrun,
                               verbose=args.verbose)
示例#5
0
def main():
    """Plot experiment results for each single-subkey mutation type.

    Parses the cohort, gene, classifier and mutation-level arguments, loads
    the inferred output for that combination, builds a ``MutationCohort``
    and calls ``plot_mtype_positions`` once per entry of the output index
    whose ``subkeys()`` has length one.
    """
    parser = argparse.ArgumentParser(
        description='Plot experiment results for given mutation classifier.')

    parser.add_argument('cohort', help='a TCGA cohort')
    parser.add_argument('gene', help='a mutated gene')
    parser.add_argument('classif', help='a mutation classifier')
    # A positional argument only honours ``default`` when it is optional, so
    # nargs='?' is needed for 'Form_base__Exon' to ever take effect.
    parser.add_argument('mut_levels', nargs='?', default='Form_base__Exon')
    # Parsed as int so a user-supplied value has the same type as the default.
    parser.add_argument('--samp_cutoff', type=int, default=20)

    # parse command-line arguments, create directory where plots will be saved
    args = parser.parse_args()
    os.makedirs(os.path.join(plot_dir, args.cohort, args.gene), exist_ok=True)

    # every cell of the loaded output is reduced to its mean
    prob_df = load_infer_output(
        os.path.join(base_dir, 'output', args.cohort, args.gene, args.classif,
                     'samps_{}'.format(args.samp_cutoff),
                     args.mut_levels)).applymap(np.mean)

    # log into Synapse using locally stored credentials
    syn = synapseclient.Synapse()
    syn.cache.cache_root_dir = ("/home/exacloud/lustre1/CompBio/"
                                "mgrzad/input-data/synapse")
    syn.login()

    # NOTE(review): samp_cutoff is fixed at 20 here even though the output
    # directory above is selected with args.samp_cutoff -- confirm whether
    # the cohort should use the command-line value as well.
    cdata = MutationCohort(cohort=args.cohort,
                           mut_genes=None,
                           samp_cutoff=20,
                           mut_levels=['Gene'] + args.mut_levels.split('__'),
                           expr_source='Firehose',
                           expr_dir=firehose_dir,
                           syn=syn,
                           cv_prop=1.0)

    singl_mtypes = [
        mtype for mtype in prob_df.index if len(mtype.subkeys()) == 1
    ]

    for singl_mtype in singl_mtypes:
        plot_mtype_positions(prob_df.loc[singl_mtype, :], args, cdata)
示例#6
0
def main():
    """Plot inferred versus actual CNA scores for a cohort, gene, classifier.

    Loads the inferred output for the requested combination, computes loss
    and gain AUC tables with ``get_aucs`` and plots three rows of the output:
    the best loss 'CNA' row, the best gain 'CNA' row, and the loss row with
    the largest 'CNA' minus 'Mut' difference.
    """
    # The first positional parameter of ArgumentParser is ``prog``; the text
    # must be passed as ``description`` to show up as the help description
    # instead of replacing the program name in the usage line.
    parser = argparse.ArgumentParser(
        description=("Plot the inferred CNA scores for a cohort's samples "
                     "against their actual CNA scores for a given set of "
                     "cutoffs."))

    parser.add_argument('cohort', help='a TCGA cohort')
    parser.add_argument('gene', help='a mutated gene')
    parser.add_argument('classif', help='a mutation classifier')

    # parse command-line arguments, create directory where plots will be saved
    args = parser.parse_args()
    os.makedirs(plot_dir, exist_ok=True)

    # log into Synapse using locally stored credentials
    syn = synapseclient.Synapse()
    syn.cache.cache_root_dir = ("/home/exacloud/lustre1/CompBio/"
                                "mgrzad/input-data/synapse")
    syn.login()

    cdata = MutationCohort(cohort=args.cohort,
                           mut_genes=[args.gene],
                           mut_levels=['Gene'],
                           expr_source='Firehose',
                           var_source='mc3',
                           expr_dir=firehose_dir,
                           copy_source='Firehose',
                           copy_dir=copy_dir,
                           copy_discrete=False,
                           syn=syn,
                           cv_prop=1.0)

    iso_df = load_infer_output(
        os.path.join(base_dir, 'output', args.cohort, args.gene, args.classif))

    loss_df, gain_df = get_aucs(iso_df, args, cdata)
    plot_cna_scores(iso_df.loc[loss_df['CNA'].idxmax(), :], args, cdata)
    plot_cna_scores(iso_df.loc[gain_df['CNA'].idxmax(), :], args, cdata)

    # row where the 'CNA' value exceeds the 'Mut' value by the widest margin
    plot_cna_scores(iso_df.loc[(loss_df['CNA'] - loss_df['Mut']).idxmax(), :],
                    args, cdata)
示例#7
0
def main(argv):
    """Runs the experiment.

    Args:
        argv: Sequence of four items -- cohort suffix (prefixed with
            ``'TCGA-'``), gene name, an expression naming the classifier
            (evaluated with ``eval``), and the cross-validation id.

    The optimisation results are pickled to
    ``<base_dir>/output/<argv[0]>/<argv[1]>/<argv[2]>/results/out__cv-<id>.p``.
    """

    # gets the directory where output will be saved and the name of the TCGA
    # cohort under consideration, loads the list of gene sub-variants
    print(argv)
    out_dir = os.path.join(base_dir, 'output', argv[0], argv[1], argv[2])
    coh_lbl = 'TCGA-{}'.format(argv[0])

    # loads the expression data and gene mutation data for the given TCGA
    # cohort, with the training/testing cohort split defined by the
    # cross-validation id for this task
    syn = synapseclient.Synapse()
    syn.cache.cache_root_dir = ("/home/exacloud/lustre1/CompBio/"
                                "mgrzad/input-data/synapse")
    syn.login()

    cdata = VariantCohort(cohort=coh_lbl,
                          mut_genes=[argv[1]],
                          mut_levels=('Gene', 'Form', 'Exon', 'Location',
                                      'Protein'),
                          syn=syn,
                          cv_seed=(int(argv[3]) + 3) * 17)

    base_mtype = MuType({('Gene', argv[1]): None})
    # NOTE(review): eval() runs arbitrary code taken from the command line;
    # only safe while argv comes from a trusted job script. Consider an
    # explicit name-to-class lookup instead.
    optim = PartitionOptim(cdata, base_mtype, eval(argv[2]),
                           ('Form', 'Exon', 'Location', 'Protein'))

    # The call is kept inside the loop as in the original; its return value
    # was never used, so it is no longer bound to a name.
    while optim.traverse_branch():
        optim.best_optim()

    # saves classifier results to file; the payload is built before the file
    # is opened, and the handle is closed even if pickling fails
    out_file = os.path.join(out_dir, 'results', 'out__cv-{}.p'.format(argv[3]))
    results = {
        'best': optim.best_mtypes,
        'hist': optim.mtype_scores,
        'pred': optim.pred_scores,
        'optim': optim.best_optim()
    }
    with open(out_file, 'wb') as out_handle:
        pickle.dump(results, out_handle)
def main():
    """Dispatch the ``new_table`` / ``overwrite_table`` subcommands.

    Builds the argument parser, logs in to Synapse and calls the handler
    registered for the chosen subcommand as ``func(args, synapse_client)``.
    Exits with a usage error when no subcommand is given.
    """

    parent_parser = argparse.ArgumentParser(add_help=False)
    parent_parser.add_argument("--config_file",
                               type=argparse.FileType("r"),
                               help="Full pathname for the YAML config file")
    parent_parser.add_argument("--consortium",
                               type=str,
                               default=None,
                               help="Consortium to create the table for")

    parser = argparse.ArgumentParser(parents=[parent_parser], add_help=True)

    subparsers = parser.add_subparsers()

    parser_new_table = subparsers.add_parser("new_table",
                                             help="New table help")
    parser_new_table.add_argument("--parent_synapse_id",
                                  type=str,
                                  help="Synapse ID of the parent project")
    parser_new_table.add_argument("--synapse_table_name",
                                  type=str,
                                  help="Name of the Synapse table")
    parser_new_table.set_defaults(func=process_new_table)

    parser_overwrite_table = subparsers.add_parser("overwrite_table",
                                                   help="Overwrite table help")
    parser_overwrite_table.add_argument(
        "--table_synapse_id",
        type=str,
        help="Synapse ID of the table to be overwritten")
    parser_overwrite_table.set_defaults(func=process_overwrite_table)

    args = parser.parse_args()

    # On Python 3 subcommands are optional by default, so ``func`` is absent
    # when none was given. Fail with a usage message before logging in
    # rather than with an AttributeError afterwards.
    if not hasattr(args, "func"):
        parser.error("a subcommand is required: new_table or overwrite_table")

    dccv_syn = synapseclient.Synapse()
    dccv_syn.login(silent=True)

    args.func(args, dccv_syn)
示例#9
0
def main():
    """Plot the inferred gene weight coefficients of a Stan classifier.

    Loads the saved model variables for the requested model, solve method,
    cohort and gene, builds the matching ``MutationCohort`` and passes a
    DataFrame of the 'gn_wghts' variable to ``plot_weights_cov``.

    Raises:
        ValueError: If the loaded variables do not contain 'gn_wghts'.
    """
    # ArgumentParser's first positional parameter is ``prog``; pass the text
    # as ``description`` so the usage line keeps the real program name.
    parser = argparse.ArgumentParser(
        description=("Plot the distributions of gene weight coefficients "
                     "inferred by a given Stan classifier trained to predict "
                     "the mutation status of a gene in a given TCGA cohort.")
        )

    parser.add_argument('model_name', type=str, help="label of a Stan model")
    parser.add_argument('solve_method', type=str,
                        help=("method used to obtain estimates for the "
                              "parameters of the model"))

    parser.add_argument('cohort', type=str, help="a TCGA cohort")
    parser.add_argument('gene', type=str, help="a mutated gene")

    args = parser.parse_args()
    os.makedirs(plot_dir, exist_ok=True)
    vars_dict = load_vars(args.model_name, args.solve_method,
                          args.cohort, args.gene)

    if 'gn_wghts' not in vars_dict:
        raise ValueError("Can only plot inferred gene weights for a model "
                         "that includes them as variables!")

    # log into Synapse using locally stored credentials
    syn = synapseclient.Synapse()
    syn.cache.cache_root_dir = ('/home/exacloud/lustre1/CompBio'
                                '/mgrzad/input-data/synapse')
    syn.login()

    cdata = MutationCohort(
        cohort=args.cohort, mut_genes=[args.gene], mut_levels=['Gene'],
        expr_source='Firehose', expr_dir=firehose_dir, var_source='mc3',
        syn=syn, cv_prop=1.0
        )

    # rows are labelled with the sorted cohort genes, excluding the gene
    # whose mutation status is being predicted
    wghts_df = pd.DataFrame(vars_dict['gn_wghts'],
                            index=sorted(cdata.genes - {args.gene}))
    plot_weights_cov(wghts_df, args, cdata)
示例#10
0
def main():
    """Plot a cohort's unsupervised clustering with gene subtypes marked.

    Builds a ``MutationCohort`` for the requested genes and mutation levels,
    transforms it with the transformer named by ``transform`` and calls
    ``plot_subtype_clustering`` once per gene on a copy of the result.
    """
    # ArgumentParser's first positional parameter is ``prog``; pass the text
    # as ``description`` so the usage line keeps the real program name.
    parser = argparse.ArgumentParser(
        description=("Plots the clustering done by an unsupervised learning "
                     "method on a TCGA cohort with subtypes of particular "
                     "genes highlighted."))

    parser.add_argument('cohort', type=str, help='a cohort in TCGA')
    parser.add_argument('transform',
                        type=str,
                        help='an unsupervised learning method')
    parser.add_argument('mut_levels',
                        type=str,
                        help='a set of mutation annotation levels')
    parser.add_argument('--genes',
                        type=str,
                        nargs='+',
                        default=['TP53'],
                        help='a list of mutated genes')

    args = parser.parse_args()
    os.makedirs(plot_dir, exist_ok=True)

    syn = synapseclient.Synapse()
    syn.cache.cache_root_dir = ("/home/exacloud/lustre1/CompBio/"
                                "mgrzad/input-data/synapse")
    syn.login()

    cdata = MutationCohort(cohort=args.cohort,
                           mut_genes=args.genes,
                           mut_levels=['Gene'] + args.mut_levels.split('__'),
                           expr_source='Firehose',
                           expr_dir=firehose_dir,
                           cv_prop=1.0,
                           syn=syn)

    # NOTE(review): eval() runs arbitrary code taken from the command line;
    # only safe for trusted invocations. Consider an explicit name-to-class
    # lookup instead.
    mut_trans = eval(args.transform)()
    trans_expr = mut_trans.fit_transform_coh(cdata)

    # each plot gets its own copy of the transformed data
    for gene in args.genes:
        plot_subtype_clustering(trans_expr.copy(), args, cdata, gene)
def test_login():
    """Check that the ``login`` command forwards its arguments to the client.

    ``login`` and ``getUserProfile`` are patched on a fresh client, the
    command line is run against it, and the recorded calls are verified.
    """
    alt_syn = synapseclient.Synapse()
    username = "******"
    password = "******"
    fake_profile = {"userName": "******", "ownerId": "ownerId"}
    command_line = ('synapse', '--skip-checks', 'login',
                    '-u', username,
                    '-p', password,
                    '--rememberMe')

    with patch.object(alt_syn, "login") as mock_login:
        with patch.object(alt_syn, "getUserProfile",
                          return_value=fake_profile) as mock_get_user_profile:
            run(*command_line, syn=alt_syn)

            mock_login.assert_called_once_with(username,
                                               password,
                                               forced=True,
                                               rememberMe=True,
                                               silent=False)
            mock_get_user_profile.assert_called_once_with()
示例#12
0
    def synapse_login(self):
        # Logs in to Synapse and reports whether a client object ended up set.
        logging.info('Logging into Synapse...')
        # Credentials already stored on the instance win; otherwise fall back
        # to the SYNAPSE_USERNAME / SYNAPSE_PASSWORD environment variables.
        self._username = self._username or os.getenv('SYNAPSE_USERNAME')
        self._password = self._password or os.getenv('SYNAPSE_PASSWORD')

        # NOTE(review): the statement below looks mangled -- several original
        # lines (username prompt, password prompt, the login attempt and its
        # error handler) appear to have been fused by credential scrubbing.
        # It is not valid Python as written; restore it from the upstream
        # source before relying on this method.
        if not self._username:
            self._username = input('Synapse username: '******'Synapse password: '******'Synapse login failed: {0}'.format(str(ex)))

        # True only when a client object is present on the instance.
        return self._synapse_client is not None
    def login(cls, username=None, password=None):
        # Creates a Synapse client, stores it on ``cls._synapse_client`` and
        # returns True on success. On any exception the stored client is
        # reset to None, the failure is logged, and False is returned.
        # NOTE(review): takes ``cls`` but no @classmethod decorator is visible
        # here -- confirm against the enclosing class.

        # Explicit arguments win; otherwise fall back to the environment.
        username = username or os.getenv('SYNAPSE_USERNAME')
        password = password or os.getenv('SYNAPSE_PASSWORD')

        # NOTE(review): the statement below looks mangled -- the username
        # prompt, the password prompt and the "Logging into Synapse as" log
        # call appear to have been fused by credential scrubbing. It is not
        # valid Python as written; restore it from the upstream source.
        if not username:
            username = input('Synapse username: '******'Synapse password: '******'Logging into Synapse as: {0}'.format(username))
        try:
            # Disable the synapseclient progress output.
            syn.core.utils.printTransferProgress = lambda *a, **k: None

            cls._synapse_client = syn.Synapse(skip_checks=True)
            cls._synapse_client.login(username, password, silent=True)
        except Exception as ex:
            # Best effort: record the failure and leave no client behind.
            cls._synapse_client = None
            logging.error('Synapse login failed: {0}'.format(str(ex)))

        return cls._synapse_client is not None
示例#14
0
def synapse_login(synapse_config=synapseclient.client.CONFIG_FILE):
    """Create a Synapse client from a config file and log it in silently.

    Args:
        synapse_config: Path to the Synapse configuration file; defaults to
            ``synapseclient.client.CONFIG_FILE``.

    Returns:
        The logged-in Synapse client.

    Raises:
        ValueError: If the client reports missing credentials or an
            authentication failure.
    """
    login_help = (
        "Login error: please make sure you have correctly "
        "configured your client.  Instructions here: "
        "https://help.synapse.org/docs/Client-Configuration.1985446156.html. "
        "You can also create a Synapse Personal Access Token and set it "
        "as an environmental variable: "
        "SYNAPSE_AUTH_TOKEN='<my_personal_access_token>'"
    )
    try:
        client = synapseclient.Synapse(configPath=synapse_config)
        client.login(silent=True)
    except (SynapseNoCredentialsError, SynapseAuthenticationError):
        raise ValueError(login_help)
    return client
示例#15
0
    def __init__(
        self,
        main_confs,
        user,
        push_config,
    ):
        """Initialize and validate basic information for a Push.

        Args:
            main_confs: Main configuration object; its ``USERS`` attribute is
                passed to ``_process_user``.
            user: User value, resolved through ``_process_user``.
            push_config: Push configuration reference; stored as
                ``push_config_path`` and processed by
                ``_process_push_config``.
        """
        log.debug("Initializing Push obj.")

        # main_confs must be set first: _process_user reads self.main_confs.
        self.main_confs = main_confs
        self.user = self._process_user(user=user, users=self.main_confs.USERS)
        # Not known yet at construction time.
        self.push_id = None
        self.push_time = None
        self.push_config_path = push_config
        self.push_config = self._process_push_config(push_config=push_config)

        log.info("Initializing Synapse client.")
        # The client is only constructed here; no login call is made.
        self.syn = synapse.Synapse()
        self.dag = None

        log.info("Creating interaction instances.")
        self._create_interactions()
示例#16
0
def main():
    """Write a per-tumor mutation table for the requested cancer types.

    Downloads three Synapse entities, derives the panel, tumor and mutation
    structures from them with the project helpers, and writes a
    tab-separated table named after the upper-cased cancer types.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-u", "--username", dest="username",
                        help="User name", required=True)
    parser.add_argument("-p", "--password", dest="password",
                        help="Password", required=True)
    parser.add_argument("-c", "--cancer", nargs="*", type=str,
                        dest="cancers", help="Cancer Types", required=True)

    args = parser.parse_args()
    # normalise the cancer type labels: trimmed and upper-cased
    args.cancers = [label.strip().upper() for label in args.cancers]

    # use the synapse client to download relevant files
    syn = synapseclient.Synapse()
    syn.login(args.username, args.password)
    combinedbed = syn.get('syn13251251')
    mutationsextended = syn.get('syn13251247')
    clinicalpatient = syn.get('syn13251229')

    # parse all files and extract relevant information
    panels = make_panel_dictionary(combinedbed.path)  # panel to gene
    tumors = find_tumor_ids(clinicalpatient.path, args.cancers)
    sample_data = pull_sample_mutations(mutationsextended.path, tumors)
    mutations = make_mutations_list(sample_data)  # find all mutations
    panel_to_muts = make_panel_to_muts(panels, mutations)  # panel to mutation
    tumor_mutations = make_tumor_mutations(sample_data, mutations,
                                           panel_to_muts, tumors)

    # one header row, then one row per tumor
    outname = "complete_mutations_table_%s.txt" % "_".join(args.cancers)
    with open(outname, "w") as output:
        header = ["Sample"] + mutations
        output.write("\t".join(header) + "\n")
        for tumor_id in tumor_mutations:
            cells = [str(value) for value in tumor_mutations[tumor_id]]
            output.write("\t".join([tumor_id] + cells) + "\n")
示例#17
0
def test_login():
    """Run the ``login`` command for the secondary test user.

    Skipped when no secondary username is configured. The session-cache
    writer is patched so the cached sessions passed to it can be inspected.
    """
    if not other_user['username']:
        raise SkipTest(
            "Skipping test for login command: No [test-authentication] in %s" %
            client.CONFIG_FILE)

    cache_writer = "synapseclient.client.Synapse._writeSessionCache"
    with patch(cache_writer) as write_session_cache_mock:
        alt_syn = synapseclient.Synapse()
        command_line = ['synapse', '--skip-checks', 'login',
                        '-u', other_user['username'],
                        '-p', other_user['password'],
                        '--rememberMe']
        run(*command_line, syn=alt_syn)

        # first positional argument handed to the patched cache writer
        cached_sessions = write_session_cache_mock.call_args[0][0]
        assert cached_sessions["<mostRecent>"] == other_user['username']
        assert other_user['username'] in cached_sessions
        assert alt_syn.username == other_user['username']
        assert alt_syn.apiKey is not None
示例#18
0
    def __init__(
        self,
        storageFileview: str,
        syn: synapseclient = None,
    ) -> None:
        """Set up a SynapseStorage object and load its fileview table.

        Args:
            storageFileview: synapse ID of the fileview containing
                administrative storage metadata.
            syn: synapse client to use; when falsy, a new client is created
                and logged in.

        TODO: move away from specific project setup and work with an
        interface that Synapse specifies (e.g. based on schemas)
        """

        if syn:
            # reuse the caller's client
            self.syn = syn
        else:
            self.syn = synapseclient.Synapse()
            self.syn.login()

        self.storageFileview = storageFileview

        # get data in administrative fileview for this pipeline
        self.setStorageFileviewTable()
def test_login():
    """Run the ``login`` command with the [test-authentication] credentials.

    Prints a skip notice instead of failing when the configuration lookup
    raises ``ConfigParser.Error``.
    """
    try:
        config = ConfigParser.ConfigParser()
        config.read(client.CONFIG_FILE)
        other_user = {
            'username': config.get('test-authentication', 'username'),
            'password': config.get('test-authentication', 'password'),
        }

        cache_writer = "synapseclient.client.Synapse._writeSessionCache"
        with patch(cache_writer) as write_session_cache_mock:
            alt_syn = synapseclient.Synapse()
            command_line = ['synapse', '--skip-checks', 'login',
                            '-u', other_user['username'],
                            '-p', other_user['password'],
                            '--rememberMe']
            run(*command_line, syn=alt_syn)

            # first positional argument handed to the patched cache writer
            cached_sessions = write_session_cache_mock.call_args[0][0]
            assert cached_sessions["<mostRecent>"] == other_user['username']
            assert other_user['username'] in cached_sessions
            assert alt_syn.username == other_user['username']
            assert alt_syn.apiKey is not None

    except ConfigParser.Error:
        print("Skipping test for login command: No [test-authentication] in %s" % client.CONFIG_FILE)
示例#20
0
def main():
    """Plot how well expression signatures separate mutation subtypes.

    Builds a ``MutationCohort`` for the requested cohort and gene, loads the
    inferred output for the classifier, sample cutoff and mutation levels,
    computes the separation statistics with ``get_separation`` and passes
    them to ``plot_separation``.
    """
    # ArgumentParser's first positional parameter is ``prog``; pass the text
    # as ``description`` so the usage line keeps the real program name.
    parser = argparse.ArgumentParser(
        description=("Plot how well expression signatures separate isolated "
                     "mutation subtypes from non-mutated samples relative to "
                     "how they separate mutated samples not belonging to the "
                     "subtype."))

    parser.add_argument('cohort', help='a TCGA cohort')
    parser.add_argument('gene', help='a mutated gene')
    parser.add_argument('classif', help='a mutation classifier')

    # A positional argument only honours ``default`` when it is optional, so
    # nargs='?' is needed for 'Form_base__Exon' to ever take effect.
    parser.add_argument('mut_levels',
                        nargs='?',
                        default='Form_base__Exon',
                        help='a set of mutation annotation levels')
    parser.add_argument('--samp_cutoff', type=int, default=20)

    args = parser.parse_args()
    os.makedirs(plot_dir, exist_ok=True)

    # log into Synapse using locally stored credentials
    syn = synapseclient.Synapse()
    syn.cache.cache_root_dir = syn_root
    syn.login()

    cdata = MutationCohort(cohort=args.cohort,
                           mut_genes=[args.gene],
                           mut_levels=args.mut_levels.split('__'),
                           expr_source='Firehose',
                           expr_dir=firehose_dir,
                           syn=syn,
                           cv_prop=1.0)

    infer_df = load_infer_output(
        os.path.join(base_dir, 'output', args.cohort, args.gene, args.classif,
                     'samps_{}'.format(args.samp_cutoff), args.mut_levels))
    auc_vals, sep_vals, prop_vals = get_separation(infer_df, args, cdata)

    plot_separation(auc_vals, sep_vals, prop_vals, args, cdata)
示例#21
0
def main():
    """Plot a cohort's mutation-frequency clustering under three transforms.

    Builds a ``MutFreqCohort`` for the requested cohort, fits PCA, t-SNE and
    UMAP transformers to it and passes the labelled results to
    ``plot_freq_clustering``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('cohort', type=str, help='a cohort in TCGA')
    args = parser.parse_args()
    os.makedirs(plot_dir, exist_ok=True)

    # logs into Synapse using locally-stored credentials
    syn = synapseclient.Synapse()
    syn.cache.cache_root_dir = ("/home/exacloud/lustre1/CompBio/"
                                "mgrzad/input-data/synapse")
    syn.login()

    cdata = MutFreqCohort(cohort=args.cohort,
                          expr_source='Firehose',
                          expr_dir=firehose_dir,
                          cv_prop=1.0,
                          syn=syn)

    # all transformers are constructed before any of them is fitted
    labelled_transformers = [
        ('PCA', OmicPCA()),
        ('t-SNE', OmicTSNE()),
        ('UMAP', OmicUMAP()),
    ]

    # list of (label, transformed data) pairs, in the order given above
    trans_dict = []
    for label, transformer in labelled_transformers:
        trans_dict.append((label, transformer.fit_transform_coh(cdata)))

    plot_freq_clustering(trans_dict, args, cdata)
示例#22
0
def main():
    """Update PCBC annotations in Synapse and optionally store the table.

    Parses the command line, logs in with the given config file, runs
    ``update_annots_synapse`` and, when ``--storetable`` is set, also runs
    ``update_annots_table_synapse``.
    """
    import argparse
    import sys

    parser = argparse.ArgumentParser()
    parser.add_argument("--configPath", type=str,
                        default=synapseclient.client.CONFIG_FILE)
    parser.add_argument("--filename", type=str, default=None)
    parser.add_argument('--dryrun', action="store_true", default=False,
                        help="Run without making changes [default: %(default)s]")
    parser.add_argument('--parentId', type=str)
    parser.add_argument('--storetable', action="store_true", default=False)
    parser.add_argument('--projectId', type=str)

    args = parser.parse_args()

    syn = synapseclient.Synapse(skip_checks=True, configPath=args.configPath)
    syn.login(silent=True)

    all_annots = PCBCAnnotations.PCBCAllAnnotations(syn)
    table_updater = PCBCAnnotations.PCBCAllAnnotationTableUpdate(syn, all_annots)

    # Always None here: the lookup that would fill it in is disabled below.
    my_synapse_id = None

    # if not args.dryrun:
    #     my_synapse_id = synapseHelpers.thisCodeInSynapse(parentId='syn2758110', syn=syn)

    table_updater.update_annots_synapse(filename=args.filename,
                                        parentId=args.parentId,
                                        executed=my_synapse_id,
                                        dryrun=args.dryrun)

    if args.storetable:
        table_updater.update_annots_table_synapse(projectId=args.projectId,
                                                  dryrun=args.dryrun)
示例#23
0
def main(argv):
    """Fit the default Stan model on the challenge data and pickle results.

    Args:
        argv: Sequence of three items -- the value passed as the cohort's
            second argument (also used in the output path), the interaction
            type, and an integer-like cross-validation id.

    The pickle is written to
    ``<base_dir>/output/intx/<argv[0]>/results/out_<argv[1]>_<argv[2]>.p``.
    """

    # use your own Synapse cache and credentials here
    syn = synapseclient.Synapse()
    syn.cache.cache_root_dir = ("/home/exacloud/lustre1/CompBio/"
                                "mgrzad/input-data/synapse")
    syn.login()

    # loads the challenge data
    cdata = TransferDreamCohort(syn,
                                argv[0],
                                intx_types=[argv[1]],
                                cv_seed=(int(argv[2]) * 41) + 1,
                                cv_prop=0.8)

    # initializes the model and fits it using all of the genes in the
    # `inter`section of the RNA genes, CNA genes, and proteome genes
    clf = mpt.StanDefault(argv[1])

    # finds the best combination of model hyper-parameters, uses these
    # parameters to fit to the data
    #clf.tune_coh(cdata, pheno='inter',
    #             tune_splits=4, test_count=4, parallel_jobs=16)
    clf.fit_coh(cdata, pheno='inter')

    out_file = os.path.join(base_dir, 'output', 'intx', argv[0], 'results',
                            'out_{}_{}.p'.format(argv[1], argv[2]))

    # saves the classifier performance, and the fitted posterior means of the
    # model variables and their names to file; the payload is built before
    # the file is opened, and the handle is closed even if pickling fails
    results = {
        'Eval': clf.eval_coh(cdata, pheno='inter'),
        'PostMeans': clf.named_steps['fit'].post_means,
        'VarNames': clf.named_steps['fit'].var_names
    }
    with open(out_file, 'wb') as out_handle:
        pickle.dump(results, out_handle)
def download_data_synapse(list_datasets):
    """Fetch the dataset and annotation entity of every cancer type.

    Args:
        list_datasets: Mapping from cancer type to a two-item list of
            Synapse IDs ``[dataset_id, annotation_id]``.

    Returns:
        The same mapping, modified in place: each ID has been replaced by
        the entity returned by ``syn.get``.
    """

    # Connect to Synapse server
    print("Connecting to Synapse database...\n")
    # NOTE(review): credentials are hard-coded in source; consider reading
    # them from the environment or a config file.
    syn = synapseclient.Synapse()
    syn.login('Machine_learning_project_70', 'Group_70')

    # Obtain a pointer and download the data
    print("--- Downloading datasets ---")
    for cancer_type in list_datasets:
        entry = list_datasets[cancer_type]
        print(cancer_type)

        # slot 0: the dataset itself
        dataset_id = entry[0]
        print("\tDataset ID:\t" + dataset_id)
        entry[0] = syn.get(entity=dataset_id)
        print("\tLocal path:\t" + str(entry[0].path))

        # slot 1: the matching annotation
        annotation_id = entry[1]
        print("\tAnnotation ID:\t" + annotation_id)
        entry[1] = syn.get(entity=annotation_id)
        print("\tLocal path:\t" + str(entry[1].path))

    print("--- DONE ---\n")
    return list_datasets
示例#25
0
def run_synapse(docstore, parent, workdir):
    """Compress VCF entries from a doc store and upload new ones to Synapse.

    For every visible VCF entry that carries tags and has a size above zero,
    the file is copied into ``workdir`` as
    ``<sample>.<name>.snv_mnv.vcf``, compressed with ``bgzip`` and stored
    under ``parent`` -- unless an entity with the compressed name already
    exists there, in which case it is skipped.

    Args:
        docstore: Path handed to ``FileDocStore``.
        parent: Synapse ID of the parent container.
        workdir: Directory that receives the copied and compressed files.
    """
    doc = FileDocStore(file_path=docstore)

    syn = synapseclient.Synapse()
    syn.login()

    for _entry_id, entry in doc.filter(visible=True, data_type='galaxy.datatypes.tabular.Vcf'):
        if 'tags' not in entry:
            continue

        # The last "sample:<value>" tag wins.
        # NOTE(review): sample stays None when no such tag exists, and the
        # concatenation below then raises TypeError.
        sample = None
        for s in entry['tags']:
            tmp = s.split(":")
            if tmp[0] == 'sample':
                sample = tmp[1]

        # Escaped dot: only a literal ".vcf" suffix is removed (the former
        # pattern '.vcf$' also matched e.g. "xvcf").
        name = re.sub(r'\.vcf$', '', entry['name'])
        file_name = sample + "." + name + ".snv_mnv.vcf"
        target = Target(uuid=entry['uuid'])
        if not doc.size(target) > 0:
            continue

        src_file = doc.get_filename(target)
        dst_file = os.path.join(workdir, file_name)
        query = "select * from entity where parentId=='%s' and name=='%s'" % (parent, file_name + ".gz")
        r = syn.query(query)['results']
        if len(r) == 0:
            # print() with a single argument behaves the same on Python 2 and 3.
            print(dst_file)
            shutil.copy(src_file, dst_file)
            # Argument list instead of a shell string: paths containing
            # spaces or shell metacharacters are passed through verbatim.
            subprocess.check_call(["bgzip", dst_file])
            f = synapseclient.File(dst_file + ".gz", parentId=parent, name=file_name + ".gz")
            f.fileType = 'vcf'
            f.pipeline = 'UCSC'
            f.variant_type = "snv"
            f = syn.store(f,
                executed="https://github.com/ucsccancer/pcawg_tools"
            )
        else:
            print("Skipping %s" % file_name)
示例#26
0
def genie_fusion_counts(synapse_credentials_file, synapse_genie_release_id, filename=None):
    """Count samples with fusion calls per sequencing assay in a GENIE release.

    Args:
        synapse_credentials_file: Path to a JSON file whose keys are passed
            to ``syn.login`` as keyword arguments, e.g.
            ``{"email": "", "password": ""}``.
        synapse_genie_release_id: Synapse ID of the release folder to examine.
        filename: Optional path; when given, the table is also written there
            with ``DataFrame.to_excel``.

    Returns:
        DataFrame with one row per ``SEQ_ASSAY_ID``, the number of samples
        with fusion calls, and a flag telling whether the assay's
        ``alteration_types`` mention "structural_variants".
    """
    # synapse client
    syn = synapseclient.Synapse()
    # login, credential file: {"email": "", "password": ""}
    # (read inside a context manager so the handle is closed)
    with open(synapse_credentials_file, 'r') as credentials_handle:
        credentials = json.load(credentials_handle)
    syn.login(**credentials)

    # make dict of entities in genie release, keyed by 'name'
    # provide syn id of folder of the release to be examined
    genie_syn_entities = {entity['name']: entity for entity in syn.getChildren(synapse_genie_release_id)}

    # get fusion data
    fusions_df = get_data(syn, genie_syn_entities['data_fusions.txt'])
    # get sample data
    sample_df = get_data(syn, genie_syn_entities['data_clinical_sample.txt'])
    # seq assay id info
    assay_df = get_data(syn, genie_syn_entities['assay_information.txt'])

    alt_type = 'structural_variants'

    # list of seq assay types with reported fusion data
    fusion_samples = pd.merge(fusions_df['Tumor_Sample_Barcode'].drop_duplicates(),
                              sample_df[['SAMPLE_ID', 'SEQ_ASSAY_ID']],
                              left_on='Tumor_Sample_Barcode',
                              right_on='SAMPLE_ID',
                              how='left')
    # Name the index and the count column explicitly: the column names that
    # value_counts().reset_index() produces differ between pandas versions,
    # so renaming 'index' / 'SEQ_ASSAY_ID' afterwards is not portable.
    assay_counts = fusion_samples['SEQ_ASSAY_ID'].value_counts()
    fusion_samples = assay_counts.rename_axis('SEQ_ASSAY_ID').reset_index(
        name='Samples with fusion calls')

    # add flag as to whether SEQ_ASSAY_ID is described as covering the alt_type
    # (na=False: assays with a missing alteration_types value count as "no")
    covers_alt_type = assay_df['alteration_types'].str.contains(alt_type, na=False)
    covering_assays = assay_df['SEQ_ASSAY_ID'][covers_alt_type]
    fusion_samples['Alteration type includes "structural_variants"'] = (
        fusion_samples['SEQ_ASSAY_ID'].isin(covering_assays))

    # write to file
    if filename is not None:
        fusion_samples.to_excel(filename, index=False)

    # returned so the table is not lost when no filename is given
    return fusion_samples
import os
import synapseclient
from synapseclient import File
syn = synapseclient.Synapse(skip_checks=True)
syn.login(silent=True)

consortium = 'AMP-AD'
study = 'MSBB'
center = 'MSSM'
disease = 'Alzheimers Disease'
fileType = 'genomicMatrix'
organism = 'human'

toMove = {
    'syn3157412': {
        'parentId':
        'syn3157740',  #'traits_for_RNA-seq_age_censored.tsv'
        'dataType':
        'metaData',
        'tissueType':
        ['Frontal Pole', 'Superior Temporal Gyrus', 'Parahippocampal Gyrus'],
        'tissueTypeAbrv': ['FP', 'STG', 'PHG'],
        'platform':
        '',
        'fileType':
        'genomicMatrix',
        'name':
        'AMP-AD_MSBB_MSSM_metaData_mRNA_IlluminaHiSeq2500_age_censored.tsv'
    },
    'syn3157409': {
        'parentId': 'syn3157740',  #'traits_for_array_data_age_censored.tsv'
示例#28
0
#!/usr/bin/env python
import synapseclient
import argparse
# Command-line interface: every option except the content type is mandatory.
parser = argparse.ArgumentParser()
for _short, _long in (("-c", "--synapse_config"),
                      ("-u", "--userid"),
                      ("-s", "--subject"),
                      ("-b", "--body")):
    parser.add_argument(_short, _long, required=True)
parser.add_argument("-t", "--content_type", default="text")
args = parser.parse_args()

# Log in using the Synapse configuration file supplied on the command line.
syn = synapseclient.Synapse(configPath=args.synapse_config)
syn.login()

# Send one message to the single recipient given by --userid.
_message = {
    "userIds": [args.userid],
    "messageSubject": args.subject,
    "messageBody": args.body,
    "contentType": args.content_type,
}
syn.sendMessage(**_message)
def main():
    """Command-line entry point of the challenge scoring script.

    Builds the argument parser (global options plus the ``list``,
    ``status``, ``reset``, ``validate``, ``score``, ``rank``, ``archive``
    and ``leaderboard`` subcommands), acquires the 'challenge' lock, logs
    into Synapse, configures the ``messages`` module from the parsed flags
    and finally calls the handler bound to the selected subcommand.

    The Synapse client is stored in the module-level ``syn``.

    Returns:
        75 if the lock cannot be acquired, otherwise None. Exceptions raised
        while logging in or running the subcommand do not propagate: the
        traceback is written to stderr and, when ``conf.ADMIN_USER_IDS`` is
        set, passed to ``messages.error_notification``.
    """

    if conf.CHALLENGE_SYN_ID == "":
        # Only a warning: execution continues after it is written.
        sys.stderr.write("Please configure your challenge. See sample_challenge.py for an example.\n")

    global syn

    parser = argparse.ArgumentParser()

    parser.add_argument("-u", "--user", help="UserName", default=None)
    parser.add_argument("-p", "--password", help="Password", default=None)
    parser.add_argument("--notifications", help="Send error notifications to challenge admins", action="store_true", default=False)
    parser.add_argument("--send-messages", help="Send validation and scoring messages to participants", action="store_true", default=False)
    parser.add_argument("--acknowledge-receipt", help="Send confirmation message on passing validation to participants", action="store_true", default=False)
    parser.add_argument("--dry-run", help="Perform the requested command without updating anything in Synapse", action="store_true", default=False)
    parser.add_argument("--debug", help="Show verbose error output from Synapse API calls", action="store_true", default=False)
    parser.add_argument("--threads", help="Number of parallel processes to use for validation and scoring", type=int, default=1)

    # Each subcommand registers its handler as ``func``; it is invoked below
    # through ``args.func(args)``.
    subparsers = parser.add_subparsers(title="subcommand")

    parser_list = subparsers.add_parser('list', help="List submissions to an evaluation or list evaluations")
    parser_list.add_argument("evaluation", metavar="EVALUATION-ID", nargs='?', default=None)
    parser_list.add_argument("--challenge-project", "--challenge", "--project", metavar="SYNAPSE-ID", default=None)
    parser_list.add_argument("-s", "--status", default=None)
    parser_list.add_argument("--all", action="store_true", default=False)
    parser_list.set_defaults(func=command_list)

    parser_status = subparsers.add_parser('status', help="Check the status of a submission")
    parser_status.add_argument("submission")
    parser_status.set_defaults(func=command_check_status)

    parser_reset = subparsers.add_parser('reset', help="Reset a submission to RECEIVED for re-scoring (or set to some other status)")
    parser_reset.add_argument("submission", metavar="SUBMISSION-ID", type=int, nargs='*', help="One or more submission IDs, or omit if using --rescore-all")
    parser_reset.add_argument("-s", "--status", default='RECEIVED')
    parser_reset.add_argument("--rescore-all", action="store_true", default=False)
    parser_reset.add_argument("--rescore", metavar="EVALUATION-ID", type=int, nargs='*', help="One or more evaluation IDs to rescore")
    parser_reset.set_defaults(func=command_reset)

    parser_validate = subparsers.add_parser('validate', help="Validate all RECEIVED submissions to an evaluation")
    parser_validate.add_argument("evaluation", metavar="EVALUATION-ID", nargs='?', default=None)
    parser_validate.add_argument("--all", action="store_true", default=False)
    parser_validate.add_argument("--canCancel", action="store_true", default=False)
    parser_validate.set_defaults(func=command_validate)

    parser_score = subparsers.add_parser('score', help="Score all VALIDATED submissions to an evaluation")
    parser_score.add_argument("evaluation", metavar="EVALUATION-ID", nargs='?', default=None)
    parser_score.add_argument("--all", action="store_true", default=False)
    parser_score.add_argument("--canCancel", action="store_true", default=False)
    parser_score.set_defaults(func=command_score)

    parser_rank = subparsers.add_parser('rank', help="Rank all SCORED submissions to an evaluation")
    parser_rank.add_argument("evaluation", metavar="EVALUATION-ID", default=None)
    parser_rank.set_defaults(func=command_rank)

    parser_archive = subparsers.add_parser('archive', help="Archive submissions to a challenge")
    parser_archive.add_argument("evaluation", metavar="EVALUATION-ID", default=None)
    parser_archive.add_argument("archiveType", metavar="TYPE", choices=["submission", "writeup"])
    parser_archive.add_argument("destination", metavar="FOLDER-ID", default=None)
    parser_archive.add_argument("-q", "--query", default=None)
    parser_archive.add_argument("-n", "--name", default=None)
    parser_archive.set_defaults(func=command_archive)

    parser_leaderboard = subparsers.add_parser('leaderboard', help="Print the leaderboard for an evaluation")
    parser_leaderboard.add_argument("evaluation", metavar="EVALUATION-ID", default=None)
    parser_leaderboard.add_argument("--out", default=None)
    parser_leaderboard.set_defaults(func=command_leaderboard)

    args = parser.parse_args()

    # The banner lines use single-argument print() calls so they parse and
    # emit the same bytes under both Python 2 and Python 3; the literal
    # spacing reproduces what the former print statements wrote.
    print("\n" * 2 + "=" * 75)
    print(datetime.utcnow().isoformat())

    ## Acquire lock, don't run two scoring scripts at once
    try:
        update_lock = lock.acquire_lock_or_fail('challenge', max_age=timedelta(hours=4))
    except lock.LockedException:
        print(u"Is the scoring script already running? Can't acquire lock.")
        # can't acquire lock, so return error code 75 which is a
        # temporary error according to /usr/include/sysexits.h
        return 75

    try:
        syn = synapseclient.Synapse(debug=args.debug)
        # Credentials not given on the command line fall back to the
        # SYNAPSE_USER / SYNAPSE_PASSWORD environment variables.
        if not args.user:
            args.user = os.environ.get('SYNAPSE_USER', None)
        if not args.password:
            args.password = os.environ.get('SYNAPSE_PASSWORD', None)
        syn.login(email=args.user, password=args.password)

        ## initialize messages
        messages.syn = syn
        messages.dry_run = args.dry_run
        messages.send_messages = args.send_messages
        messages.send_notifications = args.notifications
        messages.acknowledge_receipt = args.acknowledge_receipt

        args.func(args)

    except Exception:
        # Top-level boundary: report the failure instead of propagating it.
        sys.stderr.write('Error in scoring script:\n')
        st = StringIO()
        traceback.print_exc(file=st)
        sys.stderr.write(st.getvalue())
        sys.stderr.write('\n')

        if conf.ADMIN_USER_IDS:
            messages.error_notification(userIds=conf.ADMIN_USER_IDS, message=st.getvalue(), queue_name=conf.CHALLENGE_NAME)

    finally:
        # Always give the lock back, whether or not the command succeeded.
        update_lock.release()

    print("\ndone:  " + datetime.utcnow().isoformat())
    print("=" * 75 + " " + "\n" * 2)
示例#30
0
def main():
    """Tune a mutation classifier for one gene and save its inferred scores.

    Parses the command line, resolves the classifier named by ``classif``,
    logs into Synapse, loads a ``CancerCohort`` for the requested cohort and
    gene, tunes the classifier with the non-TCGA samples excluded, runs
    inference with those samples forced into the test set, and pickles the
    inferred matrix together with the tuning settings to
    ``<base_dir>/output/gene_models/<cohort>/<gene>/<classif>__cv-<cv_id>.p``.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument('cohort', type=str, help='a TCGA cohort')
    parser.add_argument('gene', type=str, help='a mutated gene')
    parser.add_argument('classif', type=str, help='a mutation classifier')

    parser.add_argument(
        'toil_dir',
        type=str,
        help='the directory where toil expression data is saved')
    parser.add_argument('syn_root',
                        type=str,
                        help='Synapse cache root directory')
    parser.add_argument(
        'patient_dir',
        type=str,
        help='directory where SMMART patient RNAseq abundances are stored')

    parser.add_argument(
        '--tune_splits',
        type=int,
        default=4,
        help='how many training cohort splits to use for tuning')
    parser.add_argument(
        '--test_count',
        type=int,
        default=16,
        help='how many hyper-parameter values to test in each tuning split')

    parser.add_argument(
        '--infer_splits',
        type=int,
        default=20,
        help='how many cohort splits to use for inference bootstrapping')
    parser.add_argument(
        '--infer_folds',
        type=int,
        default=4,
        help=('how many parts to split the cohort into in each inference '
              'cross-validation run'))

    parser.add_argument(
        '--parallel_jobs',
        type=int,
        default=4,
        help='how many parallel CPUs to allocate the tuning tests across')

    parser.add_argument('--cv_id', type=int, default=0)
    parser.add_argument('--verbose',
                        '-v',
                        action='store_true',
                        help='turns on diagnostic messages')

    args = parser.parse_args()
    out_dir = os.path.join(base_dir, 'output', 'gene_models', args.cohort,
                           args.gene)
    os.makedirs(out_dir, exist_ok=True)
    out_file = os.path.join(out_dir,
                            '{}__cv-{}.p'.format(args.classif, args.cv_id))

    if args.classif[:6] == 'Stan__':
        # 'Stan__<name>' selects the UsePipe class of the Stan model module
        # called <name>.
        use_module = import_module('HetMan.experiments.utilities'
                                   '.stan_models.{}'.format(
                                       args.classif.split('Stan__')[1]))
        mut_clf = getattr(use_module, 'UsePipe')

    else:
        # SECURITY(review): eval() runs whatever expression is passed on the
        # command line. Kept for compatibility; consider an explicit lookup
        # of allowed classifier names if the argument can be untrusted.
        mut_clf = eval(args.classif)

    base_mtype = MuType({('Gene', args.gene): None})
    clf = mut_clf()

    # log into Synapse using locally stored credentials
    syn = synapseclient.Synapse()
    syn.cache.cache_root_dir = args.syn_root
    syn.login()

    cdata = CancerCohort(cancer=args.cohort,
                         mut_genes=[args.gene],
                         mut_levels=['Gene'],
                         tcga_dir=args.toil_dir,
                         patient_dir=args.patient_dir,
                         syn=syn,
                         collapse_txs=True,
                         cv_seed=(args.cv_id * 59) + 121,
                         cv_prop=1.0)

    # samples whose identifier does not start with 'TCGA'
    smrt_samps = {samp for samp in cdata.samples if samp[:4] != 'TCGA'}

    clf.tune_coh(cdata,
                 base_mtype,
                 exclude_genes={args.gene},
                 exclude_samps=smrt_samps,
                 tune_splits=args.tune_splits,
                 test_count=args.test_count,
                 parallel_jobs=args.parallel_jobs)

    # keep only the parameters that were subject to tuning
    clf_params = clf.get_params()
    tuned_params = {par: clf_params[par] for par, _ in mut_clf.tune_priors}

    infer_mat = clf.infer_coh(cdata,
                              base_mtype,
                              force_test_samps=smrt_samps,
                              exclude_genes={args.gene},
                              infer_splits=args.infer_splits,
                              infer_folds=args.infer_folds)

    # the context manager guarantees the output file is flushed and closed
    with open(out_file, 'wb') as out_fl:
        pickle.dump(
            {
                'Infer': infer_mat,
                'Info': {
                    'TunePriors': mut_clf.tune_priors,
                    'TuneSplits': args.tune_splits,
                    'TestCount': args.test_count,
                    'TunedParams': tuned_params
                }
            }, out_fl)