Пример #1
0
 def test_list_attributes(self):
     """Check list_attributes() error reporting and its result for a dataset.

     The call is expected to return a mapping whose 'err' entry is
     MART_SET_ERR while no mart is set, and DATASET_SET_ERR once a mart is
     set but no dataset is. With both set, the returned table must contain
     a 'chromosome_name' row whose id is 'name_1059'.
     """
     sb = SciBiomart()

     # No mart selected yet.
     err = sb.list_attributes()
     assert err['err'] == MART_SET_ERR

     # Mart selected, dataset still missing.
     sb.set_mart('ENSEMBL_MART_ENSEMBL')
     err = sb.list_attributes()
     assert err['err'] == DATASET_SET_ERR

     sb.set_dataset('fcatus_gene_ensembl')
     # NOTE(review): the False argument presumably turns off printing -- confirm.
     df = sb.list_attributes(False)

     # Filter first, then measure: len() of the boolean mask itself always
     # equals the number of rows in df, so it cannot detect a missing row.
     chromosome_rows = df[df['name'] == 'chromosome_name']
     assert len(chromosome_rows) > 0
     assert 'name_1059' in chromosome_rows['id'].values

     # Keep the configured instance on the test case object.
     self.sb = sb
Пример #2
0
def run(args):
    """Drive a SciBiomart session from parsed command-line arguments.

    Listing flags are honoured first, in this order: marts, datasets,
    filters, attributes, configs. The first flag that is set triggers the
    matching ``list_*`` call and the function returns. The mart (``args.m``)
    is set before the dataset listing, and the dataset (``args.d``) before
    the filter, attribute and config listings.

    If no listing flag is set, a query is run with the JSON-encoded filters
    in ``args.f`` and the comma-separated attributes in ``args.a``. When
    ``args.s`` equals ``'t'`` the result is sorted via
    ``sort_df_on_starts``. The result is then saved with ``save_as_csv``
    to ``args.o``.

    :param args: object carrying the attributes marts, datasets, filters,
        attrs, configs, m, d, f, a, s and o.
    :return: None
    """
    biomart = SciBiomart()

    # Marts can be listed before anything has been selected.
    if args.marts:
        biomart.list_marts(True)
        return
    biomart.set_mart(args.m)

    # Datasets can only be listed once a mart has been chosen.
    if args.datasets:
        biomart.list_datasets(True)
        return
    biomart.set_dataset(args.d)

    # The remaining listings all need the dataset; the first flag set wins.
    # Names are resolved lazily so nothing is touched for flags never reached.
    for flag_name, lister_name in (('filters', 'list_filters'),
                                   ('attrs', 'list_attributes'),
                                   ('configs', 'list_configs')):
        if getattr(args, flag_name):
            getattr(biomart, lister_name)(True)
            return

    # No listing requested, so this is a real query.
    # The filters arrive as a JSON string and are decoded into Python objects.
    query_filters = json.loads(args.f) if args.f else None
    query_attrs = args.a.split(",") if args.a else None

    # Fall back to the gene name and position columns used by the sort step.
    # NOTE(review): any truthy args.s triggers this, not only 't' -- confirm
    # that this is intended.
    if not query_attrs and args.s:
        query_attrs = ['external_gene_name', 'chromosome_name', 'start_position', 'end_position', 'strand']

    biomart.u.dp(['Running query on:',
                  '\nMart: ', biomart.mart,
                  '\nDataset: ', biomart.dataset_version,
                  '\nFilters: ', query_filters,
                  '\nAttributes: ', query_attrs])
    results = biomart.run_query(query_filters, query_attrs)

    if args.s == 't':
        # Column types applied before sorting.
        column_types = {'start_position': int,
                        'end_position': int,
                        'strand': int,
                        'chromosome_name': str}
        biomart.u.warn_p(['Removing any genes with no gene name... Required for sorting.'])

        # Drop rows without a gene name, then cast the columns.
        missing_name = results['external_gene_name'].isnull()
        results = results[~missing_name]
        results = results.astype(column_types)
        # The attributes requested must have included the start and end positions.
        results = biomart.sort_df_on_starts(results)

    output_path = biomart.save_as_csv(results, args.o)
    biomart.u.dp(['Saved the output to:', output_path])