# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse

import pandas as pd
import numpy as np

import entrez
import rnaseq_atlas
from load_rna_data import load_rsem

if __name__ == "__main__":
    # Command-line entry point: load an RSEM expression file, translate its
    # Entrez gene IDs, and print every row whose Hugo gene name matches the
    # pattern given on the command line.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--filename", "-f",
        required=True,
        help=("File with two tab separated columns: "
              "Entrez gene IDs and RSEM expression values"))
    parser.add_argument(
        "pattern",
        help="Gene name, part of a gene name, or regular expression ")
    args = parser.parse_args()

    df = load_rsem(args.filename, translate_entrez_ids=True)

    # `str.contains` treats the pattern as a regular expression by default.
    # na=False makes rows with a missing Hugo symbol count as non-matching;
    # without it the mask would contain NaN and boolean indexing would raise.
    # NOTE(review): whether load_rsem can actually yield missing Hugo names
    # is not visible from this file -- this is a defensive guard.
    mask = df.Hugo.str.contains(args.pattern, na=False)
    subset = df[mask]

    # Single-argument print(...) behaves identically on Python 2 and 3.
    if subset.empty:
        print("Pattern %s not found" % args.pattern)
    else:
        print(subset)
help="Which column to select from the reference") parser.add_argument( "--percentile", help="Which rank percentile do we consider 'low'?", type = float, default = 0.25) parser.add_argument( "--compare-rsem", default = False, action = "store_true", help = "Compare RSEM values directly instead of rank (default: False)") args = parser.parse_args() if not args.sample_uses_hugo_ids: hugo_df = load_rsem(args.sample, translate_entrez_ids = True) else: hugo_df = load_rsem(args.sample, translate_entrez_ids = False) group_rank_method = 'average' if args.normal is None: if args.compare_rsem: normal_df = rnaseq_atlas.hugo_to_rpkm() else: normal_df = rnaseq_atlas.hugo_to_rank(group_rank_method) tissue_cols = rnaseq_atlas.TISSUE_COLUMNS else: normal_df = pd.read_csv(args.normal) normal_df = hugo_mapping.merge(normal_df, on = "Entrez") normal_df = normal_df.drop("Entrez", axis=1) tissue_cols = normal_df.columns[1:]