def main(opts, mut_df=None, frameshift_df=None):
    """Run the permutation test selected by ``opts['kind']``.

    NOTE(review): this file contains a second ``def main`` with the same
    signature further down; being defined later, that one is what the
    module-level name ``main`` ends up bound to.

    Parameters
    ----------
    opts : dict
        Run options. Keys read here: 'kind', 'input' (FASTA path),
        'mutations' (tab-delimited file, only read when `mut_df` is None),
        'bed', 'unique', 'seed' and 'output'. 'use_unmapped' is read for
        kind 'tsg' when `frameshift_df` is None, and 'num_iterations' for
        kind 'oncogene'.
    mut_df : pandas.DataFrame, optional
        Already loaded mutations. It is not modified; renaming and
        filtering are done on copies.
    frameshift_df : optional
        Frameshift counts used by kind 'tsg'. Computed with
        ``cf.count_frameshift_total`` when None.

    Returns
    -------
    The table returned by the ``pr.handle_*_results`` helper matching
    ``opts['kind']`` (also written to ``opts['output']`` when that is set).

    Raises
    ------
    ValueError
        If ``opts['kind']`` is not one of the supported kinds.
    """
    # fail early on an unsupported kind; otherwise no branch below would
    # assign the result and the function would die with UnboundLocalError
    kind = opts['kind']
    supported_kinds = ('oncogene', 'tsg', 'hotmaps1d', 'protein', 'effect')
    if kind not in supported_kinds:
        raise ValueError('Unsupported kind {0!r}; expected one of: {1}'.format(
            kind, ', '.join(supported_kinds)))

    # hack to index the FASTA file (opening it presumably makes pysam
    # create the index when it is missing -- TODO confirm)
    gene_fa = pysam.Fastafile(opts['input'])
    gene_fa.close()

    # Get Mutations
    if mut_df is None:
        mut_df = pd.read_csv(opts['mutations'], sep='\t')
    orig_num_mut = len(mut_df)

    # rename columns to fit the internal column names; a renamed copy is
    # used so that a DataFrame passed in by the caller is left untouched
    rename_dict = {
        'Hugo_Symbol': 'Gene',
        'Tumor_Sample_Barcode': 'Tumor_Sample',
        'Tumor_Seq_Allele2': 'Tumor_Allele',
    }
    mut_df = mut_df.rename(columns=rename_dict)

    # drop rows with missing info
    na_cols = ['Gene', 'Tumor_Allele', 'Start_Position', 'Chromosome']
    mut_df = mut_df.dropna(subset=na_cols)
    logger.info('Kept {0} mutations after droping mutations with missing '
                'information (Droped: {1})'.format(len(mut_df),
                                                   orig_num_mut - len(mut_df)))

    # count frameshifts
    p_inactivating = None
    if kind == 'tsg':
        if frameshift_df is None:
            # count number of frameshifts
            frameshift_df = cf.count_frameshift_total(mut_df, opts['bed'],
                                                      opts['use_unmapped'])

        # proportion of frameshift rows among the rows whose
        # Variant_Classification is listed in utils.all_variants
        var_class = mut_df['Variant_Classification']
        num_fs = int(var_class.isin(utils.variant_frameshift).sum())
        num_all = int(var_class.isin(utils.all_variants).sum())
        if num_all:
            p_inactivating = float(num_fs) / num_all
        else:
            # nothing to take a proportion of; avoid ZeroDivisionError
            logger.warning('No mutations with a known variant classification; '
                           'setting the frameshift proportion to 0')
            p_inactivating = 0.0

    # select valid single nucleotide variants only
    mut_df = utils._fix_mutation_df(mut_df, opts['unique'])

    # log random number seed choice if provided
    if opts['seed'] is not None:
        logger.info('Pseudo Random Number Generator Seed: {0}'.format(
            opts['seed']))

    # read BED file
    bed_dict = utils.read_bed(opts['bed'])

    # run the permutation test, then tidy up the results for output with
    # the handler that matches the requested kind
    if kind == 'oncogene':
        permutation_result = multiprocess_permutation(bed_dict, mut_df, opts)
        permutation_df = pr.handle_oncogene_results(permutation_result,
                                                    opts['num_iterations'])
    elif kind == 'tsg':
        permutation_result = multiprocess_permutation(bed_dict, mut_df, opts,
                                                      frameshift_df,
                                                      p_inactivating)
        permutation_df = pr.handle_tsg_results(permutation_result)
    elif kind == 'hotmaps1d':
        permutation_result = multiprocess_permutation(bed_dict, mut_df, opts)
        permutation_df = pr.handle_hotmaps_results(permutation_result)
    elif kind == 'protein':
        permutation_result = multiprocess_permutation(bed_dict, mut_df, opts)
        permutation_df = pr.handle_protein_results(permutation_result)
    else:  # kind == 'effect', guaranteed by the check at the top
        permutation_result = multiprocess_permutation(bed_dict, mut_df, opts)
        permutation_df = pr.handle_effect_results(permutation_result)

    # save output
    if opts['output']:
        permutation_df.to_csv(opts['output'], sep='\t', index=False)

    return permutation_df
def main(opts, mut_df=None, frameshift_df=None):
    """Run the permutation test selected by ``opts['kind']``.

    NOTE(review): an earlier ``def main`` with the same signature precedes
    this one in the file; this later definition replaces it.

    Parameters
    ----------
    opts : dict
        Run options. Keys read here: 'kind', 'input' (FASTA path),
        'mutations' (tab-delimited file, only read when `mut_df` is None),
        'bed', 'unique', 'seed' and 'output'. 'use_unmapped' is read for
        kind 'tsg' when `frameshift_df` is None, and 'num_iterations' for
        kind 'oncogene'.
    mut_df : pandas.DataFrame, optional
        Already loaded mutations. It is not modified; renaming and
        filtering are done on copies.
    frameshift_df : optional
        Frameshift counts used by kind 'tsg'. Computed with
        ``cf.count_frameshift_total`` when None.

    Returns
    -------
    The table returned by the ``pr.handle_*_results`` helper matching
    ``opts['kind']`` (also written to ``opts['output']`` when that is set).

    Raises
    ------
    ValueError
        If ``opts['kind']`` is not one of the supported kinds.
    """
    # reject an unsupported kind up front instead of failing at the end
    # with UnboundLocalError because no branch assigned the result
    kind = opts['kind']
    supported_kinds = ('oncogene', 'tsg', 'hotmaps1d', 'protein', 'effect')
    if kind not in supported_kinds:
        raise ValueError('Unsupported kind {0!r}; expected one of: {1}'.format(
            kind, ', '.join(supported_kinds)))

    # hack to index the FASTA file (opening it presumably makes pysam
    # create the index when it is missing -- TODO confirm)
    gene_fa = pysam.Fastafile(opts['input'])
    gene_fa.close()

    # Get Mutations
    if mut_df is None:
        mut_df = pd.read_csv(opts['mutations'], sep='\t')
    orig_num_mut = len(mut_df)

    # rename columns to fit the internal column names; rename() without
    # inplace returns a new frame, so the caller's DataFrame is not altered
    rename_dict = {
        'Hugo_Symbol': 'Gene',
        'Tumor_Sample_Barcode': 'Tumor_Sample',
        'Tumor_Seq_Allele2': 'Tumor_Allele',
    }
    mut_df = mut_df.rename(columns=rename_dict)

    # drop rows with missing info
    na_cols = ['Gene', 'Tumor_Allele', 'Start_Position', 'Chromosome']
    mut_df = mut_df.dropna(subset=na_cols)
    num_kept = len(mut_df)
    logger.info('Kept {0} mutations after droping mutations with missing '
                'information (Droped: {1})'.format(num_kept,
                                                   orig_num_mut - num_kept))

    # count frameshifts
    p_inactivating = None
    if kind == 'tsg':
        if frameshift_df is None:
            # count number of frameshifts
            frameshift_df = cf.count_frameshift_total(mut_df, opts['bed'],
                                                      opts['use_unmapped'])

        # proportion of frameshift rows among the rows whose
        # Variant_Classification is listed in utils.all_variants
        var_class = mut_df['Variant_Classification']
        num_fs = int(var_class.isin(utils.variant_frameshift).sum())
        num_all = int(var_class.isin(utils.all_variants).sum())
        if num_all:
            p_inactivating = float(num_fs) / num_all
        else:
            # nothing to take a proportion of; avoid ZeroDivisionError
            logger.warning('No mutations with a known variant classification; '
                           'setting the frameshift proportion to 0')
            p_inactivating = 0.0

    # select valid single nucleotide variants only
    mut_df = utils._fix_mutation_df(mut_df, opts['unique'])

    # log random number seed choice if provided
    if opts['seed'] is not None:
        logger.info('Pseudo Random Number Generator Seed: {0}'.format(opts['seed']))

    # read BED file
    bed_dict = utils.read_bed(opts['bed'])

    # run the permutation test; only kind 'tsg' passes the frameshift
    # counts and the frameshift proportion along
    if kind == 'tsg':
        permutation_result = multiprocess_permutation(bed_dict, mut_df, opts,
                                                      frameshift_df, p_inactivating)
    else:
        permutation_result = multiprocess_permutation(bed_dict, mut_df, opts)

    # tidy up the results for output with the handler matching the kind
    if kind == 'oncogene':
        permutation_df = pr.handle_oncogene_results(permutation_result,
                                                    opts['num_iterations'])
    elif kind == 'tsg':
        permutation_df = pr.handle_tsg_results(permutation_result)
    elif kind == 'hotmaps1d':
        permutation_df = pr.handle_hotmaps_results(permutation_result)
    elif kind == 'protein':
        permutation_df = pr.handle_protein_results(permutation_result)
    else:  # kind == 'effect', guaranteed by the check at the top
        permutation_df = pr.handle_effect_results(permutation_result)

    # save output
    if opts['output']:
        permutation_df.to_csv(opts['output'], sep='\t', index=False)

    return permutation_df