Python LoadTable.sorted примеры использования

Язык программирования: Python

Пространство имен/Пакет: cogent3

Класс/Тип: LoadTable

Метод/Функция: sorted

Примеров на hotexamples.com: 5

Python LoadTable.sorted — найдено 5 примеров. Это лучшие примеры кода на Python для cogent3.LoadTable.sorted, собранные из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество подборки.

Основные методы

Показать / Скрыть

LoadTable(23)

sorted(5)

write(5)

tolist(2)

with_new_column(2)

distinct_values(1)

filtered(1)

legend(1)

to_dict(1)

to_json(1)

Пример #1

Показать файл

def summary_stat_table(table, factors):
    """Return summary statistics for each classifier / feature-set combination.

    For every distinct combination of values in the ``factors`` columns, the
    mean and sample standard deviation (``ddof=1``) are computed for the
    'auc', 'mean_precision' and 'balanced_accuracy' columns, followed by every
    column whose name starts with 'fscore'.

    Parameters
    ----------
    table
        Table providing ``header``, ``distinct_values``, ``filtered`` and
        ``tolist``.
    factors
        Column names that define a group.

    Returns
    -------
    A new table with one row per combination, passed through
    ``sorted(reverse='mean_auc')`` (presumably descending mean AUC -- confirm
    against the table API).
    """
    fscore_cols = [name for name in table.header if name.startswith('fscore')]
    # Order matters: it fixes the column order of every summary row.
    stat_cols = ['auc', 'mean_precision', 'balanced_accuracy'] + fscore_cols

    summary_rows = []
    for combo in tqdm(table.distinct_values(factors), ncols=80):
        wanted = tuple(combo)
        group = table.filtered(lambda x: tuple(x) == wanted, columns=factors)
        summary = list(combo)
        for col in stat_cols:
            values = numpy.array(group.tolist(col))
            summary.append(values.mean())
            summary.append(values.std(ddof=1))
        summary_rows.append(summary)

    column_names = list(factors)
    column_names += [
        'mean_auc', 'std_auc', 'mean_ap', 'std_ap', 'mean_balanced_accuracy',
        'std_balanced_accuracy'
    ]
    column_names += [
        label for col in fscore_cols for label in (f'mean_{col}', f'std_{col}')
    ]

    result = LoadTable(header=column_names, rows=summary_rows)
    return result.sorted(reverse='mean_auc')

Пример #2

Показать файл

Файл: motif_count.py Проект: xtmgah/MutationMotif

def get_combined_counts(table, positions):
    """Return a table of mutated ('M') and reference ('R') counts per state.

    Parameters
    ----------
    table
        Counts table handed to ``reduced_one_position`` or
        ``reduced_multiple_positions``.
    positions
        Either a single position name (a ``str``) or a sequence of position
        names.

    Returns
    -------
    A table with a 'mut' column, one base column per position ('base' for a
    single position, 'base1', 'base2', ... otherwise) and a 'count' column,
    sorted by every column except 'count'.
    """
    bases = 'ACGT'
    # isinstance (rather than an exact type comparison) also accepts str
    # subclasses as a single position.
    if isinstance(positions, str):
        counts = reduced_one_position(table, positions)
        # Single position: each state is one base character.
        states = bases
        header = ['mut', 'base', 'count']
    else:
        counts = reduced_multiple_positions(table, *positions)
        num_positions = len(positions)
        # Multiple positions: each state is a tuple of bases.
        states = product(bases, repeat=num_positions)
        header = (['mut'] +
                  ['base%d' % i for i in range(1, num_positions + 1)] +
                  ['count'])

    mut_counts = counts['M']
    unmut_counts = counts['R']

    combined = []
    for state in states:
        combined.append(['R'] + list(state) + [unmut_counts[state]])
        combined.append(['M'] + list(state) + [mut_counts[state]])

    counts_table = LoadTable(header=header, rows=combined)
    return counts_table.sorted(columns=header[:-1])

Пример #3

Показать файл

Файл: mutation_analysis.py Проект: xtmgah/MutationMotif

def get_grouped_combined_counts(table, position, group_label):
    """Wrap ``motif_count.get_combined_counts`` for groups.

    The table is split on the distinct values of ``group_label``; combined
    counts are computed per group and concatenated, with the group value
    added as a column.

    Parameters
    ----------
    table
        Counts table containing a ``group_label`` column.
    position
        Position argument forwarded to ``motif_count.get_combined_counts``.
    group_label
        Name of the column whose distinct values define the groups.

    Returns
    -------
    A table whose first column is ``group_label`` followed by the columns of
    the per-group counts table, sorted by ``group_label`` then 'mut'.
    """
    group_cats = table.distinct_values(group_label)
    all_data = []
    header = None
    for category in group_cats:
        subtable = table.filtered(lambda x: x == category, columns=group_label)
        counts = motif_count.get_combined_counts(subtable, position)
        if header is None:
            # Every group yields the same columns, so take them from the first.
            header = [group_label] + list(counts.header)

        counts = counts.with_new_column(group_label, lambda x: category,
                                        columns=counts.header[0])
        all_data.extend(counts.tolist(header))

    counts = LoadTable(header=header, rows=all_data)
    # sorted() returns a new table; the original code discarded it and
    # returned the unsorted table.
    return counts.sorted(columns=[group_label, 'mut'])

Пример #4

Показать файл

def display_available_dbs(account, release=None):
    """Display the available Ensembl databases at the nominated host.

    Parameters
    ----------
    account
        Host account forwarded to ``get_db_name``.
    release
        Optional release forwarded to ``get_db_name``.

    Returns
    -------
    A table with columns 'Release', 'Db Name', 'Species' and 'Common Name',
    sorted by 'Release' then 'Db Name'. Compara databases show '-' for the
    species and common name.
    """
    db_list = get_db_name(account=account, db_type="core", release=release)
    db_list += get_db_name(account=account, db_type="compara", release=release)
    rows = []
    for db_name in db_list:
        species_name = db_name.species
        # Reset per database: without this, a database with no species would
        # either raise UnboundLocalError (first iteration) or reuse the
        # previous database's common name.
        common_name = None
        if species_name:
            common_name = Species.get_common_name(species_name,
                                                  level="ignore")

        if "compara" in db_name.name:
            species_name = common_name = "-"
        rows.append([db_name.release, db_name.name, species_name, common_name])

    table = LoadTable(header=["Release", "Db Name", "Species", "Common Name"],
                      rows=rows,
                      space=2)
    table = table.sorted(["Release", "Db Name"])
    table.legend = (
        "Values of 'None' indicate cogent does not have a value for that database name."
    )
    return table

Пример #5

Показать файл

Файл: batch.py Проект: HuttleyLab/mutationorigin

def collate(base_path, output_path, exclude_paths, overwrite):
    """Collate all classifier performance stats into a single tsv file.

    Finds every file matching "*performance.json*" under ``base_path``, builds
    one record per file, writes them to ``collated.tsv.gz`` in
    ``output_path``, then writes per-factor summary statistics to
    ``summary_statistics.tsv.gz`` in the same directory.

    Parameters
    ----------
    base_path
        Directory searched for performance json files.
    output_path
        Directory receiving the collated table, summary table and log file.
    exclude_paths
        ``None`` or a comma-separated string; each entry is passed to
        ``skip_path`` to decide whether a file is skipped.
    overwrite
        If false and the collated output already exists, nothing is done.

    Raises
    ------
    SystemExit
        With status 0 when the output exists and ``overwrite`` is false.
    """
    # Local import keeps this block self-contained; sys.exit replaces the
    # site-provided exit() helper, which is meant for interactive sessions.
    import sys

    LOGGER.log_args()
    outpath = os.path.join(output_path, "collated.tsv.gz")
    logfile_path = os.path.join(output_path, "collated.log")
    if os.path.exists(outpath) and not overwrite:
        click.secho(f"Skipping. {outpath} exists. "
                    "Use overwrite to force.",
                    fg='green')
        sys.exit(0)

    # NOTE(review): base_path is interpolated unquoted into a command string;
    # paths containing spaces or shell metacharacters may misbehave depending
    # on how exec_command runs it -- confirm and quote if it uses a shell.
    stat_fns = exec_command(f'find {base_path} -name' ' "*performance.json*"')
    stat_fns = stat_fns.splitlines()
    if not stat_fns:
        msg = f'No files matching "*performance.json*" in {base_path}'
        click.secho(msg, fg='red')
        return

    LOGGER.log_file_path = logfile_path

    records = []
    keys = set()
    exclude_paths = [] if exclude_paths is None else exclude_paths.split(',')
    num_skipped = 0
    for fn in tqdm(stat_fns, ncols=80):
        if skip_path(exclude_paths, fn):
            num_skipped += 1
            LOGGER.log_message(fn, label="SKIPPED FILE")
            continue

        LOGGER.input_file(fn)
        data = load_json(fn)
        labels = data['classification_report']['labels']
        fscores = data['classification_report']['f-score']
        row = {
            "stat_path": fn,
            "classifier_path": data["classifier_path"],
            "auc": data["auc"],
            "algorithm": data["classifier_label"],
            "mean_precision": data["mean_precision"],
            f"fscore({labels[0]})": fscores[0],
            f"fscore({labels[1]})": fscores[1],
            'balanced_accuracy': data['balanced_accuracy']
        }
        row.update(data["feature_params"])
        # Records may carry different feature params; collect the union so
        # every record can be laid out against the same columns.
        keys.update(row.keys())
        records.append(row)

    columns = sorted(keys)
    # Keys missing from a record become None in its row.
    rows = [[record.get(col, None) for col in columns] for record in records]
    table = LoadTable(header=columns, rows=rows)
    table = table.sorted(reverse="auc")
    table = table.with_new_column(
        "name",
        lambda x: model_name_from_features(*x),
        columns=["flank_size", "feature_dim", "usegc", "proximal"])
    table = table.with_new_column("size",
                                  sample_size_from_path,
                                  columns="classifier_path")
    table.write(outpath)
    LOGGER.output_file(outpath)

    # make summary statistics via grouping by factors
    factors = [
        "algorithm", "name", "flank_size", "feature_dim", "proximal", "usegc",
        "size"
    ]
    summary = summary_stat_table(table, factors=factors)
    outpath = os.path.join(output_path, "summary_statistics.tsv.gz")
    summary.write(outpath)
    LOGGER.output_file(outpath)
    if num_skipped:
        click.secho("Skipped %d files that matched exclude_paths" %
                    num_skipped,
                    fg='red')