Python encodeDNA示例

编程语言: Python

命名空间/包名称: concise.preprocessing.sequence

方法/功能: encodeDNA

hotexamples.com的示例: 4

Python encodeDNA - 共找到4个示例。以下是从开源项目中提取的、评价最高的 concise.preprocessing.sequence.encodeDNA 真实Python示例。您可以为示例评分，以帮助我们提高示例质量。

示例#1

显示文件

def preprocessing_function(nucleotide_sequence: str) -> np.ndarray:
    """One-hot encode a single nucleotide sequence.

    The kipoi deepbind model does not accept a string of nucleotides, so the
    sequence is wrapped in a one-element list and handed to ``encodeDNA``.

    Args:
        nucleotide_sequence (str): defined to be of length 101, though other
            lengths might be accepted.

    Returns:
        np.ndarray: of shape `[1, len(nucleotide_sequence), 4]`
    """
    # encodeDNA is called with a list, so build a batch of exactly one.
    single_item_batch = [nucleotide_sequence]
    return encodeDNA(single_item_batch)

示例#2

显示文件

文件： preproc.py 项目： liesaweigert/bpnet

def onehot_dinucl_shuffle(seqs):
    """Di-nucleotide shuffle one-hot encoded sequences.

    The input is converted to strings with ``one_hot2string`` (using the
    ``DNA`` vocabulary), each string is passed through ``dinuc_shuffle``,
    and the shuffled strings are encoded again with ``encodeDNA``.
    """
    sequence_strings = one_hot2string(seqs, vocab=DNA)
    shuffled_strings = [dinuc_shuffle(string) for string in sequence_strings]
    return encodeDNA(shuffled_strings)

示例#3

显示文件

def encode_dna(seq):
    """Pass ``seq`` to ``encodeDNA`` and return its result unchanged."""
    encoded = encodeDNA(seq)
    return encoded

示例#4

显示文件

def data(n_bases=10,
         spline_order=3,
         pos_class_weight=1.0,
         truncate=True,
         encode_splines=True,
         cache=True,
         minmax_scale=False):
    """Load the branchpoint train/test tables and build model inputs.

    Reads ``wide_data.csv`` from the ``train`` and ``test`` sub-directories
    of ``BR_DATA``, one-hot encodes the ``seq`` column, loads the secondary
    structure features and prepares the positional features listed in
    ``pos_columns``.

    Args:
        n_bases: forwarded to ``encodeSplines_common`` (used only when
            ``encode_splines`` is true).
        spline_order: forwarded to ``encodeSplines_common`` (used only when
            ``encode_splines`` is true).
        pos_class_weight: sample weight given to entries where
            ``y_train == 1``; all other entries get weight 1.
        truncate: if true, apply ``truncate_values`` to every positional
            feature of both the training and the test set.
        encode_splines: if true, positional features are encoded with
            ``encodeSplines_common``; otherwise their NaNs are filled with
            ``fill_nan`` and a trailing axis is added.
        cache: forwarded to ``get_structure``.
        minmax_scale: only consulted when ``encode_splines`` is false. If
            true, a ``MinMaxScaler`` is fitted per positional feature on the
            training data and applied to both training and test data.

    Returns:
        A pair ``(train, test)`` where ``train`` is the tuple
        ``(x_train, y_train, sample_weight, pos_columns,
        get_branchpoint_pwm_list(), position_stats, minmax_scalers)`` and
        ``test`` is ``(x_test, y_test)``. ``minmax_scalers`` is ``None``
        unless min-max scaling was performed.
    """
    dtw_train = pd.read_csv(BR_DATA + "/train/wide_data.csv")
    dtw_test = pd.read_csv(BR_DATA + "/test/wide_data.csv")

    # replace some column names
    renamed_columns = {"dist.1": "dist1", "dist.2": "dist2"}
    dtw_train.rename(columns=renamed_columns, inplace=True)
    dtw_test.rename(columns=renamed_columns, inplace=True)

    # NOTE(review): the train call passes an extra positional -1 to
    # col2num_array while the test call does not -- confirm this asymmetry
    # is intentional.
    y_train = col2num_array(dtw_train["is_branchpoint"], -1)
    x_train_pos = {col: col2num_array(dtw_train[col]) for col in pos_columns}
    x_train_seq = encodeDNA(dtw_train["seq"])
    y_test = col2num_array(dtw_test["is_branchpoint"])
    x_test_pos = {col: col2num_array(dtw_test[col]) for col in pos_columns}
    x_test_seq = encodeDNA(dtw_test["seq"])

    # get secondary structure - it is precomputed on a larger sequence
    # window ("seq_wide"), so check that trimming 100 positions from each
    # end of the wide sequence gives back "seq"
    assert dtw_train["seq_wide"][0][100:-100] == dtw_train["seq"][
        0]  # correct values
    x_train_struct = get_structure(dtw_train["seq_wide"],
                                   BR_DATA + "/train/rna_structure.npy", cache)
    x_train_struct = x_train_struct[:, 100:-100]
    x_test_struct = get_structure(dtw_test["seq_wide"],
                                  BR_DATA + "/test/rna_structure.npy", cache)
    # Crop the *test* structure. Cropping x_train_struct here (as the code
    # did before) would throw away the test structure and return a doubly
    # cropped copy of the training structure instead.
    x_test_struct = x_test_struct[:, 100:-100]

    if truncate:
        x_train_pos = {
            k: truncate_values(k, v)
            for k, v in x_train_pos.items()
        }
        x_test_pos = {k: truncate_values(k, v) for k, v in x_test_pos.items()}

    # Ranges computed only on the training set
    # NOTE(review): x_train_pos is passed for both arguments -- confirm.
    position_stats = get_minmax_range(x_train_pos, x_train_pos)

    minmax_scalers = None
    if encode_splines:
        x_train_pos_spl = encodeSplines_common(x_train_pos, position_stats,
                                               n_bases, spline_order)
        x_test_pos_spl = encodeSplines_common(x_test_pos, position_stats,
                                              n_bases, spline_order)
    else:
        x_train_pos_spl = {
            k: fill_nan(v)[:, :, np.newaxis]
            for k, v in x_train_pos.items()
        }
        x_test_pos_spl = {
            k: fill_nan(v)[:, :, np.newaxis]
            for k, v in x_test_pos.items()
        }
        if minmax_scale:
            # Scalers are fitted on the training features only and then
            # applied to both the training and the test features.
            minmax_scalers = {
                k: MinMaxScaler().fit(v.reshape(-1, 1))
                for k, v in x_train_pos_spl.items()
            }
            x_train_pos_spl = {
                k: minmax_scalers[k].transform(v.reshape(
                    (-1, 1))).reshape(v.shape)
                for k, v in x_train_pos_spl.items()
            }
            x_test_pos_spl = {
                k: minmax_scalers[k].transform(v.reshape(
                    (-1, 1))).reshape(v.shape)
                for k, v in x_test_pos_spl.items()
            }

    x_train = merge_dicts(x_train_pos_spl, {
        "seq": x_train_seq,
        "struct": x_train_struct
    })
    x_test = merge_dicts(x_test_pos_spl, {
        "seq": x_test_seq,
        "struct": x_test_struct
    })

    # fill_nan is called with -1 for the targets and append a trailing axis
    y_train = fill_nan(y_train, -1)[:, :, np.newaxis]
    y_test = fill_nan(y_test, -1)[:, :, np.newaxis]

    # todo update return
    # Weight positive targets by pos_class_weight, everything else by 1;
    # the trailing axis added above is squeezed away again.
    sample_weight = np.squeeze(np.where(y_train == 1, pos_class_weight, 1), -1)
    return (x_train, y_train, sample_weight, pos_columns,
            get_branchpoint_pwm_list(), position_stats,
            minmax_scalers), (x_test, y_test)