Python smart_split示例

编程语言: Python

命名空间/包名称: ifr

方法/功能: smart_split

hotexamples.com的示例: 2

Python smart_split - 共找到 2 个示例。以下是从开源项目中提取的、评分最高的 ifr.smart_split 真实 Python 代码示例。您可以为示例评分，帮助我们提高示例质量。

示例#1

显示文件

文件： affymetrix.py 项目： lakinsm/iterative_feature_removal

def get_geneids_from_affy(affy_id_list, affy_file=None):
    """
    Returns a dictionary mapping affy probe ids to
    the tuple (genebank,unigene,symbol), read from a csv file from
    affymetrix with the appropriate information.

    Lines starting with '#' and blank lines are ignored. The first remaining
    line is treated as the column header row and skipped. For every other
    row, column 0 is used as the probe id and columns 8, 10 and 14 as
    genebank, unigene and symbol respectively.

    @param affy_id_list: A list of strings like '1000_at'...
    NOTE(review): this argument is not read by the function body, so the
    returned dictionary holds every probe row found in the file, not only
    the ids listed here. Confirm with callers before adding a filter.
    @param affy_file: If none, then the function get_affy_key_file()
    will be called to get the full file name and path to the csv file,
    else specify the filename/path.
    @return: dict { probe_id : (genebank, unigene, symbol) }
    """
    if affy_file is None:
        affy_file = get_affy_key_file()

    lines = []
    with open(affy_file, "r") as f:
        for tmpline in f:
            # Omit header/comment lines. Blank lines are dropped as well:
            # they carry no columns, so the indexing below would fail on them.
            if tmpline.strip() and not tmpline.startswith("#"):
                lines.append(tmpline)

    affy_dict = {}
    total = len(lines)  # loop-invariant, computed once for progress reporting
    for i, ln in enumerate(lines[1:]):  # lines[0] is the column headers
        ifr.print_progress(i, total)
        tmp = ifr.smart_split(ln, sep=",")
        affy_dict[tmp[0]] = (tmp[8], tmp[10], tmp[14])

    return affy_dict

示例#2

显示文件

文件： affymetrix.py 项目： lakinsm/iterative_feature_removal

def gen_affy_to_geneId_dict(affy_file_subdir="HG_U95A.na33.annot", affy_fn="HG_U95A.na33.annot.csv"):
    """
    Converts the Affymetrix probe set ids in an annotation csv file into gene
    lists with names suitable for querying gather or kegg. Generates
    a dictionary with entries { affy_id : gene_id_list }. Most times, gene_id_list will
    have only a single entry, but several probes have multiple Gene IDs given.

    Lines starting with '#' and blank lines are ignored. The first remaining
    line is treated as the column header row and skipped. Column 0 of each
    row is the probe id and column 14 is the gene symbol field. A value of
    "---" yields an empty list, and a value containing "///" is split into
    its non-empty, whitespace-stripped parts. Duplicates are removed while
    keeping the order in which the symbols first appear.

    @param affy_file_subdir: The subdirectory of the ifr.DATA_DIR that has
    the HG_U95A.na33.annot.csv file.
    @param affy_fn: The csv file in the subdirectory with the data. The parameter is
    provided in case the file was renamed from the original name of "HG_U95A.na33.annot.csv"
    @return: dict { affy_id : list of gene symbols }
    @note: Relies on a data file called HG_U95A.na33.annot.csv that must be present
    in the HG_U95A.na33.annot subdirectory of the linked Data directory. It would
    be most efficient to use this function once and save the resulting dictionary
    in a pickle file for later use instead of having to re-parse the data.
    """
    affy_file = os.path.join(ifr.DATA_DIR, affy_file_subdir, affy_fn)

    lines = []
    with open(affy_file, "r") as f:
        for tmpline in f:
            # Omit header/comment lines. Blank lines are dropped as well:
            # they carry no columns, so the indexing below would fail on them.
            if tmpline.strip() and not tmpline.startswith("#"):
                lines.append(tmpline)

    affy_dict = {}
    total = len(lines)  # loop-invariant, computed once for progress reporting
    for i, ln in enumerate(lines[1:]):  # lines[0] is the column headers
        ifr.print_progress(i, total)
        tmp = ifr.smart_split(ln, sep=",")
        key = tmp[0]
        val = tmp[14]
        if val == "---":
            # this affy id has no gene symbol
            genelist = []
        elif "///" in val:  # there are more than one GeneIds for this probe
            genelist = [x.strip() for x in val.split("///") if x.strip() != ""]
        else:
            genelist = [val]

        # Remove duplicates. dict.fromkeys keeps first-seen order, so the
        # result is reproducible, unlike the arbitrary order of a set.
        affy_dict[key] = list(dict.fromkeys(genelist))

    return affy_dict