示例#1
0
 def check_a_line(a_line):
     """
     Check whether one line of the input BED file has an accepted format

     The line passes only if is_5col_bed() accepts it. Otherwise, when
     check_3col_bed (from the enclosing scope) is set and is_3col_bed()
     accepts the line, the file fname is converted with threetofive() and
     the user is told to rerun with the converted file; the check still
     fails. In every other case the format problem is reported.

     @type  a_line: str
     @param a_line: the line to be checked

     @rtype:   bool
     @return:  True if the line passed the 5-column check, otherwise False
     """
     # Check the line that was actually passed in (the original ignored the
     # argument and always looked at the closure variable last_line).
     if is_5col_bed(a_line):
         return True

     if check_3col_bed and is_3col_bed(a_line):
         warn("You input a 3 column bed file like this:\t\t%s" %
              a_line[:50])
         info("[3 Column to 5 Column] %s ==> %s " %
              (suffix_5col(fname), fname))
         threetofive(fname)
         info(
             "Use %s instead of %s as the input BED and run the executive Again"
             % (suffix_5col(fname), fname))
         return False

     # Neither format matched: report the offending line and the expected one.
     if check_3col_bed:
         error(
             "The input bed file %s has a wrong format!(3 column checking active)"
             % fname)
     else:
         error("The input bed file %s has a wrong format!" % fname)
     print("Wrong Format:\t\t\t%s" % a_line[:50])
     print("Right Format should look like:\t%s" % (
         'chr1\t567577\t567578\tMACS_peak_1\t119.00'))
     if check_3col_bed:
         # The 3-column hint is only relevant when 3-column checking is on.
         print("Or the depreciate 3-column format like this:\t%s" % (
             'chr1\t567577\t567578'))
     return False
示例#2
0
def check_fasta_dna(fname):
    """
    Check if a file has the format of fasta

    Only the first two lines are inspected: the first must start with ">"
    and the second must contain at least one of the characters A, G, C, T, N.

    @type  fname: str
    @param fname: path of the file to be checked

    @rtype:   bool
    @return:  whether the file passed the fasta check
    """
    if not check_common(fname, ".fa",
                        maxsize=10737418240):  # 10G=10*1024^3=10737418240
        return False
    print(fname)
    with open(fname) as fasta_f:
        first_line = fasta_f.readline()
        # startswith() instead of first_line[0]: readline() returns "" for an
        # empty file, and indexing that would raise IndexError.
        if not first_line.startswith(">"):
            error("The input fasta file %s has a wrong format!" % fname)
            print("Wrong Format:\t\t\t%s" % first_line[:50])
            print("Right Format should look like:\t%s" % (
                '>chr1:1150372-1150572'))
            return False

        second_line = fasta_f.readline()
        # NOTE(review): re.search only requires one upper-case A/G/C/T/N
        # somewhere in the line, so this is a loose sanity check and rejects
        # all-lower-case sequence lines. Left unchanged to keep the accepted
        # input set stable.
        fasta_pattern_scd = "[AGCTN]+"
        if not re.search(fasta_pattern_scd, second_line):
            error("The input fasta file %s has a wrong format!" % fname)
            print("Wrong Format:\t\t\t%s" % second_line[:50])
            print("Right Format should look like:\t%s" % ('NGGGCCATTCA'))
            return False
    return True
示例#3
0
def fetch_seq_record(fasta_file, alpha=_alphabet):
    """
    Read every sequence record from a fasta file

    The file is first validated with check_fasta_dna(); if validation fails
    an error is reported and the program exits with status 1.

    @type  fasta_file: str
    @param fasta_file: path of the fasta file
    @type  alpha: object
    @param alpha: alphabet argument handed to SeqIO.parse
    @rtype:   list
    @return:  all records produced by SeqIO.parse for the file
    """
    if check_fasta_dna(fasta_file):
        # Materialize the parser's iterator so callers get a plain list.
        return list(SeqIO.parse(fasta_file, "fasta", alpha))

    error("fasta file validation failed")
    sys.exit(1)
示例#4
0
def check_cmd(command_):
    """
    Check whether a command can be run in shell

    The command is executed through the shell with its output discarded,
    and is considered missing when the exit code is 127.

    @type  command_: str
    @param command_: the command you want to check, for example, "awk"
    @rtype:   bool
    @return:  whether the command passed the check
    """
    import os  # local import so this block does not depend on file-level imports

    # Discard the output via os.devnull instead of a pipe (-1): a pipe that is
    # never read can fill up and block the child process.
    # NOTE: command_ is handed to the shell verbatim (shell=True); only call
    # this with trusted command names.
    with open(os.devnull, "w") as devnull:
        exit_code = subcall(command_, shell=True, stdout=devnull,
                            stderr=devnull)

    # Parenthesized so the centered string is what gets printed, both as a
    # Python 2 print statement and as a Python 3 print() call.
    print(("check command %s" % command_).center(30, "-"))

    if exit_code == 127:  # when command not found, exit_code is 127
        error("No such command as '%s'" % command_)
        return False
    return True
示例#5
0
def check_xml(fname):
    """
    Check if a file has the format of xml

    The file must pass check_common() (10M size limit), be parseable by
    ElementTree, and every "motif" node found must carry a non-empty 'id'
    attribute.

    @type  fname: str
    @param fname: path of the file to be checked

    @rtype:   bool
    @return:  whether the file passed the xml check
    """
    if not check_common(fname, ".xml",
                        maxsize=10485760):  # 10M = 1024*1024*10 = 10485760
        return False

    xmltree = ElementTree()
    try:
        xmltree.parse(fname)
    except Exception:
        # Any parse or read failure means the file is unusable. A bare
        # "except:" would also swallow KeyboardInterrupt and SystemExit.
        error("Fail to parser the xml file.")
        error(
            "The input XML file %s has a wrong format, please check it again."
            % fname)
        return False

    for motif_node in xmltree.findall("motif"):
        # A missing or empty id means this is not a motif description file.
        if not motif_node.get('id'):
            error("No 'id' found for node, not a xml for motif information?")
            return False

    return True
示例#6
0
def fetch_pssm_xml(xmlfile):
    """
    Load motif information, including each motif's pssm, from an XML file

    The file is validated with check_xml() first; on failure an error is
    reported and the program exits with status 1. After parsing, any of the
    first four values of a pssm position that equals 0.0 is raised by
    0.00001 (presumably a pseudo count for the four bases -- confirm).

    @type  xmlfile: str
    @param xmlfile: path of the XML file
    @rtype:   dict
    @return:  motif information
    """
    xml_ok = check_xml(xmlfile)
    if not xml_ok:
        error("xml file validation failed")
        sys.exit(1)

    parser = MP.MotifParser()
    parser.tag_list = ["dbd", "synonym", "description"]
    parser.Parser(xmlfile)

    for motif_id in parser.motifs:
        for position in parser.motifs[motif_id]['pssm']:
            for col in range(4):
                if position[col] == 0.0:
                    # Modify the matrix in place so the returned motifs see it.
                    position[col] += 0.00001
                    print("Found one")
    return parser.motifs
示例#7
0
def check_common(fname, suffix, maxsize=1073741824):  # 1GB=1024^3=1073741824B
    """
    Check if a file exists, is not larger than maxsize and has the suffix

    @type  fname: str
    @param fname: path of the file to be checked
    @type  suffix: str
    @param suffix: the expected suffix; a mismatch only produces a WARNING
                   and WON'T fail this check
    @type  maxsize: int
    @param maxsize: the maximum allowed file size in bytes

    @rtype:   bool
    @return:  whether the file passed the check
    """
    if not os.path.isfile(fname):
        # This helper is shared by the fasta and xml checks too, so the
        # message must not claim the missing file is a bed file.
        error("No such file: %s" % fname)
        return False
    if os.path.getsize(fname) > maxsize:
        error("The input file %s is larger than maxsize:%d bytes!" %
              (fname, maxsize))
        return False
    if not fname.endswith(suffix):
        # Suffix mismatch is advisory only.
        warn("Your input file %s doesn't have the suffix %s" % (fname, suffix))
    return True