Python split_fasta示例，seq_analysis_utils.split_fasta Python示例

示例#1

0

显示文件

文件： psortb.py 项目： jpmeneboo/pico_galaxy

        if parts == header:
            # Ignore the header line
            continue
        if not parts[-1] and len(parts) == len(header) + 1:
            # Ignore dummy blank extra column, e.g.
            # "...2.0\t\tPSORTb version 3.0\t\n"
            parts = parts[:-1]
        assert len(parts) == len(header), \
            "%i fields, not %i, in line:\n%r" % (len(line), len(header), line)
        out_handle.write(line)
        count += 1
    return count

# Split the input FASTA into chunk files. When the input holds no
# sequences split_fasta hands back an empty list (i.e. zero temp files),
# so the two lists built from it below are empty as well.
fasta_files = split_fasta(
    fasta_file,
    os.path.join(tmp_dir, "tmhmm"),
    FASTA_CHUNK,
)
# Every chunk file is paired with an output file of the same name + ".out".
temp_files = [chunk + ".out" for chunk in fasta_files]
# One command string per chunk; its output is redirected to the paired file.
jobs = [
    "psort %s %s %s -o %s %s > %s"
    % (org_type, cutoff, divergent, out_type, chunk, chunk_out)
    for chunk, chunk_out in zip(fasta_files, temp_files)
]


def clean_up(file_list):
    """Remove temp files, and if possible the temp directory.

    Every entry of ``file_list`` that is an existing regular file is
    deleted; entries that are not files are skipped. Afterwards the
    module-level ``tmp_dir`` is removed on a best-effort basis.
    """
    for f in file_list:
        if os.path.isfile(f):
            os.remove(f)
    try:
        os.rmdir(tmp_dir)
    except OSError:
        # Best effort only: the directory may be non-empty or already gone.
        # Only filesystem errors are ignored, so genuine programming errors
        # are no longer silently hidden.
        pass

if len(jobs) > 1 and num_threads > 1:

示例#2

0

显示文件

文件： promoter2.py 项目： Kirivije/pico_galaxy

                                  "Medium likely prediction",
                                  "Highly likely prediction"]:
                stop_err("ERROR: Problem with line: %r" % line)
            out_handle.write("%s\t%s\t%s\t%s\n" % (identifier, position, score, likelihood))
    return queries
    
# get_path_and_binary supplies a pair, unpacked here as the working
# directory and the binary used to build the commands below.
working_dir, bin = get_path_and_binary()

# Stop straight away when the input FASTA path is not an existing file.
if not os.path.isfile(fasta_file):
    stop_err("ERROR: Missing input FASTA file %r" % fasta_file)

# An input FASTA file without sequences makes split_fasta return an
# empty list (i.e. zero temp files).
# The FASTA descriptions are dropped on purpose (keep_descr=False) to
# sidestep a promoter2 bug with descriptions over 200 characters.
fasta_files = split_fasta(
    fasta_file,
    os.path.join(tmp_dir, "promoter"),
    FASTA_CHUNK,
    keep_descr=False,
)
# Every chunk file is paired with an output file of the same name + ".out".
temp_files = [chunk + ".out" for chunk in fasta_files]
# One command string per chunk; its output is redirected to the paired file.
jobs = [
    "%s %s > %s" % (bin, chunk, chunk_out)
    for chunk, chunk_out in zip(fasta_files, temp_files)
]

def clean_up(file_list):
    """Remove temp files, and if possible the temp directory.

    Every entry of ``file_list`` that is an existing regular file is
    deleted; entries that are not files are skipped. Afterwards the
    module-level ``tmp_dir`` is removed on a best-effort basis.
    """
    for f in file_list:
        if os.path.isfile(f):
            os.remove(f)
    try:
        os.rmdir(tmp_dir)
    except OSError:
        # Best effort only: the directory may be non-empty or already gone.
        # A bare ``except:`` would also swallow KeyboardInterrupt and
        # SystemExit, so only filesystem errors are ignored here.
        pass

if len(jobs) > 1 and num_threads > 1:
    #A small "info" message for Galaxy to show the user.

示例#3

0

显示文件

文件： signalp3.py 项目： HuttonICS/pico_galaxy

            )
        )
    tab_handle.close()
    gff_handle.close()


# With one thread there is no reason to make multiple files (and doing
# so gives more chance of hitting file system glitches), yet split_fasta
# is still called so that truncation is applied. Use one huge chunk.
if num_threads == 1:
    FASTA_CHUNK = sys.maxsize

fasta_files = split_fasta(fasta_file, os.path.join(tmp_dir, "signalp"),
                          n=FASTA_CHUNK, truncate=truncate, max_len=MAX_LEN)
# Every chunk file is paired with an output file of the same name + ".out".
temp_files = [chunk + ".out" for chunk in fasta_files]
assert len(temp_files) == len(fasta_files)
# One command string per chunk; its output is redirected to the paired file.
jobs = ["signalp -short -t %s %s > %s" % (organism, chunk, chunk_out)
        for chunk, chunk_out in zip(fasta_files, temp_files)]
# Sanity check: one input chunk, one output file and one job each.
assert len(jobs) == len(temp_files) == len(fasta_files)


def clean_up(file_list):
    """Remove temp files, and if possible the temp directory."""
    for f in file_list:

示例#4

0

显示文件

        assert first60.startswith("First60="), line
        first60 = first60[8:]
        assert predhel.startswith("PredHel="), line
        predhel = predhel[8:]
        assert topology.startswith("Topology="), line
        topology = topology[9:]
        out_handle.write(
            "%s\t%s\t%s\t%s\t%s\t%s\n" %
            (identifier, length, exp_aa, first60, predhel, topology))
        count += 1
    return count


# Split the input FASTA into chunk files. When the input holds no
# sequences split_fasta hands back an empty list (i.e. zero temp files),
# so the two lists built from it below are empty as well.
fasta_files = split_fasta(
    fasta_file,
    os.path.join(tmp_dir, "tmhmm"),
    FASTA_CHUNK,
)
# Every chunk file is paired with an output file of the same name + ".out".
temp_files = [chunk + ".out" for chunk in fasta_files]
# One command string per chunk; its output is redirected to the paired file.
jobs = ["tmhmm -short %s > %s" % (chunk, chunk_out)
        for chunk, chunk_out in zip(fasta_files, temp_files)]


def clean_up(file_list):
    """Remove temp files, and if possible the temp directory."""
    for f in file_list:
        if os.path.isfile(f):
            os.remove(f)
    try:
        os.rmdir(tmp_dir)
    except Exception:

示例#5

0

显示文件

文件： wolf_psort.py 项目： bgruening/pico_galaxy

# Positional command-line arguments.
# sys.argv[2] is handed to thread_count, which is called with default=4.
num_threads = thread_count(sys.argv[2], default=4)
# Path of the input FASTA file; it is split into chunks further down.
fasta_file = sys.argv[3]
# NOTE(review): presumably the destination of the cleaned tabular output;
# it is also passed as the second argument to split_fasta below - confirm.
tabular_file = sys.argv[4]

def clean_tabular(raw_handle, out_handle):
    """Clean up WoLF PSORT output to make it tabular.

    Lines are read from ``raw_handle``. Blank lines and lines whose first
    non-whitespace character is "#" are skipped. Every other line is split
    into a name and a comma-separated list of "compartment score" pairs;
    one tab-separated row (name, compartment, score, rank) is written to
    ``out_handle`` per pair, with rank counting from 1 in input order.

    Raises ValueError if a data line has no second field, or if a pair
    does not split into exactly two whitespace-separated parts.
    """
    for line in raw_handle:
        # Strip before testing: a line read from a handle still carries its
        # newline, so a blank line would otherwise never look empty and
        # would fail when unpacked below.
        line = line.strip()
        if not line or line.startswith("#"):
            continue
        name, data = line.split(None, 1)
        for rank, comp_data in enumerate(data.split(","), 1):
            comp, score = comp_data.split()
            out_handle.write("%s\t%s\t%s\t%i\n" % (name, comp, score, rank))

# Split the input FASTA into chunk files; tabular_file is passed as the
# second argument to split_fasta.
fasta_files = split_fasta(
    fasta_file,
    tabular_file,
    n=FASTA_CHUNK,
)
# Every chunk file is paired with an output file of the same name + ".out".
temp_files = [chunk + ".out" for chunk in fasta_files]
assert len(temp_files) == len(fasta_files)
# One command string per chunk: the chunk is fed in with "<" and the
# output is redirected to the paired file with ">".
jobs = [
    "%s %s < %s > %s" % (exe, organism, chunk, chunk_out)
    for chunk, chunk_out in zip(fasta_files, temp_files)
]
# Sanity check: one input chunk, one output file and one job each.
assert len(jobs) == len(temp_files) == len(fasta_files)

def clean_up(file_list):
    """Delete every entry of ``file_list`` that is an existing regular file.

    Entries that are not files (missing paths, directories) are left alone.
    """
    for path in file_list:
        if not os.path.isfile(path):
            continue
        os.remove(path)

if len(jobs) > 1 and num_threads > 1:
    # A small "info" message for Galaxy to show the user.
    # Written as a call with a single argument so that the line is valid
    # on Python 3 and prints the same text on Python 2.
    print("Using %i threads for %i tasks" % (min(num_threads, len(jobs)), len(jobs)))
# Hand the command strings and the thread count to run_jobs.
# NOTE(review): the shape of the returned value is not visible here.
results = run_jobs(jobs, num_threads)

示例#6

0

显示文件

文件： wolf_psort.py 项目： JasperYH/pico_galaxy

# Taken from the command line (sys.argv[4]); it is passed as the second
# argument to split_fasta further down.
tabular_file = sys.argv[4]


def clean_tabular(raw_handle, out_handle):
    """Clean up WoLF PSORT output to make it tabular.

    Lines are read from ``raw_handle``. Blank lines and lines whose first
    non-whitespace character is "#" are skipped. Every other line is split
    into a name and a comma-separated list of "compartment score" pairs;
    one tab-separated row (name, compartment, score, rank) is written to
    ``out_handle`` per pair, with rank counting from 1 in input order.

    Raises ValueError if a data line has no second field, or if a pair
    does not split into exactly two whitespace-separated parts.
    """
    for line in raw_handle:
        # Strip before testing: a line read from a handle still carries its
        # newline, so a blank line would otherwise never look empty and
        # would fail when unpacked below.
        line = line.strip()
        if not line or line.startswith("#"):
            continue
        name, data = line.split(None, 1)
        for rank, comp_data in enumerate(data.split(","), 1):
            comp, score = comp_data.split()
            out_handle.write("%s\t%s\t%s\t%i\n" %
                             (name, comp, score, rank))


# Split the input FASTA into chunk files; tabular_file is passed as the
# second argument to split_fasta.
fasta_files = split_fasta(
    fasta_file,
    tabular_file,
    n=FASTA_CHUNK,
)
# Every chunk file is paired with an output file of the same name + ".out".
temp_files = [chunk + ".out" for chunk in fasta_files]
assert len(temp_files) == len(fasta_files)
# One command string per chunk: the chunk is fed in with "<" and the
# output is redirected to the paired file with ">".
jobs = ["%s %s < %s > %s" % (exe, organism, chunk, chunk_out)
        for chunk, chunk_out in zip(fasta_files, temp_files)]
# Sanity check: one input chunk, one output file and one job each.
assert len(jobs) == len(temp_files) == len(fasta_files)


def clean_up(file_list):
    """Delete every entry of ``file_list`` that is an existing regular file.

    Entries that are not files (missing paths, directories) are left alone.
    """
    for path in file_list:
        if not os.path.isfile(path):
            continue
        os.remove(path)

示例#7

0

显示文件

文件： signalp3.py 项目： peterjc/pico_galaxy

            )
        )
    tab_handle.close()
    gff_handle.close()


# With one thread there is no reason to make multiple files (and doing
# so gives more chance of hitting file system glitches), yet split_fasta
# is still called so that truncation is applied. Use one huge chunk.
if num_threads == 1:
    FASTA_CHUNK = sys.maxsize

fasta_files = split_fasta(fasta_file, os.path.join(tmp_dir, "signalp"),
                          n=FASTA_CHUNK, truncate=truncate, max_len=MAX_LEN)
# Every chunk file is paired with an output file of the same name + ".out".
temp_files = [chunk + ".out" for chunk in fasta_files]
assert len(temp_files) == len(fasta_files)
# One command string per chunk; its output is redirected to the paired file.
jobs = ["signalp -short -t %s %s > %s" % (organism, chunk, chunk_out)
        for chunk, chunk_out in zip(fasta_files, temp_files)]
# Sanity check: one input chunk, one output file and one job each.
assert len(jobs) == len(temp_files) == len(fasta_files)


def clean_up(file_list):
    """Remove temp files, and if possible the temp directory."""
    for f in file_list: