Example #1
0
def bedtools_gcov(OMA):
    os.chdir(new_dir + OMA + "/bedtool_coverage/bam_files")
    for bam_file in glob.glob("*.bam"):
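        # strip the "_sorted.bam" suffix (11 characters) to recover the mapping ID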
        bamID = bam_file[:-11]
        unix("bedtools genomecov -ibam " + bam_file + " -d > " +
             "positionsCov_" + bamID + ".txt",
             shell=True)
    unix("mv *.txt ../genCov_files", shell=True)
Example #2
0
def bowtie_build(OMA):
    os.chdir(new_dir)
    os.mkdir(OMA)
    os.chdir(OMA)
    os.mkdir("bowtie_index")
    os.chdir(old_dir + OMA)
    os.chdir("fusion_nuc_fams")
    for fasta_file in glob.glob("*.fasta"):
        fam_geneID = fasta_file[:-18]
        unix("bowtie2-build -f " + fasta_file + " " + new_dir + OMA +
             "/bowtie_index/" + fam_geneID,
             shell=True)
Example #3
0
def second_blast(fasta):
    sp_fasta = fasta.split('/')[-1]
    sp_assem = sp_fasta.split('.')[0]
    #Search for hbx genes with mmseq easy-search
    print('\n')
    print('Running MMseqs;', sp_assem)
    unix(
        'mmseqs easy-search ../../raw/hbx_data/family_data/' + args.gene +
        '.fasta ' + fasta + ' hbx_mmseqoutput/' + sp_assem + '_' + args.gene +
        '.m8 tmp --spaced-kmer-pattern 1101111 -k 6 -a -e 1 --num-iterations 2',
        shell=True)
    mmseq_outfile.append(sp_assem + '_' + args.gene + '.m8')
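# `mmseq_outfile` is not declared in this excerpt; it is assumed to be a
# module-level list that collects the MMseqs output file names, e.g.:
mmseq_outfile = []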
Example #4
0
def sam_to_bam(OMA):
    os.chdir(new_dir + OMA)
    os.mkdir("bedtool_coverage")
    os.chdir("bedtool_coverage")
    os.mkdir("bam_files")
    os.mkdir("genCov_files")
    os.chdir(new_dir + OMA + "/bowtie_mapping/")
    for sam_file in glob.glob("*.sam"):
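        # strip the "_mapped.sam" suffix (11 characters) to recover the mapping ID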
        map_ID = sam_file[:-11]
        unix("samtools view -b " + sam_file + " | samtools sort -o " + map_ID +
             "_sorted.bam",
             shell=True)
    unix("mv *.bam ../bedtool_coverage/bam_files", shell=True)
Example #5
0
def hbx_blast(fasta):
    sp_file = fasta.split('/')[-1]
    sp_name = sp_file.split('.fast')[0]
    print('Running BLAST;', sp_name)
    #Make blast database from genome fasta files
    unix('makeblastdb -dbtype nucl -in ' + fasta + ' -out genome_blastdb/' +
         sp_name,
         shell=True)
    #Search for hbx genes in unannotated genomes
    unix(
        'tblastn -query ../../raw/hbx_data/homeobox.fasta -db genome_blastdb/'
        + sp_name +
        ' -evalue 1 -seg yes -max_target_seqs 5000 -outfmt "6 qseqid sseqid evalue pident bitscore qstart qend qlen sstart send slen" -out '
        + sp_name + '.blastoutput.fa',
        shell=True)
Example #6
0
def get_zpool_status():
    """call zpool status"""
    st, ret = unix("ZPOOL_SCRIPTS_AS_ROOT=1 zpool status -c size")
    html = '<pre>\n'
    cksum = False
    for line in ret.split('\n'):
        meter = "        "
        if line.find('state:') > -1:
            if line.find("ONLINE"):
                meter = "<meter id=\"bullet\" max=10 min=0 value=10 high=9 low=2 optimum=10></meter> "
            else:
                meter = "<meter id=\"bullet\"  max=10 min=0 value=10 high=9 low=2 optimum=0></meter> "
            html += line + " " + meter + "<br>"
        elif line.find("CKSUM") > -1:
            cksum = True
            html += "  " + line[1:] + '<br>'
        elif len(line.strip()) == 0:
            cksum = False
            html += '<br>'
        elif cksum:
            if line.find("ONLINE"):
                meter = "<meter id=\"bullet\" max=10 min=0 value=10 high=9 low=2 optimum=10></meter> "
            else:
                meter = "<meter id=\"bullet\" max=10 min=0 value=10 high=9 low=2 optimum=0></meter> "
            html += meter + line[1:] + "<br>"
        else:
            html += line + '<br>'
        #print(len(line))
    html += '</pre>'
    return html
Example #7
0
def show_data():
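    """build a short backup status summary string"""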
    global Running, Preparing, BackingUp, ServerPresent
    out = ""

    if not hostisup():
        out += "Unavaiable ]\n"
        out += "Unavaiable ]"
        return out

    if unix(cmds["running"])[0] == 0:
        Running = True

    if unix(cmds["backingup"])[0] == 0:
        BackingUp = True

    if unix(cmds["preparing"])[0] == 0:
        Preparing = True

    status, output = unix(cmds["latest"])
    #print status, output
    if status == 0:
        ServerPresent = True

    if ServerPresent:
        datestr_in = output[output.find('->') + 3:]

        latest_time = time.strptime(datestr_in, "%Y-%m-%d-%H%M%S")
        #print "Latest Backup - %s" % datestr
        datestr_out = time.strftime("%d/%m/%y", latest_time)

    if not ServerPresent:
        out += "Server Off-line ]\n"
    else:
        if BackingUp:
            out += "Backing Up ]\n"
        elif Preparing:
            out += "Preparing ]\n"
        elif Running:
            out += "Running ]\n"
        else:
            out += "Idle ]\n"

        if latest_time < time.localtime():
            out += "%s ]\n" % (datestr_out)
    return out[:-1]
Example #8
0
def get_usage():
    """get disk usage"""
    st, ret = unix("df -h | grep -vE ' /dev| /run| /sys| /boot| /etc| /$'")
    html = '<pre>\n'
    for line in ret.split('\n'):
        try:
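            # the 5th whitespace-separated column of df output is Use% (e.g. "42%")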
            perc = int(line.split()[4].replace('%', ''))
            html += "<meter max=100 min=0 value=%d high=75 low=50 optimum=19></meter> " % perc
        except (IndexError, ValueError):
            html += "         "
        html += line + '<br>'
    html += '</pre>'
    return html
Example #9
0
def get_drive_temps(pwd):
    """run the drive temps script"""
    st, ret = unix(pwd + '/zfs_drive_temps.sh -t')
    if ret.startswith('\n'):
        ret = ret[1:]
    html = '<pre>\n'
    for line in ret.split('\n'):
        if line.find("Celsius") > -1:
            tempC = int(line.split()[1])
            html += "<meter max=75 min=0 value=%d high=40 low=30 optimum=19></meter> " % tempC
        html += line.replace(' Celsius', '&deg;C').replace('-', ' ').replace(
            '_', ' ') + '<br>'
    html += '</pre>'
    return html
Example #10
0
def backupAlreadyRunning(errlog):
    """is the backup already running?
     - this works for macOS and Linux
    """
    from subprocess import getstatusoutput as unix
    pid = os.getpid()
    pidof = "ps -eo pid,command | grep -i 'python .*myowncrashplan' "
    pidof += "| grep -v grep | awk -F' ' '{print $1}' | grep -v '^%d$'" % pid
    _st, out = unix(pidof)

    if out != "":
        errlog.info("Backup already running. [pid %s], so exit here." % (out))
        return True

    return False
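# A minimal usage sketch, assuming a standard logging.Logger serves as the
# error log (the real script's logger setup is not shown in this excerpt):
if __name__ == "__main__":
    import logging
    import sys
    logging.basicConfig(level=logging.INFO)
    if backupAlreadyRunning(logging.getLogger("myowncrashplan")):
        sys.exit(0)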
Example #11
0
def bowtie_map(OMA):
    os.chdir(new_dir + OMA)
    os.mkdir("bowtie_mapping")
    os.chdir("bowtie_index")
    if OMA in SE_reads:
        single_IDs = []
        for single_file in glob.glob("*.bt2"):
            SRR_ID = single_file.split(".")[0]
            single_IDs.append(SRR_ID)
        for single_ID in set(single_IDs):
            for read_file in glob.glob(old_dir + OMA +
                                       "/data_fastq_trimmed/*.gz"):
                read = read_file.split("/")[-1]
                read_ID = read[:-14]
                unix("bowtie2 -x " + single_ID + " -U " + read_file +
                     " | samtools view -S -h -F4 - > " + single_ID + "_" +
                     read_ID + "_mapped.sam",
                     shell=True)

    elif OMA in PE_reads:
        IDs = []
        for paired_file in glob.glob("*.bt2"):
            SRR_ID = paired_file.split(".")[0]
            IDs.append(SRR_ID)
        read_files = []
        for ID in set(IDs):
            for read_file in glob.glob(old_dir + OMA +
                                       "/data_fastq_trimmed/*.gz"):
                read = read_file.split("/")[-1]
                read_ID = read[:-14]
                read_SRR = read_ID[:-9]
                trim_unpair = read_ID[-9:]
                if trim_unpair == '_unpaired':
                    continue
                else:
                    read_files.append(read_SRR)
            for reads in set(read_files):
                unix(
                    "bowtie2 -x " + ID + " -1 " + old_dir + OMA +
                    "/data_fastq_trimmed/" + reads +
                    "_1_paired_trim.fastq.gz " + "-2 " + old_dir + OMA +
                    "/data_fastq_trimmed/" + reads +
                    "_2_paired_trim.fastq.gz | samtools view -S -h -F4 - > "
                    + ID + "_" + reads + "_mapped.sam",
                    shell=True)
    #Move output SAM files to bowtie_mapping directory
    unix("mv *.sam ../bowtie_mapping/", shell=True)
Example #12
0
def data_download_user(query, species):
    '''
    Function called when using the --query flag. Requires a fasta
    file of the query species barcode, along with the name of a
    species or genus to search against in the BOLD database. Gets
    barcode sequences for the query species and the related species
    specified.
    '''

    os.makedirs('output', exist_ok=True)
    sp_query = query.split('.fa')[0]
    if species.split('_')[1] == '':
        try:
            os.mkdir('output/' + sp_query + '_genus_query')
        except FileExistsError:
            shutil.rmtree('output/' + sp_query + '_genus_query')
            os.mkdir('output/' + sp_query + '_genus_query')
        os.chdir('output/' + sp_query + '_genus_query')
    else:
        try:
            os.mkdir('output/' + sp_query + '_query')
        except FileExistsError:
            shutil.rmtree('output/' + sp_query + '_query')
            os.mkdir('output/' + sp_query + '_query')
        os.chdir('output/' + sp_query + '_query')

    with open('../../queries/' + query) as f, open(sp_query + '_query.fasta',
                                                   'w') as outF:
        for record in SeqIO.parse(f, 'fasta'):
            ID = record.description
            seq = str(record.seq)

            outF.write('>' + ID + '_query' + '\n' + seq + '\n')

    expected_sp_BOLD = sp_query.split('_')[0].title() + '%20' + sp_query.split(
        '_')[1]
    sister_sp_BOLD = species.split('_')[0].title() + '%20' + species.split(
        '_')[1]

    #Download barcode sequences from BOLD
    unix('wget http://www.boldsystems.org/index.php/API_Public/sequence?taxon='
         + expected_sp_BOLD,
         shell=True)
    unix('wget http://www.boldsystems.org/index.php/API_Public/sequence?taxon='
         + sister_sp_BOLD,
         shell=True)

    for bold in glob.glob('sequence*'):
        sp_fasta = bold.split('=')[1]
        sp_fasta = sp_fasta.replace(' ', '_')
        bold = bold.replace(' ', '\\ ')
        unix('mv ' + bold + ' ' + sp_fasta + '.fas', shell=True)

    #Combine BOLD barcode seqs with DToL query barcode seq
    unix('cat *fas >> ' + sp_query + '_query.fasta', shell=True)

    #Remove duplicate sequences from the fasta file
    with open(sp_query + '_query.fasta') as f, open(sp_query + '_query.fa',
                                                    'w') as outF:
        bold_id_dict = {}
        for record in SeqIO.parse(f, 'fasta'):
            ID = record.description
            seq = str(record.seq)
            if ID not in bold_id_dict.keys():
                bold_id_dict[ID] = seq

        for k, v in bold_id_dict.items():
            if ('COI' in k) or ('query' in k):
                outF.write('>' + k + '\n' + v + '\n')

    os.chdir('../../')
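# Hypothetical call site for the function above; the argparse flag names
# (--query, --species) are assumptions based on the docstring:
if args.query:
    data_download_user(args.query, args.species)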
Example #13
0
def trim_data(OMA_ID):
    print(OMA_ID, ' is being processed')

    #Trim fastq files to remove adapter (TruSeq3 file) and based on quality and read length
    os.chdir("/data0/bspm/shortRead_rerun_mapping/" + OMA_ID)
    os.mkdir("data_fastq_trimmed")
    os.chdir("data_fastq")
    if OMA_ID in SE_reads:
        unix("cp ../../TruSeq3-SE.fa .", shell=True)
        single_IDs = []
        for single_file in glob.glob("*.gz"):
            SRR_ID = single_file[:-9]
            single_IDs.append(SRR_ID)
        for single_ID in set(single_IDs):
            unix("java -jar /home/bspm/bin/Trimmomatic-0.38/trimmomatic-0.38.jar SE -phred33 -trimlog " + single_ID + "_Logfile.txt " + single_ID + ".fastq.gz" + " " + single_ID + "_trim.fastq.gz " + "ILLUMINACLIP:TruSeq3-SE.fa:2:30:10 LEADING:3 TRAILING:3 SLIDINGWINDOW:10:30 MINLEN:35", shell=True)
    else:
        if OMA_ID in PE_reads:
            unix("cp ../../TruSeq3-PE.fa .", shell=True)
            IDs = []
            for paired_file in glob.glob("*.gz"):
                SRR_ID = paired_file[:-11]
                IDs.append(SRR_ID)
            for ID in set(IDs):
                unix("java -jar /home/bspm/bin/Trimmomatic-0.38/trimmomatic-0.38.jar PE -phred33 -trimlog " + ID + "_Logfile.txt " + ID + "_1.fastq.gz " + ID + "_2.fastq.gz " + ID + "_1_paired_trim.fastq.gz " + ID + "_1_unpaired_trim.fastq.gz " + ID + "_2_paired_trim.fastq.gz " + ID + "_2_unpaired_trim.fastq.gz " + "ILLUMINACLIP:TruSeq3-PE.fa:2:30:10 LEADING:3 TRAILING:3 SLIDINGWINDOW:10:30 MINLEN:35", shell=True)
    unix("mv *trim* ../data_fastq_trimmed/", shell=True)
    unix("mv *Logfile* ../data_fastq_trimmed/", shell=True)
    unix("rm *TruSeq3*", shell=True)

    #Run FastQC on trimmed files
    os.chdir("/data0/bspm/shortRead_rerun_mapping/" + OMA_ID)
    os.mkdir("FastQC_postTrim")
    os.chdir("data_fastq_trimmed")
    unix("perl /home/bspm/bin/FastQC/fastqc -o ../FastQC_postTrim --noextract -q *.gz", shell=True)

    print(OMA_ID, ' is finished processing')
Example #14
0
                                    gene_end + '.fasta', 'w') as outF1:
                                outF1.write('>' + gene_header + '\n' +
                                            gene_nuc_seq[0] + '\n')
                    else:
                        with open(
                                'temp_seqs/' + spName + '_' + geneName + '_' +
                                contig + '_' + gene_start + '_' + gene_end +
                                '.fasta', 'w') as outF3:
                            outF3.write('>' + gene_header + '\n' +
                                        gene_nuc_seq[0] + '\n')

##Translate nucleotide hbx sequences
os.chdir('temp_seqs')
for fasta in glob.glob("*.fasta"):
    unix('sixpack -sequence ' + fasta + ' -outfile ' + fasta +
         '.sixpack -outseq ' + fasta + '.sixpack.fa',
         shell=True)

outF = open(args.gene + '_HD_AA.fasta', 'w')
sorted_fasta = sorted(glob.glob("*fa"))

for fa in sorted_fasta:
    fa_info = fa.split('.')[0]
    if len(fa_info.split('_')) == 6:
        spName = '_'.join(fa_info.split('_')[:1])
        geneID = fa_info.split('_')[2]
        contig = fa_info.split('_')[3]
        pos = '_'.join(fa_info.split('_')[4:5])
Example #15
0
def download_data(OMA_ID, SRA_ID):
    print(OMA_ID, ' is being processed')
    os.mkdir(OMA_ID)
    os.chdir("/data1/bspm/shortRead_mapping/" + OMA_ID)
    #Download the sra directories for each taxa bioproject
    for ID in SRA_ID:
        first_ID = ID[:3]
        second_ID = ID[:6]
        final_ID = ID
        get_SRA = "ftp://ftp-trace.ncbi.nih.gov/sra/sra-instant/reads/ByStudy/sra/" + first_ID + "/" + second_ID + "/" + final_ID + "/"
        unix("wget -r " + get_SRA, shell=True)

    ##Put all the .sra read files into data_fastq dir, and remove ftp folder
    os.chdir("/data1/bspm/shortRead_mapping/" + OMA_ID)
    for ID in SRA_ID:
        first_ID = ID[:3]
        second_ID = ID[:6]
        final_ID = ID
        if first_ID == "SRP":
            read_ID = "SRR"
        else:
            read_ID = "ERR"
        SRA_files = "ftp-trace.ncbi.nih.gov/sra/sra-instant/reads/ByStudy/sra/" + first_ID + "/" + second_ID + "/" + final_ID + "/"
        unix("mv " + SRA_files + "*" + read_ID + "*" + "/*.sra " +
             "/data1/bspm/shortRead_mapping/" + OMA_ID,
             shell=True)

    os.chdir("/data1/bspm/shortRead_mapping/" + OMA_ID)
    unix("rm -r ftp-trace.ncbi.nih.gov", shell=True)
    unix("mkdir data_sra", shell=True)
    unix("mv *.sra data_sra", shell=True)
    os.chdir("/data1/bspm/shortRead_mapping/")

    ##Get fastq file for the SRA files using fastq-dump
    os.chdir("/data1/bspm/shortRead_mapping/" + OMA_ID)
    os.mkdir("data_fastq")
    os.chdir("data_sra")
    for sra_file in glob.glob("*.sra"):
        unix(
            "fastq-dump --gzip --skip-technical --readids --dumpbase --split-files "
            + sra_file,
            shell=True)
    unix("mv *.gz ../data_fastq", shell=True)
    unix("rm *.sra", shell=True)
    os.chdir("../")
    unix("rm -r data_sra", shell=True)
    os.chdir("/data1/bspm/shortRead_mapping/")
    print(OMA_ID, ' is finished processing')
Example #16
0
def data_download(barcode_results):
    '''
    Function called when using the --barcode flag. Parse the DToL
    barcode excel sheet to find cases where the species
    identification does not match the barcode sequence. Download
    barcode data from boldsystems.org for the query species.
    '''

    #Convert excel format to csv for parsing
    if args.barcode.lower().endswith('.xlsx'):
        barcode_results = args.barcode.split('.xlsx')[0]
        data_xls = pd.read_excel(args.barcode, dtype=str, index_col=None)
        data_xls.to_csv(barcode_results + '.csv',
                        encoding='utf-8',
                        index=False)
    elif args.barcode.lower().endswith('.csv'):
        barcode_results = args.barcode.split('.csv')[0]
    else:
        print('Error:')
        print('Excel or csv file required as input for --barcode')
        return None

    os.makedirs('output', exist_ok=True)
    os.makedirs('queries', exist_ok=True)
    #Download data for query species from BOLD database
    with open(barcode_results + '.csv') as f:
        next(f)
        for line in f:
            lines = line.split(',')
            specimen_ID = lines[2]
            expected_sp = lines[6].title() + '_' + lines[7]
            result_sp = lines[10].title() + '_' + lines[11]
            DNA = lines[15]

            flag = lines[14]
            if flag == 'C':  #Currently just parsing cases where barcode is flagged as yellow and has at least genus level hit
                if result_sp.split('_') != ['', '']:
                    with open('barphy_results.csv', 'a+') as outBar:
                        outBar.write(specimen_ID + ',' + expected_sp + ',' +
                                     result_sp + '\n')

                    if expected_sp.split('_')[1] == 'sp':
                        with open(
                                'queries/' + specimen_ID + '_' + expected_sp +
                                '.fasta', 'w') as outF_B:
                            outF_B.write('>' + expected_sp + '|' +
                                         specimen_ID + '\n' + DNA + '\n')

                    else:
                        #Save specimen IDs to list
                        query_list.append(specimen_ID)

                        try:
                            os.mkdir('output/' + specimen_ID)
                        except FileExistsError:
                            shutil.rmtree('output/' + specimen_ID)
                            os.mkdir('output/' + specimen_ID)

                        os.chdir('output/' + specimen_ID)

                        with open(specimen_ID + '_query.fasta', 'w') as outF:
                            outF.write('>' + expected_sp + '_DToL' + '\n' +
                                       DNA + '\n')

                        expected_sp_BOLD = lines[6].title() + '%20' + lines[7]
                        result_sp_BOLD = lines[10].title() + '%20' + lines[11]

                        #Download barcode sequences from BOLD
                        print(
                            'Downloading barcode data from BOLD (boldsystems.org)...'
                        )
                        unix(
                            'wget -q --show-progress  http://www.boldsystems.org/index.php/API_Public/sequence?taxon='
                            + expected_sp_BOLD,
                            shell=True)
                        unix(
                            'wget -q --show-progress http://www.boldsystems.org/index.php/API_Public/sequence?taxon='
                            + result_sp_BOLD,
                            shell=True)

                        for bold in glob.glob('sequence*'):
                            sp_fasta = bold.split('=')[1]
                            sp_fasta = sp_fasta.replace(' ', '_')
                            bold = bold.replace(' ', '\\ ')
                            unix('mv ' + bold + ' ' + sp_fasta + '.fas',
                                 shell=True)

                        #Combine BOLD barcode seqs with DToL query barcode seq
                        unix('cat *fas >> ' + specimen_ID + '_query.fasta',
                             shell=True)

                        #Remove duplicate sequences from the fasta file
                        with open(specimen_ID + '_query.fasta') as f, open(
                                specimen_ID + '_query.fa', 'w') as outF:
                            bold_id_dict = {}
                            for record in SeqIO.parse(f, 'fasta'):
                                ID = record.description
                                seq = str(record.seq)
                                if ID not in bold_id_dict.keys():
                                    bold_id_dict[ID] = seq

                            for k, v in bold_id_dict.items():
                                if ('COI' in k) or ('DToL' in k):
                                    outF.write('>' + k + '\n' + v + '\n')

                        os.chdir('../../')

            elif flag == 'B':
                with open('barphy_results.csv', 'a+') as outBar:
                    outBar.write(specimen_ID + ',' + expected_sp + ',' +
                                 result_sp + '\n')
                with open(
                        'queries/' + specimen_ID + '_' + expected_sp +
                        '.fasta', 'w') as outF_B:
                    outF_B.write('>' + expected_sp + '|' + specimen_ID + '\n' +
                                 DNA + '\n')
Example #17
0
def tree_build(query):
    '''
    Create multiple sequence alignments from
    barcode fasta files using mafft. Create a
    phylogenetic tree using IQTree.
    '''
    pwd = os.getcwd()
    os.chdir(query)

    #Align using mafft and infer gene tree with IQTree
    for fa in glob.glob("*fa"):
        unix('sed -i "s/ /_/g" ' + fa, shell=True)
        print('\n')
        print('Constructing MSA and phylogenetic tree...')
        print('(If this step fails, see log files for what went wrong)')
        print('\n')
        unix('mafft --quiet ' + fa + ' > ' + fa.split('.')[0] + '.mft',
             shell=True)
        #unix('mafft --maxiterate 1000 --localpair ' + fa + ' > ' + fa.split('.')[0] + '.mft', shell=True)
        #unix('iqtree -quiet -s ' + fa.split('.')[0] + '.mft', shell=True)
        unix('iqtree -quiet -m GTR+G -s ' + fa.split('.')[0] + '.mft',
             shell=True)

    #Midpoint rooting to root the gene tree
    for tree in glob.glob("*.treefile"):
        midpoint_root(tree)

    #Create pdf file with tree image
    for rooted_tree in glob.glob("*.rooted"):
        queryID = query.split('/')[-1]

        rtre = toytree.tree(rooted_tree)
        Nnodes = rtre.nnodes

        colorlist = [
            "#de2d26" if ("query" in tip) or ("DToL" in tip) else "#000000"
            for tip in rtre.get_tip_labels()
        ]
        #colorlist = ["#de2d26" if "DToL" in tip else "#000000" for tip in rtre.get_tip_labels()]
        if Nnodes < 80:
            canvas, axes, mark = rtre.draw(
                tip_labels_align=True,
                tip_labels_colors=colorlist,
                width=1000,
                height=1000,
                tip_labels_style={"font-size": "15px"})
        elif Nnodes < 600:
            #canvas, axes, mark  = rtre.draw(tip_labels_colors=colorlist, edge_widths=0.1, layout='c', edge_type='p', width=800, height=800, tip_labels_style={"font-size": "2px"});
            canvas, axes, mark = rtre.draw(
                tip_labels_colors=colorlist,
                tip_labels_align=True,
                width=1000,
                height=5000,
                tip_labels_style={"font-size": "15px"})
        else:
            canvas, axes, mark = rtre.draw(
                tip_labels_colors=colorlist,
                tip_labels_align=True,
                width=1000,
                height=8000,
                tip_labels_style={"font-size": "15px"})
            #canvas, axes, mark  = rtre.draw(tip_labels_colors=colorlist, edge_widths=0.1, layout='c', edge_type='p', width=600, height=600, tip_labels_style={"font-size": "1px"});

        toyplot.pdf.render(canvas, queryID + "_tree.pdf")

        #unix('Rscript ' + pwd + '/plot_tree.R -t ' + rooted_tree + ' -o ' + queryID + '_tree.pdf > /dev/null 2>&1', shell=True)
        #unix('Rscript ' + pwd + '/plot_tree.R -t ' + rooted_tree + ' -o ' + queryID + '_tree.pdf', shell=True)
    os.chdir(pwd)
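# `midpoint_root` is called above but not shown in this excerpt. A minimal
# sketch, assuming Bio.Phylo and the "<treefile>.rooted" naming expected by
# the glob("*.rooted") loop (the library choice is an assumption):
from Bio import Phylo

def midpoint_root(treefile):
    """Midpoint-root a newick tree and write it alongside the input."""
    tree = Phylo.read(treefile, "newick")
    tree.root_at_midpoint()
    Phylo.write(tree, treefile + ".rooted", "newick")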
Example #18
0
def hostisup():
    s, o = unix('ping -c1 -t1 -q skynet 2>/dev/null 1>&2')
    if s != 0:
        return False
    return True
Example #19
0
                    if q.qsize() == (bp_end - bp_start + 1):
                        outF.write(famID + "\t" + geneID + "\t" + readID +
                                   "\n")
                        while not q.empty():
                            outF1.write(q.get())

                with open(my_file) as f:
                    for line in f:
                        lines = line.split('\t')
                        cov = lines[-1].strip()
                        nuc_pos = lines[1]
                        if (int(nuc_pos) >= bp_start_extended) and (
                                int(nuc_pos) <= bp_end_extended):
                            if int(cov) == 0:
                                covered = 'FALSE'
                                break
                            else:
                                bp_ext_len += 1

                    if bp_ext_len == (bp_end_extended - bp_start_extended + 1):
                        outF_01.write(famID + "\t" + geneID + "\t" + readID +
                                      "\n")

    unix("find -iname '*breakpoint_cov.txt' -type f -empty -delete",
         shell=True)
    os.chdir("../../../")

#                if covered == 'TRUE':
#                    outF.write(famID + "\t" + geneID + "\t" + readID + "\n")
#    os.chdir("../../../")
Example #20
0
    def tearDown(self):
        """remove files created on server"""
        unix("ssh skynet rm -rf /tmp/spam2.log")
        unix("ssh skynet rm -rf /tmp/fred")
Example #21
0
parse.add_argument("--infile",
                   type=str,
                   help="input file to run reciprocal BLAST on",
                   required=True)
parse.add_argument("--outfile",
                   type=str,
                   help="output file name from reciprocal BLAST run",
                   required=True)

args = parse.parse_args()

#Run reciprocal blast
print('Running reciprocal BLASTx search...')
unix(
    'blastx -query ' + args.infile +
    ' -db ../../../raw/hbx_data/homeobox -evalue 1e-5 -num_threads 4 -seg yes -max_target_seqs 1 -outfmt "6 qseqid sseqid evalue pident bitscore qstart qend qlen sstart send slen" -out '
    + args.outfile,
    shell=True)

#Parse reciprocal BLASTx output
print('\n')
print('Parsing reciprocal BLAST output...')
sp_assem_list = {}
with open(args.outfile) as f:
    for line in f:
        sp = line.split('.')[0]
        sp_assem = line.split('|')[0]
        if sp_assem not in sp_assem_list:
            sp_assem_list[sp_assem] = sp

for assemb, species in sp_assem_list.items():
Example #22
0
# Usage python3 recip_blast.py --gene <gene name>

parse = argparse.ArgumentParser()

parse.add_argument("--gene",
                   type=str,
                   help="name of homeobox gene to obtain results for",
                   required=True)

args = parse.parse_args()

#Run reciprocal blast
print('Running reciprocal BLASTx search...')
unix(
    'blastx -query genome_' + args.gene +
    '_recipBlast.fasta -db ../../../raw/hbx_data/family_data/' + args.gene +
    ' -evalue 1e-5 -num_threads 5 -seg yes -max_target_seqs 1 -outfmt "6 qseqid sseqid evalue pident bitscore qstart qend qlen sstart send slen" -out recipBlast_'
    + args.gene + '.fa',
    shell=True)

#Parse reciprocal BLASTx output
print('\n')
print('Parsing reciprocal BLAST output...')
sp_list = []
with open('recipBlast_' + args.gene + '.fa') as f:
    for line in f:
        sp = line.split('|')[0]
        if sp not in sp_list:
            sp_list.append(sp)

os.makedirs('species_hbx', exist_ok=True)
for species in sp_list: