Пример #1
0
 def pre_begin_file(self):
     """Reset per-file state and start a fresh six-column table.

     Clears ``self.vars`` and stores a new ``align.Align`` table in
     ``self.items`` whose first row holds the six column titles. The same
     alignment spec string is applied to the title row and the data rows.
     """
     self.vars = []
     column_spec = 'aaaccl'  # presumably one alignment code per column
     table = self.items = align.Align(titles=1)
     table.add(['# MN', 'HR', 'DOM', 'MON', 'DOW', 'COMMAND_____'])
     table.set_title_alignment(column_spec)
     table.set_alignment(column_spec)
Пример #2
0
 def pre_begin_file(self):
     """Reset per-file state and create the settings and actions tables.

     ``self.settings`` gets a two-column table titled ``# NAME`` / ``VALUE``
     with spec ``'ll'``; ``self.actions`` gets a two-column table titled
     ``# ID`` / ``ACTION`` with spec ``'rl'``. Each spec is applied to both
     the title row and the data rows.
     """
     self.vars = []

     # Settings table.
     settings = self.settings = align.Align(titles=1)
     settings.add(['# NAME', 'VALUE'])
     settings.set_title_alignment('ll')
     settings.set_alignment('ll')

     # Actions table.
     actions = self.actions = align.Align(titles=1)
     actions.add(['# ID', 'ACTION'])
     actions.set_title_alignment('rl')
     actions.set_alignment('rl')
Пример #3
0
 def report(self, final=False):
     """Print the collected lines as an aligned table.

     Does nothing when ``final`` is true. Otherwise ``self.lines`` is sorted
     in place, every entry is fed to a fresh ``align.Align`` table, and each
     row returned by ``get_items()`` is printed with its cells joined by a
     single space.
     """
     if final:
         return
     self.lines.sort()
     table = align.Align()
     for row in self.lines:
         table.add(row)
     for _, cells in table.get_items():
         self.println(' '.join(cells))
Пример #4
0
    def find_faces(self, image):
        """Detect faces in ``image`` and wrap each one in a ``Face`` object.

        ``align.Align(image)`` is expected to return a pair
        ``(aligned_list, bboxes)``. When both are truthy they are walked in
        lockstep and one ``Face`` is built per pair, carrying the source
        image, a 4-element int32 bounding box and the cropped face.

        Returns a list of ``Face`` objects (empty when nothing was found).
        """
        aligned_list, bboxes = align.Align(image)
        found = []
        if not (aligned_list and bboxes):
            return found

        for box, crop in zip(bboxes, aligned_list):
            detected = Face()
            detected.container_image = image
            # Copy the first four bbox values into an int32 array.
            detected.bounding_box = coords = np.zeros(4, dtype=np.int32)
            for idx in range(4):
                coords[idx] = box[idx]
            # The aligned crop is stored as-is; an earlier transpose/resize
            # to 112x112 step was disabled in the original code.
            detected.image = crop
            found.append(detected)

        return found
Пример #5
0
def main():
    """Main body of the script."""
    # input / output variables
    musclepath = '/Users/jbloom/muscle3.8/'
    outdir = 'redesigned_sequences'  # output directory
    remove_targets = {  # targets for removing motifs
        # name : (out name, sequence file, comparison file, codonmotifs)
        'PR8-NP':
        ('PR8-NP-lowCTG', 'sequences/PR8-NP-cDNA.fasta',
         'comparison_sequence_sets/NP_aligned.fasta', ['GTG', 'ATG', 'CTG'])
    }
    addctg_targets = { # targets for adding CTG motifs in frame 1
     # name : (out name, sequence file, comparison file)
        'PR8-NP-lowCTG' : ('PR8-NP-highCTG', '%s/PR8-NP-lowCTG.fasta' % outdir, 'comparison_sequence_sets/NP_aligned.fasta'),
    }
    count_threshold = 100  # any new codons introduced must be found naturally in at least this many sequences
    spliced_segments = [
        # (spliced name, first gene, its start, its end, second gene, its start, first exon end, second exon start, its end) with all numbering 1, 2, 3 from beginning of spliced gene
        ('PR8-M', 'PR8-M1', 1, 756, 'PR8-M2', 1, 27, 716, 979),
        ('PR8-NS', 'PR8-NS1', 1, 693, 'PR8-NS2', 1, 30, 503, 909),
    ]
    nonoverlap_range = {
    }  # gives range of non-overlapping reading frame portion
    for tup in spliced_segments:
        nonoverlap_range[tup[1]] = (tup[6], tup[7])
        s2length = tup[8] - tup[7] + tup[6] - tup[5]
        nonoverlap_range[tup[4]] = (max(tup[6], tup[3] - tup[7]), s2length)

    # begin removing motifs
    for (name, (outname, seqfile, comparisonfile,
                codonmotifs)) in remove_targets.iteritems():
        print "\nRedesigning %s to create %s." % (name, outname)
        originalseq = fasta.Read(seqfile)
        assert len(originalseq) == 1
        originalseq = originalseq[0]
        (head, seq) = originalseq
        seq = seq.upper()
        assert len(seq) % 3 == 0
        ncodons = len(seq) / 3
        comparison_seqs = fasta.Read(comparisonfile)
        print "Will use %d comparison sequences for the redesign." % len(
            comparison_seqs)
        for codonmotif in codonmotifs:
            if len(codonmotif) != 3:
                raise IOError(
                    "Method currently only works for 3-nucleotide motifs.")
            for icodon in range(1, ncodons + 1):
                for iframe in [3, 2, 1]:
                    if icodon == ncodons and iframe in [2, 3]:
                        continue  # don't look past the end of the last codon
                    index = 3 * (icodon - 1) + iframe - 1
                    if seq[index:index + 3] == codonmotif:
                        newseq = RemoveCodonMotif(seq, codonmotif, icodon,
                                                  iframe, comparison_seqs,
                                                  count_threshold)
                        assert fasta.Translate([
                            ('newseq', newseq)
                        ])[0][1] == fasta.Translate([('seq', seq)])[0][
                            1], "icodon = %d, iframe = %d, seq[index : index + 3] = %s, newseq[index : index + 3] = %s, seq[3 * (icodon - 1) : 3 * (icodon + 1)] = %s, newseq[3 * (icodon - 1) : 3 * (icodon + 1)] = %s" % (
                                icodon, iframe, seq[index:index + 3],
                                newseq[index:index + 3],
                                seq[3 * (icodon - 1):3 *
                                    (icodon + 1)], newseq[3 * (icodon - 1):3 *
                                                          (icodon + 1)])
                        seq = newseq
        # if the gene has overlapping reading frames, only use recoded portions of non-overlapping
        if name in nonoverlap_range:
            print "Adjusting redesign of %s to account for the overlapping reading frames." % name
            (start_no, end_no) = nonoverlap_range[name]
            (startcodon_no, endcodon_no) = (start_no / 3 + 1, end_no / 3 - 1)
            seq = "%s%s%s" % (originalseq[1][:3 * startcodon_no],
                              seq[3 * startcodon_no:3 * endcodon_no],
                              originalseq[1][3 * endcodon_no:])
        assert fasta.Translate([originalseq])[0][1] == fasta.Translate([
            ('seq', seq)
        ])[0][1]  # make sure the protein sequence is unchanged
        header = "%s: %s redesigned to eliminate possible alternative start motifs. The protein sequence is unchanged, and codons are only used if they occur in %d of %d natural sequences." % (
            outname, name, count_threshold, len(comparison_seqs))
        for codonmotif in codonmotifs:
            for iframe in [1, 2, 3]:
                s = "%s in frame %d reduced from %d to %d." % (
                    codonmotif, iframe,
                    CountMotifsInFrame(originalseq[1], codonmotif, iframe),
                    CountMotifsInFrame(seq, codonmotif, iframe))
                print s
                header = "%s %s" % (header, s)
        outfile = '%s/%s.fasta' % (outdir, outname)
        print "Writing %s to %s." % (outname, outfile)
        fasta.Write([(header, seq)], outfile)

    # now add CTG motifs
    targetcodon = 'CTG'
    targetaa = fasta.Translate([('codon', targetcodon)])[0][1]
    for (name, (outname, seqfile,
                comparisonfile)) in addctg_targets.iteritems():
        print "\nRedesigning %s to create %s." % (name, outname)
        originalseq = fasta.Read(seqfile)
        assert len(originalseq) == 1
        originalseq = originalseq[0]
        (head, seq) = originalseq
        seq = seq.upper()
        assert len(seq) % 3 == 0
        ncodons = len(seq) / 3
        comparison_seqs = fasta.Read(comparisonfile)
        print "Will use %d comparison sequences for the redesign." % len(
            comparison_seqs)
        for icodon in range(ncodons):
            codon = seq[icodon * 3:icodon * 3 + 3]
            aa = fasta.Translate([('codon', codon)])[0][1]
            if aa == targetaa and codon != targetcodon:  # a replacement possibility
                ctgcounts = CountCodonOccurrences(comparison_seqs, icodon + 1,
                                                  [targetcodon])[targetcodon]
                if ctgcounts >= count_threshold:  # replacement possible
                    newseq = "%s%s%s" % (seq[:icodon * 3], targetcodon,
                                         seq[icodon * 3 + 3:])
                    assert fasta.Translate([
                        ('newseq', newseq)
                    ])[0][1] == fasta.Translate([
                        ('seq', seq)
                    ])[0][1]  # make sure the protein sequence is unchanged
                    seq = newseq
        assert fasta.Translate([originalseq])[0][1] == fasta.Translate([
            ('seq', seq)
        ])[0][1]  # make sure the protein sequence is unchanged
        s = "%s in frame 1 increased from %d to %d." % (
            targetcodon, CountMotifsInFrame(originalseq[1], codonmotif, 1),
            CountMotifsInFrame(seq, codonmotif, 1))
        print s
        header = "%s: %s redesigned to add %s motifs in frame 1. The protein sequence is unchanged, and codons are only used if they occur in %d of %d natural sequences." % (
            outname, name, targetcodon, count_threshold, len(comparison_seqs))
        header = "%s %s" % (header, s)
        outfile = '%s/%s.fasta' % (outdir, outname)
        print "Writing %s to %s." % (outname, outfile)
        fasta.Write([(header, seq)], outfile)
    # write full alternatively spliced segment coding regions
    for (name, g1, g1_start, g1_end, g2, g2e1_start, g2e1_end, g2e2_start,
         g2e2_end) in spliced_segments:
        outname = "%s-lowCTG" % name
        (g1name, g2name) = ('%s-lowCTG' % g1, '%s-lowCTG' % g2)
        g1_file = '%s/%s.fasta' % (outdir, g1name)
        g2_file = '%s/%s.fasta' % (outdir, g2name)
        outfile = "%s/%s.fasta" % (outdir, outname)
        print "Creating %s from %s and %s." % (outname, g1name, g2name)
        (g1_head, g1_seq) = fasta.Read(g1_file)[0]
        (g2_head, g2_seq) = fasta.Read(g2_file)[0]
        merged = "%s%s" % (g1_seq[:g2e2_start - 1], g2_seq[g2e1_end:])
        assert g1_seq in merged
        a = align.Align([(outname, merged), (g1name, g1_seq),
                         (g2name, g2_seq)], musclepath, 'MUSCLE')
        a = align.AddDots(a)
        print "Here is the alignment:\n>%s\n%s\n>%s\n%s\n>%s\n%s" % (
            a[0][0], a[0][1], a[1][0], a[1][1], a[2][0], a[2][1])
        print "Writing %s to %s" % (outname, outfile)
        header = "%s, made by merging the following: %s; %s" % (
            outname, g1_head, g2_head)
        fasta.Write([(header, merged)], outfile)
Пример #6
0
def calibrate(
    pack='D21/eightpack',
    I_tof_dir='Si-I_tof',
    peak_fractional_width=0.02,
    dvalues=[
        1.10860231, 1.24596143, 1.357755, 1.63751414, 1.92015553, 3.13560085
    ],
    dmin=2.5,
    dmax=3.5,
    maxchisq=5.,
    min_counts=800,
    T0_of_E=SEQ_T0_of_E,
    l2table_nxs='./L2table.nxs',
    geometrical_constraints=None,
    align=True,
):
    """Compute DIFC values for one detector pack and optionally align it.

    Parameters
    ----------
    pack : str
        ``"<packname>/<packtype>"``; must contain exactly one ``/``.
    I_tof_dir : str
        Directory holding ``detIDs.npy`` and ``pack-<packname>.yaml``; the
        ``difc-<packname>.npy`` and ``mask-<packname>.npy`` results are
        saved there as well.
    peak_fractional_width, dvalues, dmin, dmax, maxchisq, min_counts, T0_of_E
        Forwarded unchanged to ``get_difc_from_Itof.Fitter`` /
        ``get_difc_from_Itof.GetPackDifc``. ``dvalues`` is never mutated
        here, so the shared list default is safe within this function.
    l2table_nxs : str
        Path of the L2 table, loaded with ``load_L2_from_nxs``.
    geometrical_constraints : dict or None
        ex. ``dict(Xposition=(-0.005, 0.005))``; merged into the alignment
        options when given.
    align : bool
        When true, run the alignment step and write ``new-<packname>.xml``
        in the current working directory.

    Returns
    -------
    (difc, mask) as returned by the ``GetPackDifc`` call.
    """
    # packtype is unused, but the unpacking validates the "name/type" form.
    packname, packtype = pack.split('/')
    fitter = get_difc_from_Itof.Fitter(
        peak_fractional_width=peak_fractional_width,
        bg_type='linear',
        t0_range=(0, 0.01))

    L2 = load_L2_from_nxs(l2table_nxs)

    detIDs = np.load(os.path.join(I_tof_dir, 'detIDs.npy'))
    detID_list = list(detIDs)

    import yaml
    # Close the file deterministically and use the safe loader: a bare
    # yaml.load(stream) is unsafe on old PyYAML and a TypeError on >= 6.
    with open(os.path.join(I_tof_dir, 'pack-%s.yaml' % packname)) as stream:
        packinfo = yaml.safe_load(stream)
    firstpixelID = packinfo['pixelIDs']['first']
    firstpixel_index = detID_list.index(firstpixelID)
    # 1024 consecutive entries starting at the pack's first pixel
    L2_pack = L2[firstpixel_index:firstpixel_index + 1024]

    gpd = get_difc_from_Itof.GetPackDifc(
        pack=packname,
        dvalues=dvalues,
        dmin=dmin,
        dmax=dmax,
        I_tof_dir=I_tof_dir,
        fitter=fitter,
        maxchisq=maxchisq,
        min_counts=min_counts,
        T0_of_E=T0_of_E,
        L2=L2_pack,
    )

    difc, mask, signature_d = gpd()
    np.save(os.path.join(I_tof_dir, 'difc-%s.npy' % packname), difc)
    np.save(os.path.join(I_tof_dir, 'mask-%s.npy' % packname), mask)

    if align:
        # Aliased so the module does not rebind the `align` flag parameter.
        import align as align_module
        alignment = align_module.Align(I_tof_dir)
        if geometrical_constraints:
            alignment.options.update(geometrical_constraints)
        alignment.load_L2_from_nxs(l2table_nxs)
        # The context manager flushes and closes the XML output on exit.
        with open('new-%s.xml' % packname, 'wt') as ofile:
            alignment.align(difc, mask, packname, ofile=ofile)
    return difc, mask
Пример #7
0
 def __init__(self):
     """Initialise the base class and start the output table.

     ``self.items`` becomes an ``align.Align`` table created with
     ``lj=True, titles=1`` whose first row is the four column titles.
     """
     super(PrettyPrint, self).__init__()
     table = self.items = align.Align(lj=True, titles=1)
     table.add(['Type', 'Name', 'Major', 'Minor'])
Пример #8
0
	def pre_begin_file(self, name=None):
		"""Start a fresh ``align.Align`` table (created with ``titles=1``) in ``self.items``.

		``name`` is accepted but not used here.
		"""
		self.items = align.Align(titles=1)
Пример #9
0
 def pre_open_file(self):
     """Replace ``self.items`` with a new ``align.Align`` table created with ``titles=1``."""
     self.items = align.Align(titles=1)
Пример #10
0
 def _prepare(self):
     """Reset parser state: no timestamp yet and an empty entries table.

     ``self.entries`` is a new ``align.Align`` created with ``titles=1``.
     """
     self.entries = None
     self.timestamp = None
     self.entries = align.Align(titles=1)
    'data/genes/caePb2.augustusGene.txt')

# Load each genome together with its gene annotation file.
genome_jap1 = genomereader.GenomeReader(
    'data/genome/caeJap1/chrUn.fa.gz',
    'data/genes/caeJap1.augustusGene.txt')


genome_rem3 = genomereader.GenomeReader(
    'data/genome/caeRem3/chrUn.fa.gz',
    'data/genes/caeRem3.augustusGene.txt')

# All genomes have the same genes, so the names are taken from one of them
# (genome_pb2 is created earlier in the script).
#all_gene_names = [g["name"] for g in genome_pb2.genes]
all_gene_names = genome_pb2.genes.keys()

# Aligner configured with the pam250 scoring matrix and gap penalty 5.
# Other accepted matrix names, e.g.: blosum62, pam250, pam30, rao ...
# or a file name such as PAM250.txt (read directly from file).
a = align.Align("pam250", 5)

# Sums of the global-alignment scores of the nucleotide sequences over all
# genes, with linear gap penalty 5 (one accumulator per genome pair).

rem_jap_nucleotid_seq_pam = 0
pb_jap_nucleotid_seq_pam = 0
rem_pb_nucleotid_seq_pam = 0

# Sums of the alignment scores of the amino-acid sequences over all genes,
# with the pam250 scoring matrix and linear gap penalty 5
# (one accumulator per genome pair).

rem_jap_aminoacid_seq_pam = 0
pb_jap_aminoacid_seq_pam = 0
rem_pb_aminoacid_seq_pam = 0
Пример #12
0
# Load the caeJap1 and caeRem3 genomes with their gene annotations
# (genome_pb2 is created earlier in the script).
genome_jap1 = genomereader.GenomeReader(
    GENOMES_PATH + 'caeJap1/chrUn.fa', GENES_PATH + 'caeJap1.augustusGene.txt')
genome_rem3 = genomereader.GenomeReader(
    GENOMES_PATH + 'caeRem3/chrUn.fa', GENES_PATH + 'caeRem3.augustusGene.txt')

# Gene names are taken from genome_pb2 and reused for the other two genomes.
gene_names = [g['name'] for g in genome_pb2.genes]

# Per-gene results for each genome pair, keyed by gene name:
# joined-exon (DNA) sequence comparisons ...
alignment_scores_pb_jap = {}
alignment_scores_pb_rem = {}
alignment_scores_jap_rem = {}

# ... and amino-acid sequence comparisons.
amino_alignment_scores_pb_jap = {}
amino_alignment_scores_pb_rem = {}
amino_alignment_scores_jap_rem = {}

# One aligner per alphabet; the second argument is presumably the gap
# penalty -- TODO confirm against align.Align.
amino_a = align.Align('blosum62', 1)
dna_a = align.Align('dnafull.txt', 1)

for name in gene_names:
    # Joined exon sequence of this gene in each genome.
    n1 = genome_pb2.join_exons(name)
    n2 = genome_jap1.join_exons(name)
    n3 = genome_rem3.join_exons(name)

    # Only element [0] of the global_alignment result is kept
    # (presumably the score -- verify against align.Align).
    alignment_scores_pb_jap[name] = dna_a.global_alignment(n1, n2)[0]
    alignment_scores_pb_rem[name] = dna_a.global_alignment(n1, n3)[0]
    alignment_scores_jap_rem[name] = dna_a.global_alignment(n2, n3)[0]

    # Amino-acid sequence of this gene in each genome.
    a1 = genome_pb2.get_amino_acid_sequence(name)
    a2 = genome_jap1.get_amino_acid_sequence(name)
    a3 = genome_rem3.get_amino_acid_sequence(name)
Пример #13
0
    def main(self):
        '''
        Run the shortstack pipeline end to end.

        Steps, in order:
        - parse_input.py: parse the input files
        - encoder.py: match basecalls with sequences from the encoding file
        - assemble: build mutant sequences when a mutation VCF is given
          (supervised mode); otherwise run unsupervised
        - align.py: run the first round of FTM
        - write the QC table to self.qc_out_file as tab-separated text
        '''

        # ---- Parse input -------------------------------------------------
        input_parser = parse_input.Parse_files(
            self.input_s6,
            self.output_dir,
            self.target_fa,
            self.mutation_vcf,
            self.encoding_file,
            self.qc_threshold,
            self.num_cores,
        )
        (s6_df, qc_df, mutation_df,
         encoding_df, fasta_df) = input_parser.main_parser()

        # ---- Encode S6 ---------------------------------------------------
        log.info("Reads encoded using file:\n {}".format(self.encoding_file))
        basecall_encoder = encoder.Encode_files(s6_df, encoding_df)
        # Targets found for each molecule, plus parity-check information.
        encoded_df, parity_df = basecall_encoder.map_basecalls(
            encoding_df=encoding_df, s6_df=s6_df, dropna=False)
        # Append the parity rows (from the encoder) to the QC rows (from the
        # parser), then index the combined table by FeatureID.
        qc_df = pd.concat([qc_df, parity_df], axis=0)
        qc_df.set_index("FeatureID", inplace=True, drop=True)

        # ---- Assemble mutations ------------------------------------------
        if self.mutation_vcf == "none":
            # Unsupervised mode: no mutations, so mutant_fasta stays empty.
            unsupervised_msg = "No mutations provided. Entering unsupervised mode."
            print(unsupervised_msg)
            log.info(unsupervised_msg)
            mutant_fasta = ""
        else:
            # Supervised mode: assemble mutated reference sequences.
            log.info("Mutations assembled from:\n {}".format(
                self.mutation_vcf))
            assembler = assemble.AssembleMutations(
                fasta_df, mutation_df, self.run_info_file, s6_df)
            mutant_fasta = assembler.main()

        # ---- FTM ---------------------------------------------------------
        ftm_msg = "Running FTM for perfect matches.\n"
        print(ftm_msg)
        log.info(ftm_msg)

        aligner = align.Align(
            fasta_df,
            encoded_df,
            mutant_fasta,
            self.detection_mode,
            self.deltaz_threshold,
            self.kmer_length,
            self.output_dir,
            self.diversity_threshold,
            self.qc_out_file,
            self.run_info_file,
        )
        # First round of FTM.
        ftm_df, nonmatches = aligner.main()

        # ---- Reporting ---------------------------------------------------
        qc_df.to_csv(self.qc_out_file, header=True, index=True, sep="\t")
Пример #14
0
 def pre_begin_file(self, name=None):
     """Run the base-class hook, then start a new entries table.

     ``name`` is forwarded to the parent ``pre_begin_file``;
     ``self.entries`` becomes an ``align.Align`` created with ``lj=True``.
     """
     parent = super(PrettyPrint, self)
     parent.pre_begin_file(name)
     self.entries = align.Align(lj=True)
Пример #15
0
 def pre_begin_file(self):
     """Replace ``self.items`` with a new default-configured ``align.Align``."""
     self.items = align.Align()
Пример #16
0
    sample code for loading numbers.
    """
    #globs['username']=username

    if parent == None:
        app = wx.PySimpleApp(
            redirect=False
        )  # Create app object if not called from called from within another object.
    dlg = NumbersDialog(parent, data)
    dlg.Destroy()
    if parent == None:
        app.MainLoop()
    return


if __name__ == "__main__":
    # Sample 3 x 12 table (strings, integers, booleans). It is built here
    # but not passed to anything below.
    d = [
        ['ajjjjjjjaa', 'bbb', 'ccc'] + ['aaa', 'bbb', 'ccc'] * 3,
        list(range(1, 13)),
        [True, False, False] * 4,
    ]

    # Build and solve the 'scenario4' trial, show the solution, then open
    # the numbers dialog on the solved data.
    data = align.Align()
    data.set_trial_colum(colum_name='scenario4')
    data.make_z_from_scenario()
    data.solve()
    data.disp_solve()
    numbers(data)