Exemplo n.º 1
0
    def __init__(self, fasta, length=None, number=None, k=1, matrix_only=False):
        """Fill this Fasta object with randomly generated sequences.

        The transition matrices are initialized from ``fasta.seqs``; unless
        ``matrix_only`` is set, sequences are then generated and added until
        this object holds the requested number of them.

        Arguments:
        fasta       -- object exposing ``seqs`` and supporting ``len()``
        length      -- length requested for every generated sequence; when
                       falsy, each sequence gets the length of a randomly
                       chosen sequence from ``fasta.seqs``
        number      -- number of sequences wanted; when falsy, ``len(fasta)``
        k           -- stored as ``self.k``, handed to the matrix
                       initialization and used in the generated ids
                       (presumably the Markov order -- confirm)
        matrix_only -- return right after the matrices are initialized
        """
        self.k = k

        # Start out as an empty Fasta container
        Fasta.__init__(self)

        # Set up the transition matrices from the input sequences
        self._initialize_matrices(fasta.seqs, k=k)

        if matrix_only:
            return

        wanted = number or len(fasta)
        counter = 0
        while len(self) < wanted:
            # A template is drawn on every pass, also when a fixed length
            # is requested, so the random stream is consumed the same way
            template = choice(fasta.seqs)
            seq_id = "random_Markov%s_%s" % (k, counter)
            seq_length = length if length else len(template)
            self.add(seq_id, self._generate_sequence(seq_length))
            counter += 1
Exemplo n.º 2
0
    def __init__(self, fasta, size=None, n=None, k=1, matrix_only=False):
        """Populate this Fasta object with randomly generated sequences.

        Transition matrices are initialized from ``fasta.seqs``. When
        ``matrix_only`` is false, sequences are generated one at a time and
        added until ``len(self)`` reaches the requested count.

        Arguments:
        fasta       -- object exposing ``seqs`` and supporting ``len()``
        size        -- length requested for each generated sequence; when
                       falsy, the length of a randomly chosen sequence from
                       ``fasta.seqs`` is used instead
        n           -- number of sequences wanted; when falsy, ``len(fasta)``
        k           -- stored as ``self.k``, handed to the matrix
                       initialization and used in the generated names
                       (presumably the Markov order -- confirm)
        matrix_only -- return right after the matrices are initialized
        """
        self.k = k

        # Empty Fasta container first, then the transition matrices
        Fasta.__init__(self)
        self._initialize_matrices(fasta.seqs, k=k)

        if matrix_only:
            return

        total = n if n else len(fasta)
        index = 0
        while len(self) < total:
            # The random pick happens on every pass, whether or not its
            # length ends up being used
            picked = choice(fasta.seqs)
            name = "random_Markov%s_%s" % (k, index)
            if size:
                target_length = size
            else:
                target_length = len(picked)
            self.add(name, self._generate_sequence(target_length))
            index += 1
Exemplo n.º 3
0
    def __init__(self, matchfile, genome="hg19", number=None, size=None):
        """Load sequences for coordinates written by ``matched_gc_bedfile``.

        A BED file is produced by ``matched_gc_bedfile``, converted to FASTA
        with ``Genome(genome).track2fasta`` and the result is loaded into
        this Fasta object. Both intermediate files are temporary and are
        removed afterwards, also when one of the steps raises.

        Arguments:
        matchfile -- passed through to ``matched_gc_bedfile``
        genome    -- genome name used for both the BED file and the sequence
                     retrieval (default "hg19")
        number    -- passed through to ``matched_gc_bedfile``
        size      -- passed through to ``matched_gc_bedfile`` as ``size``
        """
        # Only the generated names are used, as paths for the steps below
        tmpbed = NamedTemporaryFile(dir=mytmpdir()).name
        tmpfasta = NamedTemporaryFile(dir=mytmpdir()).name

        try:
            # Create bed-file with coordinates of random sequences
            matched_gc_bedfile(tmpbed, matchfile, genome, number, size=size)

            # Convert track to fasta
            Genome(genome).track2fasta(tmpbed, fastafile=tmpfasta)

            # Initialize super Fasta object
            Fasta.__init__(self, tmpfasta)
        finally:
            # Always clean up; the existence check keeps a failure in an
            # early step from being masked by a missing-file error here
            for tmpfile in (tmpbed, tmpfasta):
                if os.path.exists(tmpfile):
                    os.remove(tmpfile)
Exemplo n.º 4
0
	def __init__(self, fasta, length=None, multiply=10):
		"""Generate ``multiply`` random sequences per input sequence.

		The transition matrix is initialized from ``fasta.seqs``. For every
		sequence in ``fasta.seqs``, ``multiply`` sequences are generated and
		added under the ids "random_1st_order_0", "random_1st_order_1", ...

		Arguments:
		fasta    -- object exposing ``seqs``
		length   -- length requested for every generated sequence; when
		            falsy, the length of the current input sequence is used
		multiply -- number of sequences generated per input sequence
		"""
		# Empty Fasta container first, then the transition matrix
		Fasta.__init__(self)
		self._initialize_matrices(fasta.seqs)

		# Each input sequence is repeated ``multiply`` times as a template;
		# enumerate supplies the running number used in the id
		templates = (seq for seq in fasta.seqs for _ in range(multiply))
		for counter, template in enumerate(templates):
			seq_id = "random_1st_order_%s" % (counter)
			seq_length = length if length else len(template)
			self.add(seq_id, self._generate_sequence(seq_length))
Exemplo n.º 5
0
    def __init__(self, genome, size=None, n=None):
        """Load sequences for coordinates from ``create_random_genomic_bedfile``.

        A BED file is written by ``create_random_genomic_bedfile``, converted
        to FASTA with ``Genome(genome).track2fasta`` (``stranded=True``) and
        loaded into this Fasta object. Both intermediate files are temporary
        and are removed afterwards, also when one of the steps raises.

        Arguments:
        genome -- genome name used for both the BED file and the sequence
                  retrieval
        size   -- converted with ``int()`` before use; it is required in
                  practice, because the default ``None`` makes ``int()``
                  raise TypeError
        n      -- passed through to ``create_random_genomic_bedfile``
        """
        size = int(size)

        # Only the generated names are used, as paths for the steps below
        tmpbed = NamedTemporaryFile(dir=mytmpdir()).name
        tmpfasta = NamedTemporaryFile(dir=mytmpdir()).name

        try:
            # Create bed-file with coordinates of random sequences
            create_random_genomic_bedfile(tmpbed, genome, size, n)

            # Convert track to fasta
            Genome(genome).track2fasta(tmpbed, fastafile=tmpfasta, stranded=True)

            # Initialize super Fasta object
            Fasta.__init__(self, tmpfasta)
        finally:
            # Always clean up; the existence check keeps a failure in an
            # early step from being masked by a missing-file error here
            for tmpfile in (tmpbed, tmpfasta):
                if os.path.exists(tmpfile):
                    os.remove(tmpfile)
Exemplo n.º 6
0
	def __init__(self, bedfile, genefile, index="/usr/share/gimmemotifs/genome_index/hg18", length=None, multiply=10, match_chromosome=True):
		"""Load sequences for coordinates written by ``self._create_bedfile``.

		A BED file is written by ``self._create_bedfile``, converted to FASTA
		with ``track2fasta`` and loaded into this Fasta object. Both
		intermediate files are temporary and are removed afterwards, also
		when one of the steps raises.

		Arguments:
		bedfile          -- passed through to ``self._create_bedfile``
		genefile         -- passed through to ``self._create_bedfile``
		index            -- first argument of ``track2fasta``
		length           -- passed through to ``self._create_bedfile``
		multiply         -- passed through to ``self._create_bedfile``
		match_chromosome -- stored as ``self.match_chromosome``
		"""
		self.match_chromosome = match_chromosome

		# Only the generated names are used, as paths for the steps below
		tmpbed = NamedTemporaryFile().name
		tmpfasta = NamedTemporaryFile().name

		try:
			# Create bed-file with coordinates of random sequences
			self._create_bedfile(tmpbed, bedfile, genefile, length, multiply)

			# Convert track to fasta
			track2fasta(index, tmpbed, tmpfasta)

			# Initialize super Fasta object
			Fasta.__init__(self, tmpfasta)
		finally:
			# Always clean up; the existence check keeps a failure in an
			# early step from being masked by a missing-file error here
			for tmpfile in (tmpbed, tmpfasta):
				if os.path.exists(tmpfile):
					os.remove(tmpfile)
Exemplo n.º 7
0
    def __init__(self, index="/usr/share/gimmemotifs/genome_index/hg18", length=None, n=None):
        """Load sequences for coordinates from ``create_random_genomic_bedfile``.

        A BED file is written by ``create_random_genomic_bedfile``, converted
        to FASTA with ``track2fasta`` (``use_strand=True``) and loaded into
        this Fasta object. Both intermediate files are temporary and are
        removed afterwards, also when one of the steps raises.

        Arguments:
        index  -- passed to both ``create_random_genomic_bedfile`` and
                  ``track2fasta``
        length -- converted with ``int()`` before use; it is required in
                  practice, because the default ``None`` makes ``int()``
                  raise TypeError
        n      -- passed through to ``create_random_genomic_bedfile``
        """
        length = int(length)

        # Only the generated names are used, as paths for the steps below
        tmpbed = NamedTemporaryFile(dir=mytmpdir()).name
        tmpfasta = NamedTemporaryFile(dir=mytmpdir()).name

        try:
            # Create bed-file with coordinates of random sequences
            create_random_genomic_bedfile(tmpbed, index, length, n)

            # Convert track to fasta
            track2fasta(index, tmpbed, tmpfasta, use_strand=True)

            # Initialize super Fasta object
            Fasta.__init__(self, tmpfasta)
        finally:
            # Always clean up; the existence check keeps a failure in an
            # early step from being masked by a missing-file error here
            for tmpfile in (tmpbed, tmpfasta):
                if os.path.exists(tmpfile):
                    os.remove(tmpfile)
Exemplo n.º 8
0
	def __init__(self, genefile, index="/usr/share/gimmemotifs/genome_index/hg18", length=None, n=None):
		"""Load sequences for coordinates from ``self._create_promoter_bedfile``.

		A BED file is written by ``self._create_promoter_bedfile``, converted
		to FASTA with ``track2fasta`` (``use_strand=True``) and loaded into
		this Fasta object. Both intermediate files are temporary and are
		removed afterwards, also when one of the steps raises.

		Arguments:
		genefile -- passed through to ``self._create_promoter_bedfile``
		index    -- first argument of ``track2fasta``
		length   -- converted with ``int()`` before use; it is required in
		            practice, because the default ``None`` makes ``int()``
		            raise TypeError
		n        -- passed through to ``self._create_promoter_bedfile``
		"""
		length = int(length)

		# Only the generated names are used, as paths for the steps below
		tmpbed = NamedTemporaryFile().name
		tmpfasta = NamedTemporaryFile().name

		try:
			# Create bed-file with coordinates of random sequences
			self._create_promoter_bedfile(tmpbed, genefile, length, n)

			# Convert track to fasta
			track2fasta(index, tmpbed, tmpfasta, use_strand=True)

			# Initialize super Fasta object
			Fasta.__init__(self, tmpfasta)
		finally:
			# Always clean up; the existence check keeps a failure in an
			# early step from being masked by a missing-file error here
			for tmpfile in (tmpbed, tmpfasta):
				if os.path.exists(tmpfile):
					os.remove(tmpfile)
Exemplo n.º 9
0
    def __init__(self, matchfile, genome="hg19", number=None):
        """Load sequences for coordinates written by ``matched_gc_bedfile``.

        A BED file is written by ``matched_gc_bedfile``, converted to FASTA
        with ``track2fasta`` using the index directory of ``genome`` and
        loaded into this Fasta object. Both intermediate files are temporary
        and are removed afterwards, also when one of the steps raises.

        Arguments:
        matchfile -- passed through to ``matched_gc_bedfile``
        genome    -- genome name; passed to ``matched_gc_bedfile`` and joined
                     onto the configured index directory (default "hg19")
        number    -- passed through to ``matched_gc_bedfile``
        """
        # Index location: <configured index dir>/<genome>
        config = MotifConfig()
        index = os.path.join(config.get_index_dir(), genome)

        # Only the generated names are used, as paths for the steps below
        tmpbed = NamedTemporaryFile(dir=mytmpdir()).name
        tmpfasta = NamedTemporaryFile(dir=mytmpdir()).name

        try:
            # Create bed-file with coordinates of random sequences
            matched_gc_bedfile(tmpbed, matchfile, genome, number)

            # Convert track to fasta
            track2fasta(index, tmpbed, tmpfasta)

            # Initialize super Fasta object
            Fasta.__init__(self, tmpfasta)
        finally:
            # Always clean up; the existence check keeps a failure in an
            # early step from being masked by a missing-file error here
            for tmpfile in (tmpbed, tmpfasta):
                if os.path.exists(tmpfile):
                    os.remove(tmpfile)
Exemplo n.º 10
0
	def __init__(self, bedfile, genefile, index="/usr/share/gimmemotifs/genome_index/hg18", length=None, multiply=10, match_chromosome=True):
		"""Load sequences for coordinates written by ``self._create_bedfile``.

		A BED file is written by ``self._create_bedfile``, converted to FASTA
		with ``track2fasta`` and loaded into this Fasta object. Both
		intermediate files are temporary and are removed afterwards, also
		when one of the steps raises.

		Arguments:
		bedfile          -- passed through to ``self._create_bedfile``
		genefile         -- passed through to ``self._create_bedfile``
		index            -- first argument of ``track2fasta``
		length           -- converted with ``int()`` before use; it is
		                    required in practice, because the default
		                    ``None`` makes ``int()`` raise TypeError
		multiply         -- passed through to ``self._create_bedfile``
		match_chromosome -- stored as ``self.match_chromosome``
		"""
		self.match_chromosome = match_chromosome
		length = int(length)

		# Only the generated names are used, as paths for the steps below
		tmpbed = NamedTemporaryFile().name
		tmpfasta = NamedTemporaryFile().name

		try:
			# Create bed-file with coordinates of random sequences
			self._create_bedfile(tmpbed, bedfile, genefile, length, multiply)

			# Convert track to fasta
			track2fasta(index, tmpbed, tmpfasta)

			# Initialize super Fasta object
			Fasta.__init__(self, tmpfasta)
		finally:
			# Always clean up; the existence check keeps a failure in an
			# early step from being masked by a missing-file error here
			for tmpfile in (tmpbed, tmpfasta):
				if os.path.exists(tmpfile):
					os.remove(tmpfile)
Exemplo n.º 11
0
	def __init__(self, fasta, length=None, multiply=10, k=1, matrix_only=False):
		"""Generate ``multiply`` random sequences per input sequence.

		The transition matrices are initialized from ``fasta.seqs``. Unless
		``matrix_only`` is set, ``multiply`` sequences are generated for
		every sequence in ``fasta.seqs`` and added under consecutively
		numbered ids.

		Arguments:
		fasta       -- object exposing ``seqs``
		length      -- length requested for every generated sequence; when
		               falsy, the length of the current input sequence is used
		multiply    -- number of sequences generated per input sequence
		k           -- stored as ``self.k``, handed to the matrix
		               initialization and used in the generated ids
		               (presumably the Markov order -- confirm)
		matrix_only -- return right after the matrices are initialized
		"""
		self.k = k

		# Empty Fasta container first, then the transition matrices
		Fasta.__init__(self)
		self._initialize_matrices(fasta.seqs, k=k)

		if matrix_only:
			return

		serial = 0
		for template in fasta.seqs:
			for _ in range(multiply):
				seq_id = "random_Markov%s_%s" % (k, serial)
				wanted_length = length if length else len(template)
				self.add(seq_id, self._generate_sequence(wanted_length))
				serial += 1