示例#1
0
    def test_basic(self):
        """Seeking back i lines from the end must leave exactly i lines to read."""
        with temp() as f:
            for n in range(10):
                f.write("%d\n" % n)

            for expected in range(10):
                multitail._seek_to_n_lines_from_end(f, expected)
                assert_equal(expected, len(f.readlines()))
示例#2
0
    def test_short_file(self):
        """ The file is shorter than the number of lines we request """
        with temp() as f:
            f.write("".join("%d\n" % k for k in range(3)))

            # Asking for far more lines than exist: all 3 lines stay readable.
            multitail._seek_to_n_lines_from_end(f, 100)
            assert_equal(3, len(f.readlines()))
示例#3
0
    def test_multiple_buffers(self):
        """ We need to buffer multiple times """
        with temp() as f:
            # 10000 lines forces the seek helper through several read buffers.
            f.write("".join("%d\n" % k for k in range(10000)))

            multitail._seek_to_n_lines_from_end(f, 5000)
            assert_equal(5000, len(f.readlines()))
示例#4
0
    def test_multiple_buffers(self):
        """ We need to buffer multiple times """
        with temp() as f:
            f.writelines("%d\n" % k for k in range(10000))

            # Half the file is far beyond one buffer's worth of data.
            multitail._seek_to_n_lines_from_end(f, 5000)
            assert_equal(5000, len(f.readlines()))
示例#5
0
    def test_basic(self):
        """After seeking i lines from the end, readlines() returns i lines."""
        with temp() as f:
            f.writelines("%d\n" % k for k in range(10))

            for want in range(10):
                multitail._seek_to_n_lines_from_end(f, want)
                assert_equal(want, len(f.readlines()))
示例#6
0
    def test_short_file(self):
        """ The file is shorter than the number of lines we request """
        with temp() as f:
            for line_no in range(3):
                f.write("%d\n" % line_no)

            # 100 requested > 3 available: seek lands at the start of the file.
            multitail._seek_to_n_lines_from_end(f, 100)
            assert_equal(3, len(f.readlines()))
示例#7
0
 def setUp(self):
     """Create a unique throwaway file path and a DAO bound to it."""
     handle = temp(delete=False)
     # Only the generated unique name is wanted; the handle itself is closed.
     self.fname = handle.name + '_db.sql'
     handle.close()
     self.dao = FriskbyDao(self.fname)
示例#8
0
 def __setstate__(self, state):
     """Unpickle by round-tripping the serialized model through a temp .hdf5 file."""
     with temp(suffix='.hdf5', delete=True) as tmp:
         tmp.write(state['model_str'])
         # Flush before loading so the loader sees the complete bytes.
         tmp.flush()
         restored = load_keras_model(tmp.name)
     self.__dict__ = restored.__dict__
示例#9
0
 def __getstate__(self):
     """Pickle support: serialize the model via a temporary .hdf5 file."""
     with temp(suffix='.hdf5', delete=True) as tmp:
         save_model(self, tmp.name, overwrite=True)
         serialized = tmp.read()
     return {'model_str': serialized}
def create_array(Bedfiles,
                 Bamfiles,
                 measure='FPKM',
                 max_workers=15,
                 sorted=False):
    """Build a sites-by-samples count table over the union of BED intervals.

    Parameters
    ----------
    Bedfiles : list of str
        Paths to BED files whose intervals are unioned into one site set.
    Bamfiles : list of str
        Paths to BAM files whose reads are counted over the union sites.
    measure : str
        'FPKM' or 'CPM' for log2-normalized values; anything else yields
        raw read counts.
    max_workers : int
        Thread-pool size for the counting and flagstat steps.
    sorted : bool
        Whether the bam/bed inputs are coordinate-sorted (NOTE: shadows the
        builtin ``sorted``; name kept for interface compatibility).

    Returns
    -------
    pandas.DataFrame
        Union intervals followed by one count column per BAM file.
    """
    PyBedfiles = dict()

    # Load every BED file once.
    print("Collecting bedfiles... ")
    for Bedfile in Bedfiles:
        print("Obtaining " + Bedfile)
        PyBedfiles[Bedfile] = pb.BedTool(Bedfile)

    # Union of all binding sites.  Seed with the first file and cat the rest.
    # (The previous version seeded from a loop variable leaked out of the
    # loop above and then also cat'ed the seed with itself.)
    UnionSite = PyBedfiles[Bedfiles[0]]
    if len(Bedfiles) > 1:
        print("Obtaining unions of binding sites")
        for Bedfile in Bedfiles[1:]:
            UnionSite = UnionSite.cat(PyBedfiles[Bedfile])

    if sorted:
        print("Sorted bam/bed files are given. Extracting chromosome names")
        with temp('w') as f:
            # Build a faidx-style "name<TAB>length" listing from the first
            # BAM header so the union can be sorted in the same chrom order.
            # NOTE(review): shell string built by concatenation — assumes the
            # BAM path is trusted; consider subprocess with a list argv.
            command = """samtools view -H """ + Bamfiles[
                0] + """ | grep SQ | cut -f 2,3 | awk '{sub(/^SN:/,""); sub(/LN:/,""); print;}' >  """ + f.name
            os.system(command)
            UnionSite = UnionSite.sort(faidx=f.name)

    # Count reads over the union sites, one worker per BAM file; results are
    # slotted back by the `order` index so output order is deterministic.
    bamreads = [None] * len(Bamfiles)
    futures = []
    print("Calculating read counts from bam files")
    with cf.ThreadPoolExecutor(max_workers=max_workers) as e:
        for order in range(len(Bamfiles)):
            futures.append(
                e.submit(readcount, UnionSite, Bamfiles[order], order, sorted))

        for future in cf.as_completed(futures):
            order, read = future.result()
            bamreads[order] = read

    # Library depth is only needed for the normalized measures.
    Nreads = None
    if measure in ('FPKM', 'CPM'):
        print('Obtaining library depth..')
        Nreads = [None] * len(Bamfiles)
        futures = []
        with cf.ThreadPoolExecutor(max_workers=max_workers) as e:
            for order in range(len(Bamfiles)):
                futures.append(e.submit(flagstats, Bamfiles[order], order))

            for future in cf.as_completed(futures):
                read, order = future.result()
                Nreads[order] = float(read)

    print("Calculating " + measure)
    counts = np.zeros((len(UnionSite), len(Bamfiles)))
    for i in range(len(Bamfiles)):
        # Hoist the per-sample scale factor out of the per-site loop.
        if measure in ('FPKM', 'CPM'):
            scale = 1000000000 / Nreads[i]
        for j, site in enumerate(bamreads[i]):
            if measure == 'FPKM':
                # log2 of length-normalized, depth-normalized count (+1).
                counts[j, i] = np.log2(
                    float(site[-1]) * scale / float(site.length) + 1)
            elif measure == 'CPM':
                counts[j, i] = np.log2(float(site[-1]) * scale + 1)
            else:
                counts[j, i] = int(site[-1])

    # One row per union site: interval columns, then per-BAM count columns.
    df = pd.concat(
        [UnionSite.to_dataframe(),
         pd.DataFrame(counts, columns=Bamfiles)],
        axis=1)

    return df
示例#11
0
 def _temp_fname(self, postfix=''):
     """Return a fresh, unique filename with *postfix* appended."""
     handle = temp(delete=False)
     # Keep only the generated unique path; the open handle is not needed.
     path = handle.name + postfix
     handle.close()
     return path
示例#12
0
def parse_ply_nx(f, colors, eig=False,  network=False):
	output = {color:[] for color in colors}
	output_indices = {color:[] for color in colors}
	output_graphs = {color:Graph() for color in colors}
	# if not f.readline() == "ply": raise Exception("You must use a .ply (stanford format) file.")
	# if not "ascii" in f.readline(): raise Exception("You must use the ascii .ply specification.")
	header_line = f.readline()
	if not "ply" in header_line: 
		raise IOError("can only read text ply files")
	while header_line != "end_header\n":
		print header_line,
	 	if "element" in header_line:
			if "vertex" in header_line:
				vertex_number = int(header_line.split()[-1])
			if "face" in header_line:
				face_number = int(header_line.split()[-1])
		header_line = f.readline()
	vertices = np.memmap(temp(), dtype="float_", mode="w+",
	                   	             shape=(vertex_number, 10))
	for i, line in enumerate(f):
		####YOU CAN DO BETTER THAN THIS... READ THE NUMBER OF ELEMENTS  AND ITERATE ON THAT, LIKE CROWBAR
		data = line.split()
		if len(data) <  10:
			nodes = [int(node_index) for node_index in data[1:]]
			for color in colors:
				colored_indices = output_indices[color]
				colored_nodes = [node for node in nodes if tuple(vertices[node,3:7]) == color]
				if colored_nodes:
					#print 'edge'
					output_graphs[color].add_edges_from(combinations(colored_nodes,  2))
			if not i % 10000: print "processing face %i/%i..." % (i - vertex_number, face_number)
		else:
			x, y, z,\
			nx, ny, nz,\
			r, g, b, alpha = data
			color = tuple((int(r), int(g), int(b), int(alpha)))
			normal = np.array((float(nx), float(ny), float(nz)))
			if color in colors:
				if eig:
					position = np.array((float(x), float(y), float(z)))
					output[color].append(position)
				elif network:
					output_indices[color].append(i)
					vertices[i,:] = np.array((float(x), float(y), float(z),float(r),int(g),int(b),int(alpha), float(nx), float(ny), float(nz)))
				else:
					normal = normal/np.linalg.norm(normal)
					output[color].append(calc_sphere(*normal))
			if not i % 10000: print "processing node %i/%i..." % (i, vertex_number)
	if eig:
		for color in colors:
			output[color] = (calc_sphere(*general_axis(np.array(output[color]), -1)),)
	elif network:
		for color in colors:
			if __debug__: print "processing network for color ", color
			for plane_vertices_indices in connected_components(output_graphs[color]):
				colored_vertices = vertices[plane_vertices_indices,:3]
				dipdir, dip = calc_sphere(*general_axis(colored_vertices, -1))
				X, Y, Z = colored_vertices[:, :3].mean(axis=0)
				highest_vertex = colored_vertices[np.argmax(colored_vertices[:,2]),:]
				lowest_vertex = colored_vertices[np.argmin(colored_vertices[:,2]),:]
				trace = np.linalg.norm(highest_vertex - lowest_vertex)

				direction_cosines = normalized(vertices[plane_vertices_indices, 7:])
				n = direction_cosines.shape[0]
				resultant_vector = np.sum(direction_cosines, axis=0)
				fisher_k = (n - 1)/(n - np.linalg.norm(resultant_vector))

				direction_tensor = np.dot(direction_cosines.T, direction_cosines)
				eigen_values, eigen_vectors = np.linalg.eigh(direction_tensor)
				eigen_values_order = (-eigen_values).argsort()
				
				first_eigenvalue,\
				second_eigenvalue,\
				third_eigenvalue = eigen_values[eigen_values_order]
				
				first_eigenvector,\
				second_eigenvector,\
				third_eigenvector = eigen_vectors[:,eigen_values_order].T
				
				#From Vollmer 1990
				vollmer_P = (first_eigenvalue - second_eigenvalue)/n
				vollmer_G = 2*(second_eigenvalue - third_eigenvalue)/n
				vollmer_R = 3*third_eigenvalue/n
				
				vollmer_B = vollmer_P + vollmer_G

				output[color].append((dipdir, dip, X, Y, Z, trace, n, fisher_k, vollmer_P, vollmer_G, vollmer_R, vollmer_B))
	#embed()
	return output
示例#13
0
def parse_ply_nx(f, colors, eig=False, network=False):
    """Parse an ascii .ply stream, collecting data for each requested color.

    f: open text handle positioned at the start of a .ply file.
    colors: collection of (r, g, b, alpha) int tuples to track — presumably
        dict or list of such tuples; verify against callers.
    eig: collect raw vertex positions per color instead of normals.
    network: build per-color vertex graphs along faces and emit one
        (dipdir, dip, X, Y, Z, trace) tuple per connected patch.
    Returns a dict mapping each color to its collected list/tuple.
    Raises IOError if the stream does not look like a text .ply file.
    """
    output = {color: [] for color in colors}
    output_indices = {color: [] for color in colors}
    output_graphs = {color: Graph() for color in colors}
    # if not f.readline() == "ply": raise Exception("You must use a .ply (stanford format) file.")
    # if not "ascii" in f.readline(): raise Exception("You must use the ascii .ply specification.")
    # Walk the header, picking up the vertex/face element counts.
    header_line = f.readline()
    if not "ply" in header_line:
        raise IOError("can only read text ply files")
    while header_line != "end_header\n":
        print header_line,
        if "element" in header_line:
            if "vertex" in header_line:
                vertex_number = int(header_line.split()[-1])
            if "face" in header_line:
                face_number = int(header_line.split()[-1])
        header_line = f.readline()
    # Disk-backed scratch table (x, y, z, r, g, b, alpha per vertex) so
    # large meshes do not have to fit in RAM.
    vertices = np.memmap(temp(),
                         dtype="float_",
                         mode="w+",
                         shape=(vertex_number, 7))
    for i, line in enumerate(f):
        data = line.split()
        if len(data) != 10:
            # Face record: first token is the index count, rest are vertex
            # indices.  Link same-colored vertices into that color's graph.
            nodes = [int(node_index) for node_index in data[1:]]
            for color in colors:
                colored_indices = output_indices[color]
                # Last 4 columns of the scratch table hold the rgba values.
                colored_nodes = [
                    node for node in nodes
                    if tuple(vertices[node, -4:]) == color
                ]
                if colored_nodes:
                    #print 'edge'
                    output_graphs[color].add_edges_from(
                        combinations(colored_nodes, 2))
            if not i % 10000:
                print "processing face %i/%i..." % (i - vertex_number,
                                                    face_number)
        else:
            # Vertex record: position, normal, then rgba.
            x, y, z,\
            nx, ny, nz,\
            r, g, b, alpha = data
            color = tuple((int(r), int(g), int(b), int(alpha)))
            normal = np.array((float(nx), float(ny), float(nz)))
            if color in colors:
                if eig:
                    position = np.array((float(x), float(y), float(z)))
                    output[color].append(position)
                elif network:
                    output_indices[color].append(i)
                    vertices[i, :] = np.array(
                        (float(x), float(y), float(z), float(r), int(g),
                         int(b), int(alpha)))
                else:
                    # Normalize before converting to spherical coordinates.
                    normal = normal / np.linalg.norm(normal)
                    output[color].append(calc_sphere(*normal))
            if not i % 10000:
                print "processing node %i/%i..." % (i, vertex_number)
    if eig:
        # Reduce each color's positions to its principal axis orientation.
        for color in colors:
            output[color] = (calc_sphere(
                *general_axis(np.array(output[color]), -1)), )
    elif network:
        # Each connected component of a color's graph is a planar patch.
        for color in colors:
            if __debug__: print "processing network for color ", color
            for plane_vertices_indices in connected_components(
                    output_graphs[color]):
                colored_vertices = vertices[plane_vertices_indices, :3]
                # Patch orientation from the best-fit axis of its vertices.
                dipdir, dip = calc_sphere(*general_axis(colored_vertices, -1))
                X, Y, Z = colored_vertices[:, :3].mean(axis=0)
                # Trace length: distance between the patch's extreme-z points.
                highest_vertex = colored_vertices[
                    np.argmax(colored_vertices[:, 2]), :]
                lowest_vertex = colored_vertices[
                    np.argmin(colored_vertices[:, 2]), :]
                trace = np.linalg.norm(highest_vertex - lowest_vertex)
                output[color].append((dipdir, dip, X, Y, Z, trace))
    #embed()
    return output
示例#14
0
def parse_ply_nx(f, colors, eig=False,  network=False):
    output = {color:[] for color in colors}
    output_indices = {color:[] for color in colors}
    output_graphs = {color:Graph() for color in colors}
    # if not f.readline() == "ply": raise Exception("You must use a .ply (stanford format) file.")
    # if not "ascii" in f.readline(): raise Exception("You must use the ascii .ply specification.")
    header_line = f.readline()
    if not "ply" in header_line: 
        raise IOError("can only read text ply files")
    while header_line != "end_header\n":
        print header_line,
        if "element" in header_line:
            if "vertex" in header_line:
                vertex_number = int(header_line.split()[-1])
            if "face" in header_line:
                face_number = int(header_line.split()[-1])
        header_line = f.readline()
    vertices = np.memmap(temp(), dtype="float_", mode="w+",
                                     shape=(vertex_number, 7))
    for i, line in enumerate(f):
        data = line.split()
        if len(data) !=  10:
            nodes = [int(node_index) for node_index in data[1:]]
            for color in colors:
                colored_indices = output_indices[color]
                colored_nodes = [node for node in nodes if tuple(vertices[node,-4:]) == color]
                if colored_nodes:
                    #print 'edge'
                    output_graphs[color].add_edges_from(combinations(colored_nodes,  2))
            if not i % 10000: print "processing face %i/%i..." % (i - vertex_number, face_number)
        else:
            x, y, z,\
            nx, ny, nz,\
            r, g, b, alpha = data
            color = tuple((int(r), int(g), int(b), int(alpha)))
            normal = np.array((float(nx), float(ny), float(nz)))
            if color in colors:
                if eig:
                    position = np.array((float(x), float(y), float(z)))
                    output[color].append(position)
                elif network:
                    output_indices[color].append(i)
                    vertices[i,:] = np.array((float(x), float(y), float(z),float(r),int(g),int(b),int(alpha)))
                else:
                    normal = normal/np.linalg.norm(normal)
                    output[color].append(calc_sphere(*normal))
            if not i % 10000: print "processing node %i/%i..." % (i, vertex_number)
    if eig:
        for color in colors:
            output[color] = (calc_sphere(*general_axis(np.array(output[color]), -1)),)
    elif network:
        for color in colors:
            if __debug__: print "processing network for color ", color
            for plane_vertices_indices in connected_components(output_graphs[color]):
                colored_vertices = vertices[plane_vertices_indices,:3]
                dipdir, dip = calc_sphere(*general_axis(colored_vertices, -1))
                X, Y, Z = colored_vertices[:, :3].mean(axis=0)
                highest_vertex = colored_vertices[np.argmax(colored_vertices[:,2]),:]
                lowest_vertex = colored_vertices[np.argmin(colored_vertices[:,2]),:]
                trace = np.linalg.norm(highest_vertex - lowest_vertex)
                output[color].append((dipdir, dip, X, Y, Z, trace))
    #embed()
    return output
示例#15
0
def parse_ply_nx(f, colors, eig=False, network=False):
    """Parse an ascii .ply stream, collecting data for each requested color.

    f: open text handle positioned at the start of a .ply file.
    colors: collection of (r, g, b, alpha) int tuples to track — presumably
        dict or list of such tuples; verify against callers.
    eig: collect raw vertex positions per color instead of normals.
    network: build per-color vertex graphs along faces and emit per-patch
        orientation statistics (dipdir, dip, centroid, trace, n, fisher_k
        and Vollmer PGR(B) fabric indices).
    Returns a dict mapping each color to its collected list/tuple.
    Raises IOError if the stream does not look like a text .ply file.
    """
    output = {color: [] for color in colors}
    output_indices = {color: [] for color in colors}
    output_graphs = {color: Graph() for color in colors}
    # if not f.readline() == "ply": raise Exception("You must use a .ply (stanford format) file.")
    # if not "ascii" in f.readline(): raise Exception("You must use the ascii .ply specification.")
    # Walk the header, picking up the vertex/face element counts.
    header_line = f.readline()
    if not "ply" in header_line:
        raise IOError("can only read text ply files")
    while header_line != "end_header\n":
        print header_line,
        if "element" in header_line:
            if "vertex" in header_line:
                vertex_number = int(header_line.split()[-1])
            if "face" in header_line:
                face_number = int(header_line.split()[-1])
        header_line = f.readline()
    # Disk-backed scratch table per vertex: x, y, z, r, g, b, alpha, nx, ny, nz.
    vertices = np.memmap(temp(),
                         dtype="float_",
                         mode="w+",
                         shape=(vertex_number, 10))
    for i, line in enumerate(f):
        # TODO: iterate on the element counts from the header instead of
        # sniffing each line's field count.
        data = line.split()
        if len(data) < 10:
            # Face record: first token is the index count, rest are vertex
            # indices.  Link same-colored vertices into that color's graph.
            nodes = [int(node_index) for node_index in data[1:]]
            for color in colors:
                colored_indices = output_indices[color]
                # Columns 3:7 of the scratch table hold the rgba values.
                colored_nodes = [
                    node for node in nodes
                    if tuple(vertices[node, 3:7]) == color
                ]
                if colored_nodes:
                    #print 'edge'
                    output_graphs[color].add_edges_from(
                        combinations(colored_nodes, 2))
            if not i % 10000:
                print "processing face %i/%i..." % (i - vertex_number,
                                                    face_number)
        else:
            # Vertex record: position, normal, then rgba.
            x, y, z,\
            nx, ny, nz,\
            r, g, b, alpha = data
            color = tuple((int(r), int(g), int(b), int(alpha)))
            normal = np.array((float(nx), float(ny), float(nz)))
            if color in colors:
                if eig:
                    position = np.array((float(x), float(y), float(z)))
                    output[color].append(position)
                elif network:
                    output_indices[color].append(i)
                    vertices[i, :] = np.array(
                        (float(x), float(y), float(z), float(r), int(g),
                         int(b), int(alpha), float(nx), float(ny), float(nz)))
                else:
                    # Normalize before converting to spherical coordinates.
                    normal = normal / np.linalg.norm(normal)
                    output[color].append(calc_sphere(*normal))
            if not i % 10000:
                print "processing node %i/%i..." % (i, vertex_number)
    if eig:
        # Reduce each color's positions to its principal axis orientation.
        for color in colors:
            output[color] = (calc_sphere(
                *general_axis(np.array(output[color]), -1)), )
    elif network:
        # Each connected component of a color's graph is a planar patch.
        for color in colors:
            if __debug__: print "processing network for color ", color
            for plane_vertices_indices in connected_components(
                    output_graphs[color]):
                colored_vertices = vertices[plane_vertices_indices, :3]
                # Patch orientation from the best-fit axis of its vertices.
                dipdir, dip = calc_sphere(*general_axis(colored_vertices, -1))
                X, Y, Z = colored_vertices[:, :3].mean(axis=0)
                # Trace: span between the patch's extreme-z points.
                highest_vertex = colored_vertices[
                    np.argmax(colored_vertices[:, 2]), :]
                lowest_vertex = colored_vertices[
                    np.argmin(colored_vertices[:, 2]), :]
                trace = np.linalg.norm(highest_vertex - lowest_vertex)

                # Orientation statistics from the patch normals
                # (columns 7: of the scratch table).
                direction_cosines = normalized(vertices[plane_vertices_indices,
                                                        7:])
                n = direction_cosines.shape[0]
                resultant_vector = np.sum(direction_cosines, axis=0)
                fisher_k = (n - 1) / (n - np.linalg.norm(resultant_vector))

                direction_tensor = np.dot(direction_cosines.T,
                                          direction_cosines)
                eigen_values, eigen_vectors = np.linalg.eigh(direction_tensor)
                # eigh returns ascending eigenvalues; reorder descending.
                eigen_values_order = (-eigen_values).argsort()

                first_eigenvalue,\
                second_eigenvalue,\
                third_eigenvalue = eigen_values[eigen_values_order]

                first_eigenvector,\
                second_eigenvector,\
                third_eigenvector = eigen_vectors[:,eigen_values_order].T

                #From Vollmer 1990
                vollmer_P = (first_eigenvalue - second_eigenvalue) / n
                vollmer_G = 2 * (second_eigenvalue - third_eigenvalue) / n
                vollmer_R = 3 * third_eigenvalue / n

                vollmer_B = vollmer_P + vollmer_G

                output[color].append(
                    (dipdir, dip, X, Y, Z, trace, n, fisher_k, vollmer_P,
                     vollmer_G, vollmer_R, vollmer_B))
    #embed()
    return output