def test_basic(self):
    # Seeking back n lines from the end of a 10-line file must leave
    # exactly n lines left to read, for every n in 0..9.
    with temp() as handle:
        handle.writelines("%d\n" % value for value in range(10))
        for wanted in range(10):
            multitail._seek_to_n_lines_from_end(handle, wanted)
            assert_equal(wanted, len(handle.readlines()))
def test_short_file(self):
    """The file is shorter than the number of lines we request."""
    with temp() as handle:
        # Only three lines exist; asking for 100 must clamp to the start.
        handle.writelines("%d\n" % value for value in range(3))
        multitail._seek_to_n_lines_from_end(handle, 100)
        assert_equal(3, len(handle.readlines()))
def test_multiple_buffers(self):
    """We need to buffer multiple times."""
    # 10000 short lines force the seek helper to read more than one
    # internal buffer's worth of data while walking backwards.
    with temp() as handle:
        handle.writelines("%d\n" % value for value in range(10000))
        multitail._seek_to_n_lines_from_end(handle, 5000)
        assert_equal(5000, len(handle.readlines()))
def setUp(self):
    # Reserve a unique path for the DAO's database file.  The temp file
    # is closed right away; only its name (with a suffix appended) is
    # handed to FriskbyDao, which creates the actual database there.
    placeholder = temp(delete=False)
    try:
        self.fname = placeholder.name + '_db.sql'
    finally:
        placeholder.close()
    self.dao = FriskbyDao(self.fname)
def __setstate__(self, state):
    # Keras can only deserialize a model from disk, so round-trip the
    # pickled bytes through a temporary .hdf5 file.
    with temp(suffix='.hdf5', delete=True) as tmp:
        tmp.write(state['model_str'])
        tmp.flush()  # make the bytes visible to the loader's own handle
        restored = load_keras_model(tmp.name)
        # Adopt the freshly loaded model's attributes wholesale.
        self.__dict__ = restored.__dict__
def __getstate__(self):
    # Serialize by dumping the model to a temporary .hdf5 file and
    # shipping the raw file contents inside the pickle state.
    with temp(suffix='.hdf5', delete=True) as tmp:
        save_model(self, tmp.name, overwrite=True)
        raw = tmp.read()
    return {'model_str': raw}
def create_array(Bedfiles, Bamfiles, measure='FPKM', max_workers=15, sorted=False):
    """Build a (union-peak x BAM-sample) count matrix as a DataFrame.

    Takes the union of all BED intervals, counts reads from each BAM over
    the union sites (via the `readcount` helper, run on a thread pool),
    optionally normalizes per library depth (via `flagstats`), and returns
    the union sites' coordinates concatenated with one count column per BAM.

    Parameters:
        Bedfiles    -- list of BED file paths (peak sets to union).
        Bamfiles    -- list of BAM file paths; also used as column labels.
        measure     -- 'FPKM', 'CPM', or anything else for raw counts.
        max_workers -- thread-pool width for the per-BAM jobs.
        sorted      -- True when BAM/BED inputs are coordinate-sorted;
                       triggers a faidx-based sort of the union sites.
                       NOTE(review): shadows the builtin `sorted`.
    """
    # NOTE(review): `mat` is never used below — looks like dead code.
    mat = np.zeros((len(Bedfiles), len(Bedfiles)))
    PyBedfiles = dict()
    # create bedfiles
    print("Collecting bedfiles... ")
    # NOTE(review): zip(range(len(...)), ...) is just enumerate(); `i` unused.
    for i, Bedfile in zip(range(len(Bedfiles)), Bedfiles):
        print("Obtaining " + Bedfile)
        PyBedfiles[Bedfile] = pb.BedTool(Bedfile)
    if len(Bedfiles) > 1:
        print("Obtaining unions of binding sites")
        # Seed with the last bed from the loop above, then cat every bed
        # (including the seed itself once more) into the union.
        # NOTE(review): presumably harmless because cat() merges overlaps,
        # but the seed is unioned with itself — confirm intended.
        UnionSite = PyBedfiles[Bedfile]
        for Bedfile in PyBedfiles:
            UnionSite = UnionSite.cat(PyBedfiles[Bedfile])
    else:
        UnionSite = PyBedfiles[Bedfiles[0]]
    if sorted:
        print("Sorted bam/bed files are given. Extracting chromosome names")
        #tmpfile = UnionSite._tmp()
        #os.system('sort {0} -k1,1 -k2,2n > {1}'.format(UnionSite.fn, tmpfile))
        # Extract "chrom<TAB>length" pairs from the first BAM's header and
        # use them as a faidx-style genome file to sort the union sites in
        # the same chromosome order as the BAMs.
        with temp('w') as f:
            command = """samtools view -H """ + Bamfiles[
                0] + """ | grep SQ | cut -f 2,3 | awk '{sub(/^SN:/,""); sub(/LN:/,""); print;}' > """ + f.name
            os.system(command)
            UnionSite = UnionSite.sort(faidx=f.name)
        #UnionSite = pb.BedTool(tmpfile)
    # reading bam reads
    bamreads = [None] * len(Bamfiles)
    futures = []
    print("Calculating read counts from bam files")
    # Fan out one readcount job per BAM; results carry their slot index so
    # completion order does not matter.
    with cf.ThreadPoolExecutor(max_workers=max_workers) as e:
        for order in range(len(Bamfiles)):
            futures.append(
                e.submit(readcount, UnionSite, Bamfiles[order], order, sorted))
        for future in cf.as_completed(futures):
            order, read = future.result()
            bamreads[order] = read
    # measuring total number of reads
    if measure in ['FPKM', 'CPM']:
        print('Obtaining library depth..')
        Nreads = [None] * len(Bamfiles)
        futures = []
        with cf.ThreadPoolExecutor(max_workers=max_workers) as e:
            for order in range(len(Bamfiles)):
                futures.append(e.submit(flagstats, Bamfiles[order], order))
            # NOTE(review): flagstats returns (read, order) while readcount
            # returns (order, read) — asymmetric but handled consistently here.
            for future in cf.as_completed(futures):
                read, order = future.result()
                Nreads[order] = float(read)
    print("Calculating " + measure)
    counts = np.zeros((len(UnionSite), len(Bamfiles)))
    for i in range(len(Bamfiles)):
        for j, site in enumerate(bamreads[i]):
            if measure == 'FPKM':
                # log2(FPKM + 1): count * 1e9 / (library_depth * length).
                counts[j, i] = np.log2(
                    float(site[-1]) * (1000000000 / Nreads[i]) /
                    float(site.length) + 1)
            elif measure == 'CPM':
                # NOTE(review): uses the same 1e9 scale as FPKM; CPM is
                # conventionally counts * 1e6 / depth — confirm intended.
                counts[j, i] = np.log2(
                    float(site[-1]) * (1000000000 / Nreads[i]) + 1)
            else:
                counts[j, i] = int(site[-1])
    # One row per union site: BED columns followed by per-BAM count columns.
    df = pd.concat(
        [UnionSite.to_dataframe(),
         pd.DataFrame(counts, columns=Bamfiles)],
        axis=1)
    return df
def _temp_fname(self, postfix=''):
    # Reserve a unique temp-file name and return it with `postfix`
    # appended; the handle is closed immediately and only the path is
    # used by the caller.
    placeholder = temp(delete=False)
    name = placeholder.name + postfix
    placeholder.close()
    return name
def parse_ply_nx(f, colors, eig=False, network=False):
    # Parse an ASCII Stanford PLY stream and group its vertices by RGBA color.
    #
    # f       -- open text handle positioned at the start of the PLY file.
    # colors  -- iterable of (r, g, b, alpha) int tuples to collect.
    # eig     -- collect per-color positions and reduce each color to a single
    #            attitude via general_axis/calc_sphere.
    # network -- build a per-color graph of face-connected vertices and emit
    #            one attitude + Fisher/Vollmer fabric statistics per connected
    #            component (plane).
    # default -- emit one calc_sphere(normal) orientation per colored vertex.
    #
    # Returns: dict color -> list of results (shape depends on the mode).
    output = {color:[] for color in colors}
    output_indices = {color:[] for color in colors}
    output_graphs = {color:Graph() for color in colors}
    # if not f.readline() == "ply": raise Exception("You must use a .ply (stanford format) file.")
    # if not "ascii" in f.readline(): raise Exception("You must use the ascii .ply specification.")
    header_line = f.readline()
    if not "ply" in header_line:
        raise IOError("can only read text ply files")
    # Scan the header for the vertex and face element counts.
    # NOTE(review): vertex_number/face_number stay unbound if the header
    # lacks the element lines; also nothing checks "format ascii".
    while header_line != "end_header\n":
        print header_line,
        if "element" in header_line:
            if "vertex" in header_line:
                vertex_number = int(header_line.split()[-1])
            if "face" in header_line:
                face_number = int(header_line.split()[-1])
        header_line = f.readline()
    # Disk-backed scratch table, one row per vertex:
    # cols 0-2 position, 3-6 RGBA, 7-9 normal (filled only in network mode).
    # NOTE(review): the backing temp file is never removed.
    vertices = np.memmap(temp(), dtype="float_", mode="w+", shape=(vertex_number, 10))
    for i, line in enumerate(f):
        ####YOU CAN DO BETTER THAN THIS... READ THE NUMBER OF ELEMENTS AND ITERATE ON THAT, LIKE CROWBAR
        data = line.split()
        if len(data) < 10:
            # Face record: leading vertex count, then vertex indices.
            nodes = [int(node_index) for node_index in data[1:]]
            for color in colors:
                colored_indices = output_indices[color]  # NOTE(review): unused
                # Connect the face's vertices that share this color; the
                # RGBA lookup hits the memmap, which is only populated in
                # network mode (zeros otherwise).
                colored_nodes = [node for node in nodes if tuple(vertices[node,3:7]) == color]
                if colored_nodes:
                    #print 'edge'
                    output_graphs[color].add_edges_from(combinations(colored_nodes, 2))
            if not i % 10000:
                # `i` keeps counting past the vertex section, hence the offset.
                print "processing face %i/%i..." % (i - vertex_number, face_number)
        else:
            # Vertex record: x y z nx ny nz r g b alpha.
            x, y, z,\
            nx, ny, nz,\
            r, g, b, alpha = data
            color = tuple((int(r), int(g), int(b), int(alpha)))
            normal = np.array((float(nx), float(ny), float(nz)))
            if color in colors:
                if eig:
                    position = np.array((float(x), float(y), float(z)))
                    output[color].append(position)
                elif network:
                    output_indices[color].append(i)
                    # NOTE(review): mixed float()/int() casts — all land as
                    # float in the memmap anyway.
                    vertices[i,:] = np.array((float(x), float(y), float(z),float(r),int(g),int(b),int(alpha), float(nx), float(ny), float(nz)))
                else:
                    normal = normal/np.linalg.norm(normal)
                    output[color].append(calc_sphere(*normal))
            if not i % 10000:
                print "processing node %i/%i..." \
                    % (i, vertex_number)
    if eig:
        # Collapse each color's point cloud to a single best-fit axis.
        for color in colors:
            output[color] = (calc_sphere(*general_axis(np.array(output[color]), -1)),)
    elif network:
        for color in colors:
            if __debug__:
                print "processing network for color ", color
            # Each connected component is treated as one planar feature.
            # NOTE(review): if connected_components yields sets, fancy
            # indexing the memmap with them may need list() — confirm
            # against the networkx version in use.
            for plane_vertices_indices in connected_components(output_graphs[color]):
                colored_vertices = vertices[plane_vertices_indices,:3]
                # Plane attitude from the best-fit axis of the positions.
                dipdir, dip = calc_sphere(*general_axis(colored_vertices, -1))
                # Centroid (the [:, :3] re-slice is redundant — already 3 cols).
                X, Y, Z = colored_vertices[:, :3].mean(axis=0)
                # Trace length: distance between the topmost and bottommost
                # vertices of the component.
                highest_vertex = colored_vertices[np.argmax(colored_vertices[:,2]),:]
                lowest_vertex = colored_vertices[np.argmin(colored_vertices[:,2]),:]
                trace = np.linalg.norm(highest_vertex - lowest_vertex)
                # Fabric statistics from the unit normals (cols 7-9).
                direction_cosines = normalized(vertices[plane_vertices_indices, 7:])
                n = direction_cosines.shape[0]
                resultant_vector = np.sum(direction_cosines, axis=0)
                # Fisher concentration parameter estimate.
                fisher_k = (n - 1)/(n - np.linalg.norm(resultant_vector))
                # Orientation tensor and its eigen-decomposition.
                direction_tensor = np.dot(direction_cosines.T, direction_cosines)
                eigen_values, eigen_vectors = np.linalg.eigh(direction_tensor)
                eigen_values_order = (-eigen_values).argsort()
                first_eigenvalue,\
                second_eigenvalue,\
                third_eigenvalue = eigen_values[eigen_values_order]
                first_eigenvector,\
                second_eigenvector,\
                third_eigenvector = eigen_vectors[:,eigen_values_order].T
                #From Vollmer 1990
                # Point/Girdle/Random indices; B = P + G ("cylindricity").
                vollmer_P = (first_eigenvalue - second_eigenvalue)/n
                vollmer_G = 2*(second_eigenvalue - third_eigenvalue)/n
                vollmer_R = 3*third_eigenvalue/n
                vollmer_B = vollmer_P + vollmer_G
                output[color].append((dipdir, dip, X, Y, Z, trace, n, fisher_k,
                                      vollmer_P, vollmer_G, vollmer_R, vollmer_B))
    #embed()
    return output
def parse_ply_nx(f, colors, eig=False, network=False):
    # Parse an ASCII Stanford PLY stream and group its vertices by RGBA color.
    #
    # f       -- open text handle positioned at the start of the PLY file.
    # colors  -- iterable of (r, g, b, alpha) int tuples to collect.
    # eig     -- collect per-color positions and reduce each color to a single
    #            attitude via general_axis/calc_sphere.
    # network -- build a per-color graph of face-connected vertices and emit
    #            one attitude per connected component (plane).
    # default -- emit one calc_sphere(normal) orientation per colored vertex.
    #
    # Returns: dict color -> list of results (shape depends on the mode).
    output = {color: [] for color in colors}
    output_indices = {color: [] for color in colors}
    output_graphs = {color: Graph() for color in colors}
    # if not f.readline() == "ply": raise Exception("You must use a .ply (stanford format) file.")
    # if not "ascii" in f.readline(): raise Exception("You must use the ascii .ply specification.")
    header_line = f.readline()
    if not "ply" in header_line:
        raise IOError("can only read text ply files")
    # Scan the header for the vertex and face element counts.
    # NOTE(review): vertex_number/face_number stay unbound if the header
    # lacks the element lines; nothing checks "format ascii".
    while header_line != "end_header\n":
        print header_line,
        if "element" in header_line:
            if "vertex" in header_line:
                vertex_number = int(header_line.split()[-1])
            if "face" in header_line:
                face_number = int(header_line.split()[-1])
        header_line = f.readline()
    # Disk-backed scratch table, one row per vertex:
    # cols 0-2 position, 3-6 RGBA (filled only in network mode).
    # NOTE(review): the backing temp file is never removed.
    vertices = np.memmap(temp(), dtype="float_", mode="w+", shape=(vertex_number, 7))
    for i, line in enumerate(f):
        data = line.split()
        if len(data) != 10:
            # Face record: leading vertex count, then vertex indices.
            nodes = [int(node_index) for node_index in data[1:]]
            for color in colors:
                colored_indices = output_indices[color]  # NOTE(review): unused
                # Connect the face's vertices that share this color; the
                # RGBA lookup (last 4 cols) hits the memmap, which is only
                # populated in network mode (zeros otherwise).
                colored_nodes = [
                    node for node in nodes
                    if tuple(vertices[node, -4:]) == color
                ]
                if colored_nodes:
                    #print 'edge'
                    output_graphs[color].add_edges_from(
                        combinations(colored_nodes, 2))
            if not i % 10000:
                # `i` keeps counting past the vertex section, hence the offset.
                print "processing face %i/%i..." % (i - vertex_number,
                                                    face_number)
        else:
            # Vertex record: x y z nx ny nz r g b alpha.
            x, y, z,\
            nx, ny, nz,\
            r, g, b, alpha = data
            color = tuple((int(r), int(g), int(b), int(alpha)))
            normal = np.array((float(nx), float(ny), float(nz)))
            if color in colors:
                if eig:
                    position = np.array((float(x), float(y), float(z)))
                    output[color].append(position)
                elif network:
                    output_indices[color].append(i)
                    # NOTE(review): mixed float()/int() casts — all land as
                    # float in the memmap anyway.
                    vertices[i, :] = np.array(
                        (float(x), float(y), float(z), float(r), int(g),
                         int(b), int(alpha)))
                else:
                    normal = normal / np.linalg.norm(normal)
                    output[color].append(calc_sphere(*normal))
            if not i % 10000:
                print "processing node %i/%i..." \
                    % (i, vertex_number)
    if eig:
        # Collapse each color's point cloud to a single best-fit axis.
        for color in colors:
            output[color] = (calc_sphere(
                *general_axis(np.array(output[color]), -1)), )
    elif network:
        for color in colors:
            if __debug__:
                print "processing network for color ", color
            # Each connected component is treated as one planar feature.
            # NOTE(review): if connected_components yields sets, fancy
            # indexing the memmap with them may need list() — confirm
            # against the networkx version in use.
            for plane_vertices_indices in connected_components(
                    output_graphs[color]):
                colored_vertices = vertices[plane_vertices_indices, :3]
                # Plane attitude from the best-fit axis of the positions.
                dipdir, dip = calc_sphere(*general_axis(colored_vertices, -1))
                # Centroid (the [:, :3] re-slice is redundant — already 3 cols).
                X, Y, Z = colored_vertices[:, :3].mean(axis=0)
                # Trace length: distance between the topmost and bottommost
                # vertices of the component.
                highest_vertex = colored_vertices[
                    np.argmax(colored_vertices[:, 2]), :]
                lowest_vertex = colored_vertices[
                    np.argmin(colored_vertices[:, 2]), :]
                trace = np.linalg.norm(highest_vertex - lowest_vertex)
                output[color].append((dipdir, dip, X, Y, Z, trace))
    #embed()
    return output
def parse_ply_nx(f, colors, eig=False, network=False):
    # Parse an ASCII Stanford PLY stream and group its vertices by RGBA color.
    #
    # f       -- open text handle positioned at the start of the PLY file.
    # colors  -- iterable of (r, g, b, alpha) int tuples to collect.
    # eig     -- collect per-color positions and reduce each color to a single
    #            attitude via general_axis/calc_sphere.
    # network -- build a per-color graph of face-connected vertices and emit
    #            one attitude per connected component (plane).
    # default -- emit one calc_sphere(normal) orientation per colored vertex.
    #
    # Returns: dict color -> list of results (shape depends on the mode).
    output = {color:[] for color in colors}
    output_indices = {color:[] for color in colors}
    output_graphs = {color:Graph() for color in colors}
    # if not f.readline() == "ply": raise Exception("You must use a .ply (stanford format) file.")
    # if not "ascii" in f.readline(): raise Exception("You must use the ascii .ply specification.")
    header_line = f.readline()
    if not "ply" in header_line:
        raise IOError("can only read text ply files")
    # Scan the header for the vertex and face element counts.
    # NOTE(review): vertex_number/face_number stay unbound if the header
    # lacks the element lines; nothing checks "format ascii".
    while header_line != "end_header\n":
        print header_line,
        if "element" in header_line:
            if "vertex" in header_line:
                vertex_number = int(header_line.split()[-1])
            if "face" in header_line:
                face_number = int(header_line.split()[-1])
        header_line = f.readline()
    # Disk-backed scratch table, one row per vertex:
    # cols 0-2 position, 3-6 RGBA (filled only in network mode).
    # NOTE(review): the backing temp file is never removed.
    vertices = np.memmap(temp(), dtype="float_", mode="w+", shape=(vertex_number, 7))
    for i, line in enumerate(f):
        data = line.split()
        if len(data) != 10:
            # Face record: leading vertex count, then vertex indices.
            nodes = [int(node_index) for node_index in data[1:]]
            for color in colors:
                colored_indices = output_indices[color]  # NOTE(review): unused
                # Connect the face's vertices that share this color; the
                # RGBA lookup (last 4 cols) hits the memmap, which is only
                # populated in network mode (zeros otherwise).
                colored_nodes = [node for node in nodes if tuple(vertices[node,-4:]) == color]
                if colored_nodes:
                    #print 'edge'
                    output_graphs[color].add_edges_from(combinations(colored_nodes, 2))
            if not i % 10000:
                # `i` keeps counting past the vertex section, hence the offset.
                print "processing face %i/%i..." % (i - vertex_number, face_number)
        else:
            # Vertex record: x y z nx ny nz r g b alpha.
            x, y, z,\
            nx, ny, nz,\
            r, g, b, alpha = data
            color = tuple((int(r), int(g), int(b), int(alpha)))
            normal = np.array((float(nx), float(ny), float(nz)))
            if color in colors:
                if eig:
                    position = np.array((float(x), float(y), float(z)))
                    output[color].append(position)
                elif network:
                    output_indices[color].append(i)
                    # NOTE(review): mixed float()/int() casts — all land as
                    # float in the memmap anyway.
                    vertices[i,:] = np.array((float(x), float(y), float(z),float(r),int(g),int(b),int(alpha)))
                else:
                    normal = normal/np.linalg.norm(normal)
                    output[color].append(calc_sphere(*normal))
            if not i % 10000:
                print "processing node %i/%i..." \
                    % (i, vertex_number)
    if eig:
        # Collapse each color's point cloud to a single best-fit axis.
        for color in colors:
            output[color] = (calc_sphere(*general_axis(np.array(output[color]), -1)),)
    elif network:
        for color in colors:
            if __debug__:
                print "processing network for color ", color
            # Each connected component is treated as one planar feature.
            # NOTE(review): if connected_components yields sets, fancy
            # indexing the memmap with them may need list() — confirm
            # against the networkx version in use.
            for plane_vertices_indices in connected_components(output_graphs[color]):
                colored_vertices = vertices[plane_vertices_indices,:3]
                # Plane attitude from the best-fit axis of the positions.
                dipdir, dip = calc_sphere(*general_axis(colored_vertices, -1))
                # Centroid (the [:, :3] re-slice is redundant — already 3 cols).
                X, Y, Z = colored_vertices[:, :3].mean(axis=0)
                # Trace length: distance between the topmost and bottommost
                # vertices of the component.
                highest_vertex = colored_vertices[np.argmax(colored_vertices[:,2]),:]
                lowest_vertex = colored_vertices[np.argmin(colored_vertices[:,2]),:]
                trace = np.linalg.norm(highest_vertex - lowest_vertex)
                output[color].append((dipdir, dip, X, Y, Z, trace))
    #embed()
    return output
def parse_ply_nx(f, colors, eig=False, network=False):
    # Parse an ASCII Stanford PLY stream and group its vertices by RGBA color.
    #
    # f       -- open text handle positioned at the start of the PLY file.
    # colors  -- iterable of (r, g, b, alpha) int tuples to collect.
    # eig     -- collect per-color positions and reduce each color to a single
    #            attitude via general_axis/calc_sphere.
    # network -- build a per-color graph of face-connected vertices and emit
    #            one attitude + Fisher/Vollmer fabric statistics per connected
    #            component (plane).
    # default -- emit one calc_sphere(normal) orientation per colored vertex.
    #
    # Returns: dict color -> list of results (shape depends on the mode).
    output = {color: [] for color in colors}
    output_indices = {color: [] for color in colors}
    output_graphs = {color: Graph() for color in colors}
    # if not f.readline() == "ply": raise Exception("You must use a .ply (stanford format) file.")
    # if not "ascii" in f.readline(): raise Exception("You must use the ascii .ply specification.")
    header_line = f.readline()
    if not "ply" in header_line:
        raise IOError("can only read text ply files")
    # Scan the header for the vertex and face element counts.
    # NOTE(review): vertex_number/face_number stay unbound if the header
    # lacks the element lines; nothing checks "format ascii".
    while header_line != "end_header\n":
        print header_line,
        if "element" in header_line:
            if "vertex" in header_line:
                vertex_number = int(header_line.split()[-1])
            if "face" in header_line:
                face_number = int(header_line.split()[-1])
        header_line = f.readline()
    # Disk-backed scratch table, one row per vertex:
    # cols 0-2 position, 3-6 RGBA, 7-9 normal (filled only in network mode).
    # NOTE(review): the backing temp file is never removed.
    vertices = np.memmap(temp(), dtype="float_", mode="w+", shape=(vertex_number, 10))
    for i, line in enumerate(f):
        ####YOU CAN DO BETTER THAN THIS... READ THE NUMBER OF ELEMENTS AND ITERATE ON THAT, LIKE CROWBAR
        data = line.split()
        if len(data) < 10:
            # Face record: leading vertex count, then vertex indices.
            nodes = [int(node_index) for node_index in data[1:]]
            for color in colors:
                colored_indices = output_indices[color]  # NOTE(review): unused
                # Connect the face's vertices that share this color; the
                # RGBA lookup (cols 3-6) hits the memmap, which is only
                # populated in network mode (zeros otherwise).
                colored_nodes = [
                    node for node in nodes
                    if tuple(vertices[node, 3:7]) == color
                ]
                if colored_nodes:
                    #print 'edge'
                    output_graphs[color].add_edges_from(
                        combinations(colored_nodes, 2))
            if not i % 10000:
                # `i` keeps counting past the vertex section, hence the offset.
                print "processing face %i/%i..." % (i - vertex_number,
                                                    face_number)
        else:
            # Vertex record: x y z nx ny nz r g b alpha.
            x, y, z,\
            nx, ny, nz,\
            r, g, b, alpha = data
            color = tuple((int(r), int(g), int(b), int(alpha)))
            normal = np.array((float(nx), float(ny), float(nz)))
            if color in colors:
                if eig:
                    position = np.array((float(x), float(y), float(z)))
                    output[color].append(position)
                elif network:
                    output_indices[color].append(i)
                    # NOTE(review): mixed float()/int() casts — all land as
                    # float in the memmap anyway.
                    vertices[i, :] = np.array(
                        (float(x), float(y), float(z), float(r), int(g),
                         int(b), int(alpha), float(nx), float(ny), float(nz)))
                else:
                    normal = normal / np.linalg.norm(normal)
                    output[color].append(calc_sphere(*normal))
            if not i % 10000:
                print "processing node %i/%i..." \
                    % (i, vertex_number)
    if eig:
        # Collapse each color's point cloud to a single best-fit axis.
        for color in colors:
            output[color] = (calc_sphere(
                *general_axis(np.array(output[color]), -1)), )
    elif network:
        for color in colors:
            if __debug__:
                print "processing network for color ", color
            # Each connected component is treated as one planar feature.
            # NOTE(review): if connected_components yields sets, fancy
            # indexing the memmap with them may need list() — confirm
            # against the networkx version in use.
            for plane_vertices_indices in connected_components(
                    output_graphs[color]):
                colored_vertices = vertices[plane_vertices_indices, :3]
                # Plane attitude from the best-fit axis of the positions.
                dipdir, dip = calc_sphere(*general_axis(colored_vertices, -1))
                # Centroid (the [:, :3] re-slice is redundant — already 3 cols).
                X, Y, Z = colored_vertices[:, :3].mean(axis=0)
                # Trace length: distance between the topmost and bottommost
                # vertices of the component.
                highest_vertex = colored_vertices[
                    np.argmax(colored_vertices[:, 2]), :]
                lowest_vertex = colored_vertices[
                    np.argmin(colored_vertices[:, 2]), :]
                trace = np.linalg.norm(highest_vertex - lowest_vertex)
                # Fabric statistics from the unit normals (cols 7-9).
                direction_cosines = normalized(vertices[plane_vertices_indices, 7:])
                n = direction_cosines.shape[0]
                resultant_vector = np.sum(direction_cosines, axis=0)
                # Fisher concentration parameter estimate.
                fisher_k = (n - 1) / (n - np.linalg.norm(resultant_vector))
                # Orientation tensor and its eigen-decomposition.
                direction_tensor = np.dot(direction_cosines.T, direction_cosines)
                eigen_values, eigen_vectors = np.linalg.eigh(direction_tensor)
                eigen_values_order = (-eigen_values).argsort()
                first_eigenvalue,\
                second_eigenvalue,\
                third_eigenvalue = eigen_values[eigen_values_order]
                first_eigenvector,\
                second_eigenvector,\
                third_eigenvector = eigen_vectors[:,eigen_values_order].T
                #From Vollmer 1990
                # Point/Girdle/Random indices; B = P + G ("cylindricity").
                vollmer_P = (first_eigenvalue - second_eigenvalue) / n
                vollmer_G = 2 * (second_eigenvalue - third_eigenvalue) / n
                vollmer_R = 3 * third_eigenvalue / n
                vollmer_B = vollmer_P + vollmer_G
                output[color].append(
                    (dipdir, dip, X, Y, Z, trace, n, fisher_k, vollmer_P,
                     vollmer_G, vollmer_R, vollmer_B))
    #embed()
    return output