def VertEdgeTo3D(valuesVert, valuesEdge, interface, sizeX=sizeX, dimFunc=dimFunc, dE1=dimEnd1, dE2=dimEnd2):
    '''
    Starting with the 2D vertices and edges, create the vertices and planes in 3D.

    The 2D cross-section is repeated on nStepsY + 1 layers. On every layer the
    second coordinate of each vertex tagged 0 or 2 is the original value scaled
    by a normalised factor derived from dimFunc, and the second coordinate of
    each vertex tagged interfaceTag is read from the "interface" table.

    Parameters:
        valuesVert : sequence of (x, z, bc) tuples describing the 2D vertices
        valuesEdge : sequence of (u, v, bc) tuples describing the 2D edges
        interface  : table indexed as interface[stepX][stepY]
        sizeX      : extent in x, used to turn an x coordinate into a stepX index
        dimFunc    : callable sampled between dE1 and dE2, one sample per layer
        dE1, dE2   : end points of the range on which dimFunc is sampled

    Reads the module-level names nStepsX, nStepsY, sizeY, interfaceTag, COPY
    and polygonize.

    Returns:
        (valuesVert3D, valuesPlane) where valuesVert3D is a list of
        (x, y, z, bc) tuples and valuesPlane is a list of tuples whose first
        entry is a vertex count followed by vertex indices.
    '''
    layerCount = nStepsY + 1

    # Per-layer scale factors, normalised so that the largest one equals 1.
    dim = [dimFunc(dE1 + (dE2 - dE1) * step / float(nStepsY))
           for step in range(layerCount)]
    maxDim = float(max(dim))
    dim = [e / maxDim for e in dim]
    # Function-call form: prints the same text on Python 2 and also parses on Python 3.
    print(dim)

    # One list of (x, z, bc) vertices per layer.
    v3vert = []
    valuesVertP = COPY(valuesVert)
    for step in range(layerCount):
        vVP = COPY(valuesVertP)
        for i, (x, _z, bc) in enumerate(valuesVertP):
            if bc in (0, 2):
                # Always scale the ORIGINAL value, not the previous layer's value.
                vVP[i] = (x, valuesVert[i][1] * dim[step], bc)
            if bc == interfaceTag:
                # Round x to the nearest column of the interface table.
                stepX = int(0.5 + float(x) / sizeX * nStepsX)
                vVP[i] = (x, interface[stepX][step], bc)
        valuesVertP = vVP
        v3vert.append(valuesVertP)

    # Flatten the layers into 3D vertices; y grows by dY per layer.
    valuesVert3D = []
    dY = float(sizeY) / nStepsY
    for yStep in range(layerCount):
        for (x, z, bc) in v3vert[yStep]:
            valuesVert3D.append((x, yStep * dY, z, bc))

    # Each 2D edge becomes one quadrilateral between every pair of neighbouring layers.
    valuesPlane = []
    L = len(valuesVert)
    for (u, v, bc) in valuesEdge:
        for yStep in range(nStepsY):
            u1, v1 = yStep * L + u, yStep * L + v
            u2, v2 = u1 + L, v1 + L
            valuesPlane.append((4, u2, v2, v1, u1))

    # Polygons of the first layer, then the same polygons shifted onto the last layer.
    valuesPlaneSide = polygonize(valuesVert, valuesEdge, sX=sizeX)
    valuesPlane.extend(valuesPlaneSide)
    lastLayerOffset = L * nStepsY
    valuesPlane.extend(
        (poly[0], ) + tuple(i + lastLayerOffset for i in poly[1:])
        for poly in valuesPlaneSide)
    return valuesVert3D, valuesPlane
def reduce_recomb_rate_info(rcmb_rate_info, bim_SNP_positions, test_functionality):
    """
    Purpose
    -------
    Drop rows from rcmb_rate_info so that every remaining genomic interval
    contains at least one SNP.

    Parameters
    ----------
    rcmb_rate_info: a pandas dataframe with two columns.
                    "Position(bp)" is the genomic position of the ith interval's left boundary.
                    The ith row is both the left boundary of the ith interval and the
                    right boundary of the (i-1)th interval.
                    "Map(cM)" is the cumulative recombination rate in centiMorgans,
                    which is 0 in the first row.

    bim_SNP_positions: the genomic positions of every SNP directly from the input bim file.
                       A copy is handed to the interval-mapping helper.

    test_functionality: forwarded to the interval-mapping helper. When it equals
                        "test_units" the result is also passed to unit_tester.

    Returns
    -------
    reduced_rcmb_rate_info: "rcmb_rate_info" restricted to the rows flagged as occupied,
                            plus the row immediately before the first occupied one.
    """
    boundary_positions = rcmb_rate_info["Position(bp)"].to_numpy()

    # INDEXING NOTE: with context=1 nothing is subtracted from the helper's result, so
    # (per the original note) each SNP maps to the index of the closest boundary to the
    # RIGHT of its genomic position. The only additional row needed is the closest
    # boundary to the LEFT of the first SNP, which is the row just before the first
    # occupied one.
    right_boundary_of_SNP = SNP_positions_to_rcmb_intervals(
        boundary_positions,
        COPY(bim_SNP_positions),
        test_functionality,
        context=1,
    )

    row_indices = np.arange(len(boundary_positions))
    keep_row = np.isin(row_indices, right_boundary_of_SNP)
    first_kept_row = np.min(np.where(keep_row)[0])
    keep_row[first_kept_row - 1] = True

    reduced_rcmb_rate_info = rcmb_rate_info[keep_row]

    # reduced_rcmb_rate_info.to_csv("correct_reduce_recomb_rate_info_output.txt", sep = "\t", header = True, index = False)
    if test_functionality == "test_units":
        unit_tester(reduced_rcmb_rate_info,
                    "correct_reduce_recomb_rate_info_output.txt", 0)
    return reduced_rcmb_rate_info
def getPolygon(valuesVert, neighbors, prev, curr):
    '''
    helper method for "polygonize" method

    Walks from vertex to vertex, starting on the step prev -> curr, until it
    reaches a vertex it has already visited. Where the walk can continue in
    two directions, the neighbour with the larger getAngle value is taken.

    Returns a tuple: the number of visited vertices followed by the visited
    vertex indices, each shifted by +1.
    '''
    path = []
    while curr not in path:
        path.append(curr)

        # Everything reachable from curr except the vertex we just came from.
        options = COPY(neighbors[curr])
        options.remove(prev)

        if len(options) == 1:
            following = options.pop()
        elif len(options) == 2:
            nA = options.pop()
            nB = options.pop()
            angleA = getAngle(valuesVert, prev, curr, nA)
            angleB = getAngle(valuesVert, prev, curr, nB)
            following = nA if angleA > angleB else nB
        else:
            # No usable continuation: curr would stay unchanged, which ends the walk.
            break

        prev, curr = curr, following

    return (len(path), ) + tuple(index + 1 for index in path)
def addMountain(valuesVert, valuesEdge, mountA, mountB, mountH, function, end1, end2, desiredAmplitude, nSteps, nSegs):
    '''
    adds the interface which hopefully looks like some small mountains

    The straight edge (mountA, mountB, 0) is replaced by a chain of vertices
    and edges tagged interfaceTag whose heights follow "function".

    Parameters:
        valuesVert, valuesEdge : current 2D vertices and (u, v, bc) edges;
                                 neither input list is modified
        mountA, mountB         : end points of the edge being replaced
        mountH                 : offset added to every scaled height
        function               : callable sampled at n + 1 points from end1 to end2
        end1, end2             : sampling range of function
        desiredAmplitude       : max - min of the scaled heights
        nSteps, nSegs          : n = round(nSteps / nSegs) samples per segment;
                                 the height profile is repeated nSegs times

    Reads the module-level names interfaceTag, sizeX, COPY and insertAndDelete.

    If the function is bad (function(0) raises, which includes function being
    None, or the sampled function is constant) the geometry is left as it is
    and the straight edge is only re-tagged with interfaceTag.

    Returns:
        (vertices, edges) after the modification.
    '''
    straightEdge = (mountA, mountB, 0)

    def flatInterface():
        # Keep the straight edge, only give it the interface tag.
        retagged = [(mountA, mountB, interfaceTag) if t == straightEdge else t
                    for t in valuesEdge]
        return valuesVert, retagged

    # Probe the function once; "Exception" (not a bare except) so that
    # KeyboardInterrupt / SystemExit are not swallowed.
    try:
        function(0)
    except Exception:
        return flatInterface()

    n = int(round(float(nSteps) / nSegs))
    heights = [function(end1 + float(end2 - end1) * x / n) for x in range(n + 1)]
    amplitude = max(heights) - min(heights)
    if amplitude == 0:
        # A constant function cannot be rescaled (division by zero below).
        return flatInterface()

    heights = [mountH + x * float(desiredAmplitude) / amplitude for x in heights]
    heights *= nSegs
    H = len(heights)
    segLength = float(sizeX) / (H - 1)
    values = [(i * segLength, height, interfaceTag)
              for i, height in enumerate(heights)]

    # The first and last samples are handed to insertAndDelete together with
    # mountA / mountB; the inner samples are appended as new vertices.
    valuesVertP = COPY(valuesVert)
    valuesVertP = insertAndDelete(mountA, values[0], valuesVertP)
    valuesVertP = insertAndDelete(mountB, values[-1], valuesVertP)
    values = values[1:-1]
    V = len(values)

    # remove line
    valuesEdgeP = [t for t in valuesEdge if t != straightEdge]

    # change valuesVert + valuesEdge
    L = len(valuesVertP)
    valuesVertP.extend(values)
    valuesEdgeP.extend([(mountA, L + 1, interfaceTag), (L + V, mountB, interfaceTag)])
    valuesEdgeP.extend((L + i, L + i + 1, interfaceTag) for i in range(1, V))
    return valuesVertP, valuesEdgeP
# Ordered (old genotype value, new genotype value) replacements for every
# supported SNP_phenotype_map label. The order matters: each replacement is
# applied to the result of the previous one.
_GENOTYPE_RECODINGS = {
    "regular": (),
    "recessive": ((1, 0), ),
    "dominant": ((1, 2), ),
    "heterozygous_only": ((2, 0), (1, 2)),
    "homozygous_only": ((0, 2), (1, 0)),
}


def _read_text_lines(path):
    """Return every line of the text file at "path" (newlines kept) and close the file."""
    with open(path, "r") as text_file:
        return text_file.readlines()


def _apply_SNP_phenotype_map(genotypes, labels, github_link):
    """Recode the 2D array "genotypes" in place, one SNP_phenotype_map label at a time."""
    SNP_count = genotypes.shape[1]
    if SNP_count < 1:
        return
    for m, label in enumerate(labels):
        recoding = _GENOTYPE_RECODINGS.get(label)
        if recoding is None:
            print(
                "\nerror: all SNP_phenotype labels must be 'regular', 'recessive', 'dominant', 'heterozygous_only', or 'homozygous_only'.\n"
            )
            print("Visit " + github_link +
                  " for examples of correct formatting.\n")
            exit()
        for old_value, new_value in recoding:
            # With several SNPs the mth label recodes the mth column (a view, so
            # the assignment writes through); with a single SNP the whole array
            # is recoded.
            target = genotypes[:, m] if SNP_count > 1 else genotypes
            target[target == old_value] = new_value


def simulate_phenotypes(
    output_file_names,
    causal_SNP_IDs_path,
    cumulative_SNP_counts,
    major_minor_assignments_path,
    betas_path,
    mean_phenotype,
    sample_size,
    bim_SNP_names,
    phenotype,
    SNP_phenotype_map_path,
    noise=0,
):
    """
    Purpose
    -------
    to simulate correlations between the genotypes of selected SNPs
    and a continuous or binary phenotype.

    Parameters
    ----------
    output_file_names: the names of the output bed files for all chromosomes. The first name,
                       with its last 8 characters removed, is the prefix of the
                       "model_profile.txt" file written by this function.

    causal_SNP_IDs_path: the path to the file containing causal rsIDs from which phenotype values
                         are simulated. Each row holds the tab-separated SNP IDs of one feature.

    cumulative_SNP_counts: the cumulative number of SNPs summed from chromosome 1 to chromosome 22
                           in ascending order.

    major_minor_assignments_path: the path to the file specifying whether the major or minor allele
                                  in each causal SNP adds 1 to the genotype (tab-separated "0"/"1"
                                  per row), or "standard" to use the genotypes unchanged.

    betas_path: the path to the file containing one beta coefficient per row.

    mean_phenotype: an input argument (float). It can be any number for continuous phenotypes,
                    and it must be in between 0 and 1 for binary phenotypes.

    sample_size: the number of samples that have been simulated.

    bim_SNP_names: a list of SNPs from the output bim file (same as the input bim file).

    phenotype: "binary" or "continuous".

    SNP_phenotype_map_path: the path to the file specifying whether each causal SNP's phenotype map
                            is regular, dominant, recessive, heterozygous_only, or homozygous_only,
                            or "standard" to skip the recoding.

    noise: a percentage of the mean phenotype that is the standard deviation of the random gaussian
           noise that contributes to the simulated phenotype's values.

    Returns
    -------
    A numpy array with one simulated phenotype per simulated whole genome: a 1D int8 array for
    binary phenotypes, a (sample_size, 1) array for continuous phenotypes. It also writes the
    values of the inferred beta coefficients and overall R^2 between the causal genotypes and
    the phenotype into a text file.

    Any invalid input prints an error message and terminates the program through exit().
    """

    # imports required model components (SNPs and beta values).
    github_link = "https://github.com/EpistasisLab/regens"
    causal_SNP_IDs = _read_text_lines(causal_SNP_IDs_path)
    try:
        betas = np.array(_read_text_lines(betas_path)).astype(np.float64)
    except Exception:
        print("\nerror: The beta coefficients file at " + betas_path +
              " is incorrectly formatted. Visit " + github_link +
              " for examples of correct formatting.\n")
        exit()
    if len(betas) != len(causal_SNP_IDs):
        print(
            "\nerror: The causal_SNP_IDs and betas files must have the same number of rows. Visit "
            + github_link + " for examples of correct formatting.\n")
        exit()

    # imports optional model components (major/minor assignments and SNP_phenotype_maps).
    use_major_minor_assignments = major_minor_assignments_path != "standard"
    use_SNP_phenotype_map = SNP_phenotype_map_path != "standard"
    if use_major_minor_assignments:
        major_minor_assignments = _read_text_lines(major_minor_assignments_path)
        if len(major_minor_assignments) != len(causal_SNP_IDs):
            print(
                "\nerror: The causal_SNP_IDs and major_minor_assignments files must have the same number of rows. Visit "
                + github_link + " for examples of correct formatting.\n")
            exit()
    if use_SNP_phenotype_map:
        SNP_phenotype_map = _read_text_lines(SNP_phenotype_map_path)
        if len(SNP_phenotype_map) != len(causal_SNP_IDs):
            print(
                "\nerror: The causal_SNP_IDs and SNP_phenotype_map files must have the same number of rows. Visit "
                + github_link + " for examples of correct formatting.\n")
            exit()

    # simulates phenotypes based on model specifications
    feature_size = len(betas)
    features = np.zeros((sample_size, feature_size))
    for p in range(feature_size):
        feature_SNP_IDs = causal_SNP_IDs[p].strip().split("\t")
        try:
            feature_SNPs = get_feature_SNPs(
                feature_SNP_IDs,
                cumulative_SNP_counts,
                output_file_names,
                sample_size,
                bim_SNP_names,
            )
        except Exception:
            print(
                "\nerror: The causal SNP IDs on row " + str(p + 1) +
                " are either incorrectly formatted or they do not exist in the input bim file:\n"
            )
            print("Visit " + github_link +
                  " for examples of correct formatting.\n")
            # Stop here: continuing would use an undefined array or the
            # genotypes left over from the previous row.
            exit()

        if use_major_minor_assignments:
            assignment_labels = major_minor_assignments[p].strip().split("\t")
            if not np.all(np.isin(assignment_labels, ["0", "1"])):
                print(
                    "\nerror: The major minor assignments on row " +
                    str(p + 1) +
                    " are either incorrectly formatted or they do not exist in the input bim file:\n"
                )
                print("Visit " + github_link +
                      " for examples of correct formatting.\n")
                # Same reasoning as above: never continue with stale genotypes.
                exit()
            feature_major_minor_assignments = np.array(
                assignment_labels).astype(np.int64)
            feature_major_minor_assignments_alt = COPY(
                feature_major_minor_assignments)
            feature_major_minor_assignments_alt[
                feature_major_minor_assignments_alt == 0] = -1
            # assignment 0 leaves a genotype g unchanged, assignment 1 turns it into 2 - g.
            feature_SNPs_with_assignments = (
                feature_SNPs - 2 * feature_major_minor_assignments) * (
                    -1 * feature_major_minor_assignments_alt)
        else:
            feature_SNPs_with_assignments = feature_SNPs

        if use_SNP_phenotype_map:
            feature_SNP_phenotype_map = SNP_phenotype_map[p].strip().split(
                "\t")
            _apply_SNP_phenotype_map(feature_SNPs_with_assignments,
                                     feature_SNP_phenotype_map, github_link)

        # np.prod is the supported name; the np.product alias was removed in NumPy 2.0.
        features[:, p] = np.prod(feature_SNPs_with_assignments, axis=1)

    weighted_feature_sums = np.sum(betas * features, axis=1, keepdims=True)
    # The scale is a standard deviation and must not be negative, so the
    # magnitude of the mean is used (a negative mean used to raise here).
    weighted_feature_sums += np.random.normal(
        loc=0,
        scale=noise * abs(np.mean(weighted_feature_sums)),
        size=weighted_feature_sums.shape,
    )

    if phenotype == "binary":

        def logistic_with_unknown_intercept(intercept, weighted_feature_sums,
                                            mean_phenotype):
            disease_probabilities = 1 / (
                1 + np.exp(-1 * (weighted_feature_sums + intercept)))
            return np.mean(disease_probabilities) - mean_phenotype

        # The intercept is chosen so that the mean disease probability equals mean_phenotype.
        intercept = root(
            fun=logistic_with_unknown_intercept,
            x0=np.array([0]),
            args=(weighted_feature_sums, mean_phenotype),
        ).x[0]
        disease_probabilities = 1 / (
            1 + np.exp(-1 * (weighted_feature_sums + intercept)))
        simulated_phenotypes = (np.random.rand(len(disease_probabilities)) <=
                                disease_probabilities.reshape(-1)).astype(
                                    np.int8)
        model = LogisticRegression(C=1e100,
                                   tol=1e-100,
                                   max_iter=1000000,
                                   solver="lbfgs").fit(features,
                                                       simulated_phenotypes)

    elif phenotype == "continuous":

        def linear_with_unknown_intercept(intercept, weighted_feature_sums,
                                          mean_phenotype):
            return np.mean(weighted_feature_sums + intercept) - mean_phenotype

        # The intercept is chosen so that the mean phenotype equals mean_phenotype.
        intercept = root(
            fun=linear_with_unknown_intercept,
            x0=np.array([0]),
            args=(weighted_feature_sums, mean_phenotype),
        ).x[0]
        simulated_phenotypes = weighted_feature_sums + intercept
        model = LinearRegression().fit(features, simulated_phenotypes)

    else:
        print("error: phenotype must be either 'binary' or 'continuous'.")
        exit()

    with open(output_file_names[0][:-8] + "model_profile.txt",
              "w") as model_profile:
        model_profile.write("measured R^2 of model fit: " +
                            str(model.score(features, simulated_phenotypes)) +
                            "\n")
        for i, b in enumerate(model.coef_[0]):
            model_profile.write("measured beta value of feature" + str(i + 1) +
                                ": " + str(b) + "\n")
        model_profile.write("measured beta value of intercept: " +
                            str(model.intercept_[0]))

    return simulated_phenotypes
def draw_breakpoints(
    rcmb_rate_info,
    bim_SNP_positions,
    num_breakpoints,
    simulation_sample_size,
    test_functionality,
    chromosome_number,
    output_plink_filename_prefix,
):
    """
    Purpose
    -------
    Computes breakpoint sampling probabilities with "centimorgans_to_probabilities",
    Draws breakpoints with "choice_with_periodic_replacement", and converts the
    breakpoints' corresponding recombination interval indices into the indices of
    input SNPs that reside inside of the recombination interval.

    Parameters
    ----------
    rcmb_rate_info: Output from the "reduce_recomb_rate_info" function.

    bim_SNP_positions: the genomic positions of every SNP directly from the input bim file.

    num_breakpoints: user-specified (int) number of breakpoints per chromosome.

    simulation_sample_size: user-specified number of samples to be simulated.

    test_functionality: forwarded to the helper functions. If equal to "test_correctness", the
                        testers from "regens_testers" are imported and run on the drawn
                        breakpoints and on their SNP mapping. If equal to "test_units",
                        the result is passed to "unit_tester".

    chromosome_number: the chromosome that is currently being simulated.

    output_plink_filename_prefix: plink prefix of the (bed, bim, fam) fileset that will contain
                                  simulated individuals.

    Returns
    -------
    The array drawn by "choice_with_periodic_replacement" (N sets of B recombination interval
    indices), overwritten in place so that each entry is an input SNP's bim row index
    (also its bed column index).

    NOTE: the SNP inside a multi-SNP interval is now chosen uniformly, so a reference output
    produced with the older, biased choice must be regenerated before comparing against it.
    """

    run_correctness_tests = test_functionality == "test_correctness"
    if run_correctness_tests:
        from regens_testers import test_drawn_breakpoints
        from regens_testers import test_breakpoint_SNP_mapping

    probabilities = centimorgans_to_probabilities(rcmb_rate_info,
                                                  test_functionality)
    rcmb_rate_intervals = rcmb_rate_info["Position(bp)"].to_numpy()
    breakpoints = choice_with_periodic_replacement(simulation_sample_size,
                                                   num_breakpoints,
                                                   probabilities,
                                                   test_functionality)
    if run_correctness_tests:
        test_drawn_breakpoints(breakpoints, probabilities, chromosome_number,
                               output_plink_filename_prefix)
        # Interval indices are overwritten below; keep them for the mapping test.
        old_breakpoints = COPY(breakpoints)

    # INDEXING NOTE: Subtracting one from "SNP_pos_rcmb_interval_map" returns the closest indices of "rcmb_rate_intervals"
    #                boundaries at genomic positions to the LEFT of each SNP's genomic position in bim_SNP_positions.
    #                This is because all SNPs up to the SNP immediately to the left of the ith breakpoint comprise the ith
    #                segment, noting that the (B+1)th includes all SNPs after the Bth breakpoint (there are B breakpoints).
    SNP_pos_rcmb_interval_map = (SNP_positions_to_rcmb_intervals(
        rcmb_rate_intervals,
        COPY(bim_SNP_positions),
        test_functionality,
        context=2) - 1)

    # interval index -> indices of all SNPs mapped to that interval
    rcmb_interval_SNP_pos_map = {}
    for rcmb_interval in np.unique(SNP_pos_rcmb_interval_map):
        rcmb_interval_SNP_pos_map[rcmb_interval] = np.where(
            SNP_pos_rcmb_interval_map == rcmb_interval)[0]

    for jj in range(len(breakpoints)):
        for k in range(num_breakpoints):
            interval_index = breakpoints[jj][k]
            SNP_indices = rcmb_interval_SNP_pos_map[interval_index]
            if len(SNP_indices) == 1:
                breakpoints[jj][k] = SNP_indices[0]
            else:
                # Uniform choice: rand() lies in [0, 1), so int(len * rand())
                # hits each of 0 .. len - 1 with equal probability. The older
                # "- 0.5" form truncated toward zero, which made the first SNP
                # three times as likely as the last one.
                breakpoints[jj][k] = SNP_indices[int(
                    len(SNP_indices) * np.random.rand())]

    if run_correctness_tests:
        test_breakpoint_SNP_mapping(old_breakpoints, rcmb_rate_intervals,
                                    breakpoints, bim_SNP_positions)

    if test_functionality == "test_units":
        unit_tester(breakpoints, "correct_draw_breakpoints_output.txt", None)

    return breakpoints
Jacobians: [J_1, J_2, ... ] ReferenceMassMatrix: [[1, 2, ... ];[3, 4, ... ]; ... ] ''' from globalVars import dimension, order from readMesh import Nodes, Elements, Edges, Neighbors from getBaseFunctions import ReferenceBaseFunctions from Polynomial import Polynomial from copy import deepcopy as COPY from numpy.linalg import det Jacobians = [] if dimension == 2: for element, attribute in Elements: points = [Nodes[element[i]] for i in range(3)] b = COPY(points[0]) A = COPY(points[1:]) A = [[node[i] - b[i] for i in range(2)] for node in A] Jacobian = det(A) Jacobians.append(Jacobian) elif dimension == 3: for element, attribute in Elements: points = [Nodes[element[i]] for i in range(4)] b = COPY(points[0]) A = COPY(points[1:]) A = [[node[i] - b[i] for i in range(3)] for node in A] Jacobian = det(A) Jacobians.append(Jacobian) numBaseFunctions = len(ReferenceBaseFunctions)
def copy(self):
    """Return the result of applying COPY to this object."""
    # NOTE(review): COPY is presumably copy.deepcopy imported under an alias - confirm.
    duplicate = COPY(self)
    return duplicate
def copy(self):
    """
    Return the result of applying COPY to this object.

    If the duplicate has a "par" attribute, that attribute is replaced by
    the result of its own copy() method.
    """
    clone = COPY(self)
    if not hasattr(clone, 'par'):
        return clone
    clone.par = clone.par.copy()
    return clone