def VertEdgeTo3D(valuesVert,
                 valuesEdge,
                 interface,
                 sizeX=sizeX,
                 dimFunc=dimFunc,
                 dE1=dimEnd1,
                 dE2=dimEnd2):
    '''
    Starting with the 2D vertices and edges, create the vertices and planes in 3D.

    The 2D cross-section is repeated at nStepsY + 1 positions along y. At every
    position the z value of a vertex is recomputed from its boundary tag:
      * tag 0 or 2      -> original z scaled by the normalised dimFunc profile
      * tag interfaceTag -> z looked up in interface[stepX][stepY]
      * any other tag   -> z left as in the 2D input

    valuesVert : sequence of (x, z, bc) tuples
    valuesEdge : sequence of (u, v, bc) tuples of vertex indices
    interface  : table indexed as interface[stepX][stepY] giving the z value
                 of interface vertices
    sizeX      : extent in x, used to turn an x coordinate into a column index
    dimFunc    : profile function sampled on [dE1, dE2] in nStepsY steps
    dE1, dE2   : ends of the interval on which dimFunc is sampled

    Returns (valuesVert3D, valuesPlane): the (x, y, z, bc) vertices of all
    slices in slice order, and the polygons as tuples whose first entry is the
    vertex count followed by vertex indices.

    Reads the module-level names nStepsX, nStepsY, sizeY and interfaceTag and
    prints the normalised profile as a side effect.
    '''
    # Sample the profile along y and normalise it so its maximum is 1.
    dim = [
        dimFunc(dE1 + (dE2 - dE1) * step / float(nStepsY))
        for step in range(nStepsY + 1)
    ]
    maxDim = float(max(dim))
    dim = [e / maxDim for e in dim]

    # Parenthesised form prints the same text under Python 2 and is valid
    # Python 3 (the bare print statement is a SyntaxError there).
    print(dim)

    v3vert = []
    valuesVertP = COPY(valuesVert)

    for step in range(nStepsY + 1):
        vVP = COPY(valuesVertP)
        for i, (x, z, bc) in enumerate(valuesVertP):
            if bc in (0, 2):
                # Always scale the ORIGINAL z, not the previous slice's z.
                vVP[i] = (x, valuesVert[i][1] * dim[step], bc)
            if bc == interfaceTag:
                # Nearest interface column for this x coordinate.
                stepX = int(0.5 + float(x) / sizeX * nStepsX)
                stepY = step
                vVP[i] = (x, interface[stepX][stepY], bc)
        valuesVertP = vVP
        v3vert.append(valuesVertP)

    # Flatten the slices into one vertex list, adding the y coordinate.
    valuesVert3D = []
    dY = float(sizeY) / nStepsY
    for yStep in range(nStepsY + 1):
        for (x, z, bc) in v3vert[yStep]:
            valuesVert3D.append((x, yStep * dY, z, bc))

    # Every 2D edge becomes one quad between each pair of neighbouring slices;
    # slice k holds the vertex indices shifted by k * L.
    valuesPlane = []
    L = len(valuesVert)
    for (u, v, bc) in valuesEdge:
        for yStep in range(nStepsY):
            u1, v1 = yStep * L + u, yStep * L + v
            u2, v2 = u1 + L, v1 + L
            valuesPlane.append((4, u2, v2, v1, u1))

    # Polygons of the first slice, then the same polygons re-indexed onto the
    # last slice.
    valuesPlaneSide = polygonize(valuesVert, valuesEdge, sX=sizeX)
    valuesPlane.extend(valuesPlaneSide)
    toLastSide = lambda i: i + L * nStepsY
    valuesPlane.extend((poly[0], ) + tuple(map(toLastSide, poly[1:]))
                       for poly in valuesPlaneSide)

    return valuesVert3D, valuesPlane
示例#2
0
def reduce_recomb_rate_info(rcmb_rate_info, bim_SNP_positions,
                            test_functionality):
    """

    Purpose
    -------
    Drops rows of rcmb_rate_info so that every remaining genomic interval contains at least one SNP.

    Parameters
    ----------
    rcmb_rate_info: a pandas dataframe with two columns. "Position(bp)" holds the genomic position of each interval's
                    left boundary, so row i is the left boundary of interval i and the right boundary of interval
                    i-1. "Map(cM)" is the cumulative recombination rate in centiMorgans (0 in the first row).

    bim_SNP_positions: the genomic positions of every SNP, taken directly from the input bim file.

    test_functionality: when equal to "test_units", the result is handed to "unit_tester" together with the
                        reference file name "correct_reduce_recomb_rate_info_output.txt".

    Returns
    -------
    reduced_rcmb_rate_info: the rows of "rcmb_rate_info" that are kept, i.e. the boundary reported for each SNP
                            plus the single boundary immediately before the first kept one.

    """

    boundary_positions = rcmb_rate_info["Position(bp)"].to_numpy()

    # INDEXING NOTE: the helper's result is used as-is (no "- 1"), which per the original author yields, for each
    #                SNP, the index of the closest boundary to its RIGHT. The one extra row that is needed is the
    #                boundary to the LEFT of the first SNP; it is switched on explicitly below.
    right_boundary_indices = SNP_positions_to_rcmb_intervals(
        boundary_positions,
        COPY(bim_SNP_positions),
        test_functionality,
        context=1)

    row_is_kept = np.isin(np.arange(len(boundary_positions)),
                          right_boundary_indices)

    # NOTE(review): if the first kept row were index 0, "- 1" would wrap around and mark the LAST row instead.
    #               Presumably the helper never reports index 0 -- confirm against its implementation.
    first_kept_row = np.min(np.where(row_is_kept))
    row_is_kept[first_kept_row - 1] = True

    reduced_rcmb_rate_info = rcmb_rate_info[row_is_kept]

    # reduced_rcmb_rate_info.to_csv("correct_reduce_recomb_rate_info_output.txt", sep = "\t", header = True, index = False)
    if test_functionality == "test_units":
        unit_tester(reduced_rcmb_rate_info,
                    "correct_reduce_recomb_rate_info_output.txt", 0)
    return reduced_rcmb_rate_info
def getPolygon(valuesVert, neighbors, prev, curr):
    '''
    Helper for "polygonize": walk from vertex `curr` (having arrived from
    `prev`) along the neighbour table until a vertex repeats.

    At a vertex with one onward neighbour that neighbour is taken; with two,
    the one with the larger getAngle(valuesVert, prev, curr, candidate) wins.
    Returns a tuple (count, v1 + 1, v2 + 1, ...) of the visited vertices in
    walk order, each index shifted up by one.
    '''
    path = []
    while curr not in path:
        path.append(curr)
        # Work on a copy so the caller's neighbour table is not modified.
        onward = COPY(neighbors[curr])
        onward.remove(prev)
        choices = len(onward)
        if choices == 1:
            nxt = onward.pop()
        elif choices == 2:
            first = onward.pop()
            second = onward.pop()
            angleFirst = getAngle(valuesVert, prev, curr, first)
            angleSecond = getAngle(valuesVert, prev, curr, second)
            nxt = first if angleFirst > angleSecond else second
        else:
            # No usable continuation: curr stays put and is already in the
            # path, so the walk is over.
            break
        prev, curr = curr, nxt
    return (len(path), ) + tuple(vertex + 1 for vertex in path)
def addMountain(valuesVert, valuesEdge, mountA, mountB, mountH, function, end1, end2, desiredAmplitude, nSteps, nSegs):
    '''
    Adds the interface which hopefully looks like some small mountains.

    The flat edge (mountA, mountB, 0) is replaced by a chain of edges tagged
    interfaceTag whose heights follow `function`.

    valuesVert       : list of (x, z, bc) vertex tuples
    valuesEdge       : list of (u, v, bc) edge tuples
    mountA, mountB   : vertex indices of the two ends of the edge to replace
    mountH           : base height added to every sampled height
    function         : callable profile; sampled on [end1, end2]
    end1, end2       : ends of the sampling interval
    desiredAmplitude : the sampled heights are rescaled so that
                       max - min equals this value
    nSteps, nSegs    : round(nSteps / nSegs) + 1 samples are taken and that
                       list of heights is repeated nSegs times across sizeX

    Returns (vertices, edges). If `function` cannot be evaluated at 0 (this
    includes function=None), the inputs are returned with the flat edge merely
    retagged as interfaceTag.

    Reads the module-level names sizeX and interfaceTag.
    '''
    # Probe the profile once. Only ordinary errors count as "bad function";
    # KeyboardInterrupt / SystemExit are allowed to propagate.
    try:
        function(0)
    except Exception:
        # Keep the flat edge and only give it the interface tag.
        valuesEdgeP = [t if t != (mountA, mountB, 0) else (mountA, mountB, interfaceTag) for t in valuesEdge]
        return valuesVert, valuesEdgeP

    # Remove the flat edge; it is replaced by the sampled chain below.
    valuesEdgeP = [t for t in valuesEdge if t != (mountA, mountB, 0)]

    # NOTE(review): unreachable as written -- calling None(0) raises in the
    # probe above, so function=None takes the retagging path instead. Kept so
    # the intended order can be decided by the owner.
    if function is None:
        return valuesVert, valuesEdgeP

    n = int(round(float(nSteps) / nSegs))
    heights = [function(end1 + float(end2 - end1) * x / n) for x in range(n + 1)]
    amplitude = max(heights) - min(heights)

    # NOTE(review): a constant profile gives amplitude == 0 and this division
    # fails; presumably callers never pass one -- confirm.
    heights = [mountH + x * float(desiredAmplitude) / amplitude for x in heights]
    # Repeat the single-segment profile nSegs times.
    heights *= nSegs

    H = len(heights)
    segLength = float(sizeX) / (H - 1)
    values = [(i * segLength, heights[i], interfaceTag) for i in range(H)]

    # The two end samples take the place of vertices mountA and mountB.
    valuesVertP = COPY(valuesVert)
    valuesVertP = insertAndDelete(mountA, values[0], valuesVertP)
    valuesVertP = insertAndDelete(mountB, values[-1], valuesVertP)
    values = values[1:-1]
    V = len(values)

    # Append the interior samples and chain them: mountA -> L+1 -> ... -> L+V -> mountB.
    L = len(valuesVertP)
    valuesVertP.extend(values)
    valuesEdgeP.extend([(mountA, L + 1, interfaceTag), (L + V, mountB, interfaceTag)])
    valuesEdgeP.extend((L + i, L + i + 1, interfaceTag) for i in range(1, V))
    return valuesVertP, valuesEdgeP
示例#5
0
# Ordered (old_value, new_value) genotype replacements for every accepted SNP_phenotype label.
# The replacements of one label are applied in sequence, so their order matters.
_SNP_PHENOTYPE_RECODINGS = {
    "regular": (),
    "recessive": ((1, 0), ),
    "dominant": ((1, 2), ),
    "heterozygous_only": ((2, 0), (1, 2)),
    "homozygous_only": ((0, 2), (1, 0)),
}


def _read_lines(path):
    """Returns all lines of the text file at "path" and closes the file afterwards."""
    with open(path, "r") as text_file:
        return text_file.readlines()


def _apply_SNP_phenotype_map(genotypes, labels, github_link):
    """Recodes "genotypes" in place according to "labels"; prints an error and exits on an unknown label."""
    column_count = genotypes.shape[1]
    if column_count < 1:
        return
    for m, label in enumerate(labels):
        if label not in _SNP_PHENOTYPE_RECODINGS:
            print(
                "\nerror: all SNP_phenotype labels must be 'regular', 'recessive', 'dominant', 'heterozygous_only', or 'homozygous_only'.\n"
            )
            print("Visit " + github_link +
                  " for examples of correct formatting.\n")
            exit()
        for old_value, new_value in _SNP_PHENOTYPE_RECODINGS[label]:
            # With a single column every label acts on the whole array; otherwise label m acts on column m
            # (a view, so the assignment writes through to "genotypes").
            target = genotypes if column_count == 1 else genotypes[:, m]
            target[target == old_value] = new_value


def simulate_phenotypes(
    output_file_names,
    causal_SNP_IDs_path,
    cumulative_SNP_counts,
    major_minor_assignments_path,
    betas_path,
    mean_phenotype,
    sample_size,
    bim_SNP_names,
    phenotype,
    SNP_phenotype_map_path,
    noise=0,
):
    """

    Purpose
    -------
    to simulate correlations between the genotypes of selected SNPs and a continuous or binary phenotype.

    Parameters
    ----------
    output_file_names: the names of the output bed files for all chromosomes. The first name, minus its
                       last 8 characters, is the prefix of the "model_profile.txt" file written here.
    causal_SNP_IDs_path: the path to the file containing causal rsIDs from which phenotype values are simulated.
                         Each row holds the tab-separated SNP IDs of one feature.
    cumulative_SNP_counts: the cumulative number of SNPs summed from chromosome 1 to chromosome 22 in ascending order.
    major_minor_assignments_path: the path to the file specifying whether the major
                                  or minor allele in each causal SNP adds 1 to the genotype,
                                  or "standard" to use the genotypes unchanged.
    betas_path: the path to the file containing one beta coefficient per row.
    mean_phenotype: an input argument (float). It can be any number for continuous phenotypes,
                    and it must be in between 0 and 1 for binary phenotypes.
    sample_size: the number of samples that have been simulated.
    bim_SNP_names: a list of SNPs from the output bim file (same as the input bim file).
    phenotype: an input argument specifying whether to simulate a "continuous" or "binary" phenotype.
    SNP_phenotype_map_path: the path to the file specifying whether each causal SNP's phenotype map is
                            regular, dominant, recessive, heterozygous_only, or homozygous_only,
                            or "standard" to leave every SNP unmapped.
    noise: a percentage of the mean phenotype that is the standard deviation of the
           random gaussian noise that contributes to the simulated phenotype's values.

    Returns
    -------
    It returns a numpy array of one simulated phenotype per simulated whole genome: a column vector of
    floats for continuous phenotypes, a flat int8 array of 0/1 values for binary phenotypes. It also
    writes the R^2 of the fitted model and its inferred beta coefficients into a text file.

    Any formatting problem in an input file prints an error message and exits the program.

    """

    # imports required model components (SNPs and beta values).
    github_link = "https://github.com/EpistasisLab/regens"
    causal_SNP_IDs = _read_lines(causal_SNP_IDs_path)
    try:
        betas = np.array(_read_lines(betas_path)).astype(np.float64)
    except Exception:
        print("\nerror: The beta coefficients file at " + betas_path +
              " is incorrectly formatted. Visit " + github_link +
              " for examples of correct formatting.\n")
        exit()
    if len(betas) != len(causal_SNP_IDs):
        print(
            "\nerror: The causal_SNP_IDs and betas files must have the same number of rows. Visit "
            + github_link + " for examples of correct formatting.\n")
        exit()

    # imports optional model components (major/minor assignments and SNP_phenotype_maps).
    if major_minor_assignments_path != "standard":
        major_minor_assignments = _read_lines(major_minor_assignments_path)
        if len(major_minor_assignments) != len(causal_SNP_IDs):
            print(
                "\nerror: The causal_SNP_IDs and major_minor_assignments files must have the same number of rows. Visit "
                + github_link + " for examples of correct formatting.\n")
            exit()
    if SNP_phenotype_map_path != "standard":
        SNP_phenotype_map = _read_lines(SNP_phenotype_map_path)
        if len(SNP_phenotype_map) != len(causal_SNP_IDs):
            print(
                "\nerror: The causal_SNP_IDs and SNP_phenotype_map files must have the same number of rows. Visit "
                + github_link + " for examples of correct formatting.\n")
            exit()

    # simulates phenotypes based on model specifications
    feature_size = len(betas)
    features = np.zeros((sample_size, feature_size))
    for p in range(feature_size):
        feature_SNP_IDs = causal_SNP_IDs[p].strip().split("\t")
        try:
            feature_SNPs = get_feature_SNPs(
                feature_SNP_IDs,
                cumulative_SNP_counts,
                output_file_names,
                sample_size,
                bim_SNP_names,
            )
        except Exception:
            print(
                "\nerror: The causal SNP IDs on row " + str(p + 1) +
                " are either incorrectly formatted or they do not exist in the input bim file:\n"
            )
            print("Visit " + github_link +
                  " for examples of correct formatting.\n")
            # Without this exit the loop would carry on with feature_SNPs undefined
            # (or left over from the previous row).
            exit()

        if major_minor_assignments_path != "standard":
            assignment_labels = major_minor_assignments[p].strip().split("\t")
            if not np.all(np.isin(assignment_labels, ["0", "1"])):
                print(
                    "\nerror: The major minor assignments on row " +
                    str(p + 1) +
                    " are either incorrectly formatted or they do not exist in the input bim file:\n"
                )
                print("Visit " + github_link +
                      " for examples of correct formatting.\n")
                # Same reasoning as above: continuing would reuse stale genotypes.
                exit()
            feature_major_minor_assignments = np.array(
                assignment_labels).astype(np.int64)
            feature_major_minor_assignments_alt = COPY(
                feature_major_minor_assignments)
            feature_major_minor_assignments_alt[
                feature_major_minor_assignments_alt == 0] = -1
            # Assignment 1 turns genotype g into 2 - g; assignment 0 leaves g unchanged.
            feature_SNPs_with_assignments = (
                feature_SNPs - 2 * feature_major_minor_assignments) * (
                    -1 * feature_major_minor_assignments_alt)
        else:
            feature_SNPs_with_assignments = feature_SNPs

        if SNP_phenotype_map_path != "standard":
            feature_SNP_phenotype_map = SNP_phenotype_map[p].strip().split(
                "\t")
            _apply_SNP_phenotype_map(feature_SNPs_with_assignments,
                                     feature_SNP_phenotype_map, github_link)

        # np.prod is the supported spelling; the np.product alias was removed in NumPy 2.0.
        features[:, p] = np.prod(feature_SNPs_with_assignments, axis=1)

    weighted_feature_sums = np.sum(betas * features, axis=1, keepdims=True)
    # abs() keeps the standard deviation non-negative when the mean is negative;
    # np.random.normal rejects a negative scale.
    weighted_feature_sums += np.random.normal(
        loc=0,
        scale=abs(noise * np.mean(weighted_feature_sums)),
        size=weighted_feature_sums.shape,
    )

    if phenotype == "binary":

        def logistic_with_unknown_intercept(intercept, weighted_feature_sums,
                                            mean_phenotype):
            disease_probabilities = 1 / (
                1 + np.exp(-1 * (weighted_feature_sums + intercept)))
            return np.mean(disease_probabilities) - mean_phenotype

        # Solve for the intercept at which the mean disease probability equals mean_phenotype.
        intercept = root(
            fun=logistic_with_unknown_intercept,
            x0=np.array([0]),
            args=(weighted_feature_sums, mean_phenotype),
        ).x[0]
        disease_probabilities = 1 / (
            1 + np.exp(-1 * (weighted_feature_sums + intercept)))
        simulated_phenotypes = (np.random.rand(len(disease_probabilities)) <=
                                disease_probabilities.reshape(-1)).astype(
                                    np.int8)
        model = LogisticRegression(C=1e100,
                                   tol=1e-100,
                                   max_iter=1000000,
                                   solver="lbfgs").fit(features,
                                                       simulated_phenotypes)

    elif phenotype == "continuous":

        def linear_with_unknown_intercept(intercept, weighted_feature_sums,
                                          mean_phenotype):
            return np.mean(weighted_feature_sums + intercept) - mean_phenotype

        # Solve for the intercept that shifts the mean of the sums onto mean_phenotype.
        intercept = root(
            fun=linear_with_unknown_intercept,
            x0=np.array([0]),
            args=(weighted_feature_sums, mean_phenotype),
        ).x[0]
        simulated_phenotypes = weighted_feature_sums + intercept
        model = LinearRegression().fit(features, simulated_phenotypes)

    else:
        print("error: phenotype must be either 'binary' or 'continuous'.")
        exit()

    # "with" guarantees the profile file is closed even if scoring or writing fails.
    with open(output_file_names[0][:-8] + "model_profile.txt",
              "w") as model_profile:
        model_profile.write("measured R^2 of model fit: " +
                            str(model.score(features, simulated_phenotypes)) +
                            "\n")
        for i, b in enumerate(model.coef_[0]):
            model_profile.write("measured beta value of feature" +
                                str(i + 1) + ": " + str(b) + "\n")
        model_profile.write("measured beta value of intercept: " +
                            str(model.intercept_[0]))
    return simulated_phenotypes
示例#6
0
def draw_breakpoints(
    rcmb_rate_info,
    bim_SNP_positions,
    num_breakpoints,
    simulation_sample_size,
    test_functionality,
    chromosome_number,
    output_plink_filename_prefix,
):
    """

    Purpose
    -------
    Turns recombination rates into sampling probabilities ("centimorgans_to_probabilities"), draws
    breakpoints from them ("choice_with_periodic_replacement"), and then replaces every drawn
    recombination interval index by the index of one input SNP that lies inside that interval.

    Parameters
    ----------
    rcmb_rate_info:  Output from the "reduce_recomb_rate_info" function.
    bim_SNP_positions: the genomic positions of every SNP directly from the input bim file.
    num_breakpoints: user-specified (int) number of breakpoints per chromosome.
    simulation_sample_size: user-specified number of samples to be simulated.
    test_functionality: "test_correctness" runs the checks imported from "regens_testers";
                        "test_units" passes the result to "unit_tester".
    chromosome_number: the chromosome that is currently being simulated.
    output_plink_filename_prefix: plink prefix of the (bed, bim, fam) fileset that will contain simulated individuals.

    Returns
    -------
    The drawn breakpoints, overwritten in place so that each entry is an input SNP's
    bim row index (also its bed column index) rather than a recombination interval index.

    """

    checking_correctness = test_functionality == "test_correctness"
    if checking_correctness:
        from regens_testers import test_drawn_breakpoints
        from regens_testers import test_breakpoint_SNP_mapping

    SNP_count = len(bim_SNP_positions)  # kept from the original; not used below
    probabilities = centimorgans_to_probabilities(rcmb_rate_info,
                                                  test_functionality)
    rcmb_rate_intervals = rcmb_rate_info["Position(bp)"].to_numpy()
    breakpoints = choice_with_periodic_replacement(simulation_sample_size,
                                                   num_breakpoints,
                                                   probabilities,
                                                   test_functionality)

    if checking_correctness:
        test_drawn_breakpoints(breakpoints, probabilities, chromosome_number,
                               output_plink_filename_prefix)
        # Snapshot of the interval indices before they are overwritten with SNP indices.
        old_breakpoints = COPY(breakpoints)

    # INDEXING NOTE: One is subtracted from the helper's result so that, per the original author, each SNP is
    #                labelled with the closest boundary to its LEFT. All SNPs up to the one immediately left of
    #                breakpoint i form segment i, and segment B+1 holds everything after breakpoint B.
    interval_of_SNP = SNP_positions_to_rcmb_intervals(
        rcmb_rate_intervals,
        COPY(bim_SNP_positions),
        test_functionality,
        context=2) - 1

    # interval index -> indices of the SNPs that carry that label
    SNPs_by_interval = {
        interval: np.where(interval_of_SNP == interval)[0]
        for interval in np.unique(interval_of_SNP)
    }

    for jj in range(len(breakpoints)):
        drawn = breakpoints[jj]
        for k in range(num_breakpoints):
            candidates = SNPs_by_interval[drawn[k]]
            candidate_count = len(candidates)
            if candidate_count == 1:
                # No random number is consumed when there is nothing to choose.
                pick = candidates[0]
            else:
                # NOTE(review): int() truncates towards zero, so index 0 looks favoured and the last index
                #               disfavoured. Left unchanged because it determines the reference outputs.
                pick = candidates[int(candidate_count * np.random.rand() -
                                      0.5)]
            drawn[k] = pick

    if checking_correctness:
        test_breakpoint_SNP_mapping(old_breakpoints, rcmb_rate_intervals,
                                    breakpoints, bim_SNP_positions)
    if test_functionality == "test_units":
        unit_tester(breakpoints, "correct_draw_breakpoints_output.txt", None)

    return breakpoints
示例#7
0
Jacobians: [J_1, J_2, ... ]
ReferenceMassMatrix: [[1, 2, ... ];[3, 4, ... ]; ... ]
'''
from globalVars import dimension, order
from readMesh import Nodes, Elements, Edges, Neighbors
from getBaseFunctions import ReferenceBaseFunctions
from Polynomial import Polynomial
from copy import deepcopy as COPY
from numpy.linalg import det

# One determinant per entry of Elements, in the same order.
Jacobians = []

# Only dimensions 2 and 3 are handled; any other value leaves Jacobians empty.
if dimension in (2, 3):
    for element, attribute in Elements:
        # An element contributes its first dimension + 1 nodes.
        corners = [Nodes[element[k]] for k in range(dimension + 1)]
        origin = corners[0]
        # Rows are the offsets of the remaining nodes from the first node.
        offsets = [[corner[axis] - origin[axis] for axis in range(dimension)]
                   for corner in corners[1:]]
        Jacobians.append(det(offsets))

numBaseFunctions = len(ReferenceBaseFunctions)
示例#8
0
 def copy(self):
     """Return an independent duplicate of this object, produced by COPY.

     NOTE(review): COPY is presumably copy.deepcopy -- confirm against the
     file's imports.
     """
     duplicate = COPY(self)
     return duplicate
示例#9
0
 def copy(self):
     """Return a duplicate of this object made with COPY.

     When the duplicate has a 'par' attribute, that attribute is replaced by
     the result of its own copy() method.
     NOTE(review): COPY is presumably copy.deepcopy -- confirm against the
     file's imports.
     """
     clone = COPY(self)
     if not hasattr(clone, 'par'):
         return clone
     clone.par = clone.par.copy()
     return clone