def gen_x_chr(reference_file: str, working_folder: str, idx_run: int,
              idx_perm: int, idx_m: int, idx_f: int, n_male: int,
              n_female: int, xchr_male: [str], xchr_female: [str],
              gender_perm: [[int]]):
    """
    Build the X-chromosome version of the default pedigree for one run and export it as a VCF file.

    Genders are encoded as 0 (counted as female), 1 (counted as male) and -1 (unknown). Unknown genders are
    resolved from the selected row of gender_perm; founders receive an X chromosome from the candidate lists,
    every other individual is bred from its two parents.

    :param str reference_file : VCF file passed to the exporter as header template.
    :param str working_folder : Folder in which the run folder is created. When None, the run folder is created
    relative to the current working directory.
    :param int idx_run : Zero-based index of the run; the run folder is named 'pedigree_<idx_run + 1>'.
    :param int idx_perm : Index of the row of gender_perm used for the individuals with unknown gender.
    :param int idx_m : Position in xchr_male of the first candidate to use.
    :param int idx_f : Position in xchr_female of the first candidate to use.
    :param int n_male : Number of male candidates (presumably len(xchr_male) -- confirm against the caller).
    :param int n_female : Number of female candidates (presumably len(xchr_female) -- confirm against the caller).
    :param xchr_male : Candidate X chromosomes for male founders. Shuffled IN PLACE when the candidates left
    from idx_m on may not be enough for this pedigree.
    :param xchr_female : Candidate X chromosomes for female founders. Shuffled in place like xchr_male.
    :param gender_perm : Gender permutations; each row holds the genders of 'Offspring3', 'Offspring8' and
    'Offspring17', in this order.

    N.B. The run folder should not exist: os.makedirs raises an error if it is already there.
    N.B. idx_m and idx_f are advanced only locally; the positions reached are not returned to the caller.
    """

    default_pedigree = DefaultPedigree()

    # individuals whose gender is not fixed by the pedigree; the position in this list is the column of the
    # gender_perm row that holds their gender
    id_no_gender = ["Offspring3", "Offspring8", "Offspring17"]
    perm_column = {ind_id: col for col, ind_id in enumerate(id_no_gender)}

    parents_ids = default_pedigree.get_parents_ids()

    # get the gender for the pedigree and resolve the unknown ones (the list is updated in place)
    gender = default_pedigree.get_pedigree_gender()
    ids = default_pedigree.get_pedigree_ids()
    for idx_g, current_gender in enumerate(gender):
        if current_gender != -1:
            continue
        col = perm_column.get(ids[idx_g])
        if col is None:
            # not one of the expected individuals: report it and draw a random gender
            print("Unknown ID: {}".format(ids[idx_g]))
            gender[idx_g] = random.choice([0, 1])
        else:
            gender[idx_g] = gender_perm[idx_perm][col]

    # number of females / males in the whole pedigree (founders and offspring)
    ped_female = sum(1 for g in gender if g == 0)
    ped_male = sum(1 for g in gender if g == 1)

    pedigree_str = "pedigree_{}".format(idx_run + 1)
    pedigree_folder = pedigree_str
    if working_folder is not None:
        pedigree_folder = os.path.join(working_folder, pedigree_str)

    # create folder for the current run
    os.makedirs(pedigree_folder)

    # restart from a fresh shuffle when the remaining candidates may not cover this pedigree; the check uses
    # the size of the whole pedigree, so it is conservative with respect to the founders actually drawn
    if idx_m + ped_male >= n_male:
        random.shuffle(xchr_male)
        idx_m = 0
    if idx_f + ped_female >= n_female:
        random.shuffle(xchr_female)
        idx_f = 0

    # build the population; parents are looked up among the individuals already created, so the IDs are
    # expected to list parents before their offspring
    population = []
    for idx_ind, id_ind in enumerate(ids):
        ind = Individual(ind_id=id_ind)
        ind.set_gender(gender=gender[idx_ind])
        if "Founder" in ind.id:
            # founders take the next unused candidate of their gender (anything but 0 is treated as male)
            if gender[idx_ind] == 0:
                x = xchr_female[idx_f]
                idx_f += 1
            else:
                x = xchr_male[idx_m]
                idx_m += 1
            ind.set_x_chromosome(x_chromosome=x)
        else:
            first_parent, second_parent = parents_ids[idx_ind][0], parents_ids[idx_ind][1]
            ind.set_parents([
                lib.get_individual(population, first_parent),
                lib.get_individual(population, second_parent)
            ])
            ind.x_breed()
        population.append(ind)

    # save the population (path built with os.path.join instead of a hard-coded '/')
    x_pedigree_file = os.path.join(pedigree_folder,
                                   "x_{}.vcf".format(pedigree_str))
    lib.export2vcf(population=population,
                   vcf_header_template=reference_file,
                   vcf_file=x_pedigree_file,
                   save_all=True,
                   x_chromosome=True)

# Example 2

def _autosome_pick_founders(unrelated_id, offset, n_founders=8):
    """
    Select the next group of founders from the shuffled list of unrelated individuals.

    When the group at position offset would run past the end of the list, the list is reshuffled IN PLACE
    and the selection restarts from the beginning.

    :param list unrelated_id : IDs of the candidate founders (needs at least n_founders entries).
    :param int offset : Index of the group of n_founders candidates to select.
    :param int, optional n_founders : Number of founders in a pedigree. Default value equal to 8.
    :return: The selected IDs and the offset actually used (0 after a reshuffle).
    """
    # strict '>' so that a last group that exactly fills the list is still used before reshuffling
    if n_founders * (offset + 1) > len(unrelated_id):
        offset = 0  # restart the selection
        random.shuffle(unrelated_id)
    start = n_founders * offset
    return unrelated_id[start:start + n_founders], offset


def _autosome_init_run(pedigree, founders_file, unrelated_id, idx_run, offset,
                       output_folder):
    """
    Create the folder of a run and build its population.

    :return: The run folder, the population of the run and the founder offset actually used.
    """
    # create folder for the current run
    run_folder = "pedigree_{}".format(idx_run + 1)
    if output_folder is not None:
        run_folder = os.path.join(output_folder, run_folder)
    os.makedirs(run_folder)

    # random selection of founders
    founder, offset = _autosome_pick_founders(unrelated_id, offset)

    # create the population based on the defined pedigree for the current run
    run_pop = pedigree.get_pedigree(founders_file, founder)
    return run_folder, run_pop, offset


def init_autosome_pedigrees(founders_file: str,
                            n_pedigree=1,
                            output_folder=None,
                            feedback=True):
    """
    Generate multiple autosome pedigrees (n_pedigree) starting from a reference VCF file (founders_file) that contains
    candidate founders from which the pedigrees are developed.

    Each pedigree is stored in its own folder 'pedigree_<k>' (k starting from 1) as a VCF file, named
    'pedigree_<k>' with the extension of founders_file, and as a saved population object 'pedigree_<k>.pop'.

    :param str founders_file : The autosome reference VCF file that contains candidate founders for the pedigrees.
    It must contain at least 8 individuals, otherwise a ValueError is raised.
    :param int, optional n_pedigree : The number of pedigrees that should be created. Optional parameter with default
    value equal to 1.
    :param str, optional output_folder : An output folder that should be created where all the pedigrees are stored.
    Optional parameter, when not defined the folders of each pedigree are created in the current working directory.
    N.B. The output folder should not exist. It will be created during the process. If the folder already exists,
    the script will terminate with an error. The same holds for the folder of each pedigree.
    :param bool, optional feedback : If the flag is set to True it will provide a visual feedback of the ongoing
    process, otherwise it will run silently with only minimal output.
    """

    # create output folder if necessary
    if output_folder is not None:
        if feedback:
            msg = "Initializing output folder '{}'".format(output_folder)
            sys.stdout.write(msg)
            sys.stdout.flush()

        os.makedirs(output_folder)  # create output folder

        if feedback:
            sys.stdout.write(" [OK]")
            sys.stdout.flush()

    # the pedigree VCF files reuse the extension of the founders file
    _, file_extension = os.path.splitext(founders_file)

    if feedback:
        msg = "\nLoad candidate founders"
        sys.stdout.write(msg)
        sys.stdout.flush()

    # IDs of unrelated individuals, randomly shuffled to be selected as founders
    unrelated_id = lib.individuals_in_vcf(founders_file)
    random.shuffle(unrelated_id)

    # check of having a sufficient number of unrelated individuals (i.e., 8) for the standard pedigree
    if len(unrelated_id) < 8:
        raise ValueError(
            "The population of unrelated individuals should have at least 8 members."
        )

    if feedback:
        sys.stdout.write(" [OK]\n")
        sys.stdout.flush()

    pedigree = DefaultPedigree()
    offset = 0  # used to select different founders for each run (reshuffle when all unrelated individuals are used)
    for idx_run in range(n_pedigree):
        if feedback:
            msg = "\nInitialization of pedigree {} ".format(idx_run + 1)
            with Spinner(msg):
                run_folder, run_pop, offset = _autosome_init_run(
                    pedigree, founders_file, unrelated_id, idx_run, offset,
                    output_folder)

            sys.stdout.write("[OK]")
            sys.stdout.flush()

            msg = "\nSave pedigree {} in folder '{}'".format(
                idx_run + 1, run_folder)
            sys.stdout.write(msg)
            sys.stdout.flush()  # show the message before the (possibly slow) export starts
        else:
            run_folder, run_pop, offset = _autosome_init_run(
                pedigree, founders_file, unrelated_id, idx_run, offset,
                output_folder)

        # save the population
        pedigree_file = "pedigree_{}{}".format(idx_run + 1, file_extension)
        population_file = "pedigree_{}{}".format(idx_run + 1, ".pop")
        lib.export2vcf(population=run_pop,
                       vcf_header_template=founders_file,
                       vcf_file=os.path.join(run_folder, pedigree_file))

        lib.save_object(run_pop, os.path.join(run_folder, population_file))

        if feedback:
            sys.stdout.write(" [OK]\n")
            sys.stdout.flush()

        offset = offset + 1

    if feedback:
        sys.stdout.write("\n")
        sys.stdout.flush()