Exemplo n.º 1
0
 # Keyword settings for this hapsb_ind run, collected in one mapping (same
 # order as they are passed) so the call itself stays a single line.
 hapsb_settings = {
     # range(1, 23) yields 1..22 -- presumably the chromosome numbers; confirm.
     "chs": range(1, 23),
     # Input and output locations.
     "path_targets_prefix": f"{basepath}/hdf5",
     "h5_path1000g":
         "/mnt/archgen/users/yilei/Data/1000G/1000g1240khdf5/all1240/chr",
     "meta_path_ref":
         "/mnt/archgen/users/yilei/Data/1000G/1000g1240khdf5/all1240/meta_df_all.csv",
     "folder_out": f"{basepath}/hapRoh/",
     "prefix_out": "",
     # Model selection.
     "e_model": "readcount_contam",
     "p_model": "SardHDF5",
     "post_model": "Standard",
     # Execution and saving switches; the process count comes from `args`.
     "processes": args.processes,
     "delete": False,
     "output": True,
     "save": True,
     "save_fp": False,
     # Reference and input-data options.
     "n_ref": 2504,
     "diploid_ref": True,
     "exclude_pops": [],
     "readcounts": True,
     "random_allele": False,
     # Numeric parameters; `err` is supplied by the surrounding code.
     "c": 0.05,
     "roh_min_l_final": 0.06,
     "roh_in": 1,
     "roh_out": 20,
     "roh_jump": 300,
     "e_rate": err,
     "e_rate_ref": 1e-3,
     # Logging and result file.
     "logfile": True,
     "combine": True,
     "file_result": "_roh_full.csv",
 }
 hapsb_ind(iid, **hapsb_settings)
Exemplo n.º 2
0
 # Keyword settings for this hapsb_ind run, collected in one mapping (same
 # order as they are passed) so the call itself stays a single line.
 hapsb_settings = {
     "iid": iid,
     # range(1, 23) yields 1..22 -- presumably the chromosome numbers; confirm.
     "chs": range(1, 23),
     # Input and output locations.
     # path_targets: the path before the .ind, .snp, .geno
     "path_targets": path_target,
     "h5_path1000g": './Data/1000Genomes/HDF5/1240kHDF5/all1240int8/chr',
     "meta_path_ref": './Data/1000Genomes/Individuals/meta_df_all.csv',
     "folder_out": "./Empirical/1240k/SA_Readcounts/IPK12_rep/",
     "prefix_out": '',
     # Model selection.
     "e_model": 'haploid',
     "p_model": 'MosaicHDF5',
     "post_model": 'Standard',
     # Execution and saving switches.
     "processes": 1,
     "delete": True,
     "output": True,
     "save": True,
     "save_fp": False,
     # Reference and input-data options.
     "n_ref": 2504,
     "exclude_pops": [],
     "readcounts": False,
     "random_allele": True,
     # Numeric parameters.
     "roh_in": 1,
     "roh_out": 20,
     "roh_jump": 300,
     "e_rate": 0.01,
     "e_rate_ref": 0.0,
     "cutoff_post": 0.999,
     "max_gap": 0,
     "roh_min_l": 0.01,
     # Logging and result file.
     "logfile": True,
     "combine": True,
     "file_result": '_roh_full.csv',
 }
 hapsb_ind(**hapsb_settings)
Exemplo n.º 3
0
 # Keyword settings for this hapsb_ind run, collected in one mapping (same
 # order as they are passed) so the call itself stays a single line.
 hapsb_settings = {
     # range(1, 2) yields only 1, so a single entry of `chs` is processed.
     "chs": range(1, 2),
     # Input and output locations; `outFolder` and `path1000G` come from the
     # surrounding code.
     "path_targets": f"{outFolder}/data.h5",
     "h5_path1000g": path1000G,
     "meta_path_ref":
         "/mnt/archgen/users/yilei/Data/1000G/1000g1240khdf5/all1240/meta_df_all.csv",
     "folder_out": f"{outFolder}/hapRoh/",
     "prefix_out": "",
     # Model selection.
     "e_model": "readcount_contam",
     "p_model": "SardHDF5",
     "post_model": "Standard",
     # Execution and saving switches.
     "processes": 1,
     "delete": True,
     "output": True,
     "save": True,
     "save_fp": False,
     # `con` is supplied by the surrounding code.
     "c": con,
     "conPop": ["CEU"],
     # Reference and input-data options.
     "n_ref": 2504,
     "diploid_ref": True,
     "exclude_pops": ["CHB"],
     "readcounts": True,
     "random_allele": False,
     # Numeric parameters.
     "roh_in": 1,
     "roh_out": 20,
     "roh_jump": 300,
     "e_rate": 0.01,
     "e_rate_ref": 1e-3,
     "cutoff_post": 0.999,
     "max_gap": 0.005,
     # Logging and result file.
     "logfile": False,
     "combine": True,
     "file_result": "_roh_full.csv",
 }
 hapsb_ind(iid, **hapsb_settings)
Exemplo n.º 4
0
        raise RuntimeError(f"Index {i} out of Range of High Coverage ancients.") 
    iid = df[id_col].values[i]
    return iid

#########################################################
#########################################################

if __name__ == "__main__":
    # The first command-line argument is the integer index of the individual
    # to process; without it the script cannot pick a row.
    if not sys.argv[1:]:
        raise RuntimeError("Script needs argument (indiviual i)")
    run_nr = int(sys.argv[1])

    # Disabled alternatives for the annotation table:
    #   load_eigenstrat_anno(min_snps=400000), looked up with id_col="Instance ID"
    #   load_meta_csv on "./Data/ReichLabEigenstrat/Raw/meta.v42_additional.csv"
    df_anno = load_meta_csv(
        path="./Data/ReichLabEigenstrat/Raw/meta.v42_old.csv",
        min_snps=400000,
    )
    iid = get_iid_from_df(df_anno, run_nr, id_col="iid")

    # One keyword per line, in the same order as before.
    hapsb_ind(
        iid,
        chs=range(1, 23),  # yields 1..22
        processes=1,
        delete=False,
        output=True,
        save=True,
        save_fp=False,
        n_ref=2504,
        exclude_pops=[],
        e_model='haploid',
        p_model='EigenstratPacked',
        readcounts=False,
        destroy_phase=True,
        post_model='Standard',
        path_targets="./Data/ReichLabEigenstrat/Raw.v42.4/v42.4.1240K",
        h5_path1000g='./Data/1000Genomes/HDF5/1240kHDF5/all1240int8/chr',
        meta_path_ref='./Data/1000Genomes/Individuals/meta_df_all.csv',
        base_out_folder='./Empirical/Eigenstrat/Reichall/v42_core/',
        prefix_out='',
        roh_in=1,
        roh_out=20,
        roh_jump=300,
        e_rate=0.01,
        e_rate_ref=0.0,
        max_gap=0,
        cutoff=0.999,
        l_cutoff=0.01,
        logfile=True,
        combine=True,
        file_name='_roh_full.csv',
    )