def __init__(self, name, sequence, markers):
    """Initialise a Genotype on top of the Haplotype base state.

    Inherited from Haplotype (as described by the original notes):
        a name, a sequence, a size equal to the number of markers, and
        the list of those markers.

    Genotype-specific state, every field starting empty or at zero:

    Filled in during the first run
        * count of homozygous (Hmz) markers ('A', 'C', 'G' or 'T';
          possibly '--' for a new haplotype);
        * count of heterozygous (Htz) markers (e.g. '--' or 'A/G');
        * indexes of the Htz markers inside the genotype sequence;
        * haplotypes similar to this genotype (sum of errors over the
          marker positions is 0, 1 or 2), plus the size of that list;
        * probable combinations of similar haplotypes that reproduce
          the genotype, plus the size of that list;
        * new haplotypes, filled when there is at least one similar
          haplotype but no probable combination, plus the size of
          that list.

    Filled in during the second run
        * a second list of probable combinations, computed once the
          haplotypes created in the first run have been added to the
          similar-haplotype list, plus the size of that list.
    """
    # Inherited part: delegate to the Haplotype constructor.
    Haplotype.__init__(self, name, sequence, markers)

    # Homozygous / heterozygous marker counters.
    self._hmz_nb_of_markers = self._htz_nb_of_markers = 0

    # Positions of the Htz markers in the genotype sequence; keeping them
    # speeds up comparing haplotypes that may combine into this genotype.
    # NOTE(review): the original comment tagged this field "supp" --
    # meaning unclear, confirm with the author.
    self._index_htz_markers = []

    # Similar haplotypes: a list of sequences (each one a list of
    # characters), noted as equivalent to the half_similarity inherited
    # from Haplotype; the counter is the size of that list.
    self._similar_haplotype, self._number_of_similar_haplotype = [], 0

    # Probable combinations: a list of lists,
    # e.g. [[haplo1, haplo4], [haplo20, haplo79]], and its size.
    (self._probable_haplotypes_combinaison,
     self._number_of_probable_haplotypes_combinaison) = [], 0

    # New haplotype(s) created from this Genotype object (for now only
    # their sequences), and the size of that list.
    self._lst_of_new_haplotype, self._number_of_new_created_haplotype = [], 0

    # Second-run list of combinations and its size.
    (self._probable_haplotypes_combinaison_2_run,
     self._number_of_probable_haplotypes_combinaison_2_run) = [], 0
def __init__(self, name, sequence, markers):
    """Initialise a Candidat on top of the Haplotype base state.

    Inherited from Haplotype (as described by the original notes):
        a name, a sequence, a size equal to the number of markers, and
        the list of those markers.

    Candidat-specific state:
        ``_geno_ori`` and ``_haplo_ori``, both starting as empty strings.
    """
    # Inherited part: delegate to the Haplotype constructor.
    Haplotype.__init__(self, name, sequence, markers)

    # Candidat-only fields, blank until set elsewhere.
    self._geno_ori = self._haplo_ori = ""
def __init__(self, name, sequence, markers):
    """Initialise a FastPhase entry on top of the Haplotype base state.

    Inherited from Haplotype (as described by the original notes):
        a name, a sequence, a size equal to the number of markers, and
        the list of those markers.

    FastPhase-specific state:
        ``_estimated_freq`` and ``_estimated_square``, both starting as
        empty strings.
    """
    # Inherited part: delegate to the Haplotype constructor.
    Haplotype.__init__(self, name, sequence, markers)

    # FastPhase-only fields, blank until set elsewhere.
    self._estimated_freq = self._estimated_square = ""
def pattern_selection(project_directory, **kwargs):
    """Run the pattern-selection step and write its output files.

    The step resolves its parameters through ``_set_parameters``, checks
    the loci arguments with ``_check_inputs``, reads the artefact paths and
    settings recorded in the preprocessing history, loads and filters the
    patterns, asks ``_get_minimum_spanning_set`` for the best set, and has
    a ``Haplotype`` object write the haplotype file, amplicon JSON, summary
    and the three matrices. Every output path and the elapsed run time are
    recorded in a new pattern-selection history, which is written last.

    Args:
        project_directory: Object supplying ``make_new_file``,
            ``get_parent_subdirectory_file``,
            ``get_parent_directory_timestamp`` and ``timestamp``.
        **kwargs: Forwarded unchanged to ``_set_parameters``. The mapping
            it returns is read for the keys ``max_loci``,
            ``required_loci``, ``exclude_loci``, ``exclude_strains``,
            ``res``, ``stop_at_res``, ``reps``, ``max_res`` and
            ``n_threads``.

    Returns:
        None.

    Raises:
        KeyError: If the preprocessing "SEP" parameter is not one of
            ``comma``, ``space`` or ``tab``.
    """
    log = logging.getLogger(__name__)
    log.info("BEGIN Pattern Selection")

    params = _set_parameters(**kwargs)
    started_at = time.time()
    _check_inputs(
        params['max_loci'], params['required_loci'], params['exclude_loci'])

    # History for this run; it is only written at the very end.
    run_history = History(
        project_directory.make_new_file("history", "pattern_selection_history"),
        "Pattern_Selection",
        project_directory.timestamp,
        param_dict=params)

    # Existing history of the preprocessing step located in the parent run.
    parent_stamp = project_directory.get_parent_directory_timestamp()
    prior_history_path = project_directory.get_parent_subdirectory_file(
        "history", "preprocessing_history_{}.txt".format(parent_stamp))
    prior_history = History(prior_history_path, "Preprocessing", exists=True)

    # Artefacts and settings recorded by the preprocessing step.
    pattern_json = prior_history.get_path("PATTERN JSON")
    variant_matrix_path = prior_history.get_path("VARIANT SITE MATRIX FILE")
    separators = {'comma': ",", "space": " ", "tab": "\t"}
    delimiter = separators[prior_history.get_parameter("SEP")]
    flag_path = prior_history.get_path("PRIMER ZONE FLAGS")
    raw_pz_size = prior_history.get_parameter("PZ_SIZE")

    run_history.add_path("PATTERN JSON", pattern_json)
    log.info("Reading from pattern JSON: %s", pattern_json)

    # Load the patterns, then apply the optional site / strain filters.
    pattern_store = Patterns()
    pattern_store.load_patterns(pattern_json)

    excluded_loci = params['exclude_loci']
    if len(excluded_loci):
        pattern_store.remove_sites(excluded_loci)

    required_loci = params['required_loci']
    if len(required_loci):
        pattern_store.add_required_sites(required_loci)

    excluded_strains = params['exclude_strains']
    if len(excluded_strains):
        pattern_store.remove_strains(excluded_strains)

    pattern_store.set_resolution(params['res'], params['stop_at_res'])

    # NOTE(review): "PZ_SIZE" is fetched a second time here and converted
    # to int, whereas Haplotype below receives the unconverted value read
    # earlier -- confirm that this difference is intended.
    spanning_set = _get_minimum_spanning_set(
        pattern_store,
        params['reps'],
        params['max_loci'],
        params['max_res'],
        params['n_threads'],
        int(prior_history.get_parameter("PZ_SIZE")))

    def spanning_set_file(suffix, extension):
        # All spanning-set outputs share the same base name.
        return project_directory.make_new_file(
            "minimum_spanning_set", suffix, extension)

    haplotype_csv = spanning_set_file(".haplotype", "csv")
    amplicon_json_path = spanning_set_file(".amplicons", "json")
    haplotype_matrix_path = spanning_set_file("haplotypes", "csv")
    amplicon_matrix_path = spanning_set_file("amplicons", "csv")
    pattern_matrix_path = spanning_set_file("patterns", "csv")
    summary_path = project_directory.make_new_file("summary", "summary")

    writer = Haplotype(
        pattern_store, spanning_set, flag_path, raw_pz_size,
        variant_matrix_path, delimiter)

    # Write each artefact, then record where it went.
    writer.write_haplotype(haplotype_csv)
    run_history.add_path("Haplotype File", haplotype_csv)

    writer.write_json(amplicon_json_path)
    run_history.add_path("Amplicon JSON", amplicon_json_path)

    writer.write_summary(summary_path)
    run_history.add_path("Summary", summary_path)

    writer.write_output(
        haplotype_matrix_path, pattern_matrix_path, amplicon_matrix_path)
    run_history.add_path("Haplotype Matrix", haplotype_matrix_path)
    run_history.add_path("Amplicon Matrix", amplicon_matrix_path)
    run_history.add_path("Pattern Matrix", pattern_matrix_path)

    log.info("FINISHED Pattern Selection")
    run_history.add_other("Run Time", time.time() - started_at)
    run_history.write()