def load_single(self, sample):
    """Load the variants of one sample from its VCF file.

    Calls ``variants_mongo.index_variants()`` first, then looks up the
    sample's VCF file in the mapping returned by
    ``sampleinfo_mongo.get_vcf_files()``, loads it and logs the success.

    :param sample: key identifying the sample in the VCF-file mapping.

    The lookup is not guarded: a sample missing from the mapping
    propagates the lookup error (``KeyError`` if the mapping is a dict).
    """
    # NOTE(review): presumably this ensures the indexes on the variants
    # collection exist before inserting -- confirm in variants_mongo.
    variants_mongo.index_variants()

    sample_vcf_path = sampleinfo_mongo.get_vcf_files()[sample]
    self.__load_sample_variants(sample, sample_vcf_path)
    self.__log_single_successfully_loaded(sample)
def load_all(self, num_processors=10):
    """Load every pending VCF file for the configured variant type.

    For ``variant_type == 'orig'`` the VCF files come from
    ``sampleinfo_mongo.get_vcf_files()``. Every path is checked with
    ``os.path.isfile`` before any work is queued; the first invalid path
    is logged and the process exits with status 1. Samples that
    ``variants_mongo.is_sample_loaded`` reports as loaded are skipped,
    the rest are queued as ``(sample, vcf_file)`` tuples.

    For ``variant_type == 'hotspot'`` the queue is whatever
    ``__get_unsaved_hotspot_vcf_files()`` returns.

    The queue is then handed to ``__parallel_process_vcf_files`` and a
    success message is logged.

    :param num_processors: value forwarded to
        ``__parallel_process_vcf_files`` as its second argument
        (default 10, the previously hard-coded value).
    :raises ValueError: if ``variant_type`` is neither ``'orig'`` nor
        ``'hotspot'``.
    :raises SystemExit: (status 1) if any 'orig' VCF path is not a file.
    """
    if self.variant_type == 'orig':
        client, db = mongo.get_connection()
        # try/finally so the connection is released even when
        # sys.exit(1) fires below or one of the calls raises.
        try:
            vcf_files = sampleinfo_mongo.get_vcf_files()

            # Check that the VCFs are all valid before starting, so a bad
            # path aborts the run before anything has been queued.
            for vcf_file in vcf_files.values():
                if not os.path.isfile(vcf_file):
                    self.__log_invalid_vcf_file(vcf_file)
                    sys.exit(1)

            pending_vcf_files = []
            for sample, vcf_file in vcf_files.items():
                # Single-argument print(...) emits the same output under
                # the Python 2 print statement and the Python 3 function.
                print(sample)
                if variants_mongo.is_sample_loaded(sample, self.variant_type, db):
                    self.__log_sample_already_loaded(sample)
                    continue
                self.__log_adding_sample_to_queue(sample, vcf_file)
                pending_vcf_files.append((sample, vcf_file))
        finally:
            client.close()
    elif self.variant_type == 'hotspot':
        pending_vcf_files = self.__get_unsaved_hotspot_vcf_files()
    else:
        # Previously this fell through and failed with an unbound
        # 'pending_vcf_files'; fail with an explicit message instead.
        raise ValueError(
            "Unsupported variant_type: %r" % (self.variant_type,))

    self.__parallel_process_vcf_files(pending_vcf_files, num_processors)
    self.__log_successfully_loaded()