def _populate_lib_info(self, sample_dir):
        """Gather the QC metrics found in a sample directory into an Info object.

        The sample id is taken from the directory name and the project name
        from its parent directory. The external sample name is derived from the
        name of the first ``*_R1.fastq.gz`` file found in the directory. Each
        metric is only set when the file it is read from exists.

        :param sample_dir: path to the directory holding the sample's output files
        :return: the populated Info object
        :raises IndexError: if no ``*_R1.fastq.gz`` file exists in sample_dir
        """
        lib_info = Info()
        sample_dir = os.path.abspath(sample_dir)
        sample_name = os.path.basename(sample_dir)
        project_name = os.path.basename(os.path.dirname(sample_dir))

        def find_outputs(pattern):
            # Look in the sample directory first, then fall back to its '.qc' sub-directory.
            return (glob.glob(os.path.join(sample_dir, pattern))
                    or glob.glob(os.path.join(sample_dir, '.qc', pattern)))

        lib_info[ELEMENT_SAMPLE_INTERNAL_ID] = sample_name
        lib_info[ELEMENT_LIBRARY_INTERNAL_ID] = sample_name
        lib_info[ELEMENT_PROJECT] = project_name
        plate, well = self.get_plate_id_and_well(sample_name)
        lib_info[ELEMENT_SAMPLE_PLATE] = plate
        lib_info[ELEMENT_SAMPLE_PLATE_WELL] = well

        fastq_suffix = "_R1.fastq.gz"
        fastq_files = glob.glob(os.path.join(sample_dir, "*" + fastq_suffix))
        if not fastq_files:
            # Same exception type the bare [0] lookup used to raise, but with a usable message.
            raise IndexError('No *%s file found in %s' % (fastq_suffix, sample_dir))
        external_sample_name = os.path.basename(fastq_files[0])[:-len(fastq_suffix)]
        lib_info[ELEMENT_SAMPLE_EXTERNAL_ID] = external_sample_name

        fastqc_file = os.path.join(sample_dir, external_sample_name + "_R1_fastqc.html")
        if os.path.exists(fastqc_file):
            nb_reads = int(get_nb_sequence_from_fastqc_html(fastqc_file))
            lib_info[ELEMENT_NB_READS_PASS_FILTER] = nb_reads
            # NOTE(review): 300 is presumably the number of bases per read pair (2 x 150) - confirm.
            lib_info[ELEMENT_NB_BASE] = nb_reads * 300

        bamtools_paths = find_outputs('bamtools_stats.txt')
        if bamtools_paths:
            total_reads, mapped_reads, duplicate_reads, proper_pairs = parse_bamtools_stats(bamtools_paths[0])
            lib_info[ELEMENT_NB_READS_IN_BAM] = int(total_reads)
            lib_info[ELEMENT_NB_MAPPED_READS] = int(mapped_reads)
            lib_info[ELEMENT_NB_DUPLICATE_READS] = int(duplicate_reads)
            lib_info[ELEMENT_NB_PROPERLY_MAPPED] = int(proper_pairs)

        yaml_metric_paths = find_outputs('*%s-sort-highdepth-stats.yaml' % external_sample_name)
        if yaml_metric_paths:
            lib_info[ELEMENT_MEDIAN_COVERAGE] = parse_highdepth_yaml_file(yaml_metric_paths[0])
        else:
            logging.critical('Missing %s-sort-highdepth-stats.yaml' % sample_name)

        bed_file_paths = find_outputs('*%s-sort-callable.bed' % external_sample_name)
        if bed_file_paths:
            coverage_per_type = parse_callable_bed_file(bed_file_paths[0])
            total = sum(coverage_per_type.values())
            if total:
                # No 'CALLABLE' entry means no callable bases; float() keeps the
                # ratio from truncating to 0 under Python 2 integer division.
                callable_bases = coverage_per_type.get('CALLABLE', 0)
                lib_info[ELEMENT_PC_BASES_CALLABLE] = float(callable_bases) / float(total)
            else:
                logging.critical('No coverage found in %s' % bed_file_paths[0])
        else:
            logging.critical('Missing *%s-sort-callable.bed' % sample_name)

        sex_file_paths = find_outputs('%s.sex' % external_sample_name)
        if sex_file_paths:
            with open(sex_file_paths[0]) as open_file:
                sex = open_file.read().strip()
            # The LIMS lookup does not need the file, so it runs after the file is closed.
            gender_from_lims = self.get_sex_from_lims(sample_name)
            lib_info[ELEMENT_GENDER] = match_gender(sex, gender_from_lims)

        # The genotype validation file is only looked for in the sample directory itself.
        genotype_file_paths = glob.glob(
            os.path.join(sample_dir, '%s_genotype_validation.txt' % external_sample_name)
        )
        if genotype_file_paths:
            snp_counts = parse_genotype_concordance(genotype_file_paths[0])[sample_name]
            total_snps = sum(snp_counts.values())
            if total_snps:
                # A category absent from the counts contributes 0 rather than None.
                no_call = snp_counts.get('no_call_seq', 0) + snp_counts.get('no_call_chip', 0)
                matching = snp_counts.get('matching_snps', 0)
                # Mismatches are whatever is neither a no-call nor a match; the
                # matching fraction used to be stored under the mismatch key.
                # NOTE(review): assumes every other category counted is a mismatch - confirm.
                mismatching = total_snps - no_call - matching
                lib_info[ELEMENT_GENOTYPE_PC_NOCALL] = float(no_call) / float(total_snps)
                lib_info[ELEMENT_GENOTYPE_PC_MISMATCH] = float(mismatching) / float(total_snps)
            else:
                logging.critical('No SNP found for %s in %s' % (sample_name, genotype_file_paths[0]))
        return lib_info
 def test_parse_bamtools_stats(self):
     """Check the four counts parse_bamtools_stats returns for the fixture file."""
     expected_counts = (988805087, 975587288, 171911966, 949154225)
     observed_counts = parse_bamtools_stats(self.bamtools_stat_file)
     self.assertEqual(observed_counts, expected_counts)