Пример #1
0
def one_to_one_tuples(*,
                      kits: List[str],
                      output_prefix: Path
                      ) -> List[Optional[OneToOneAutosomeResult]]:
    '''Performs one-to-one autosomal analysis on each pair of kits.

    Every entry of ``kits`` is split into two kit identifiers, the pair is
    analysed with ``one_to_one_api`` and, once all pairs are done, the
    collected results are passed to ``write_metrics`` together with the path
    ``<output_prefix>.summary.txt``.

    Args:
        kits: one or more kit pairs, each given as two kit names or numbers
            separated by a comma (e.g. ``'A123,B456'``).  Whitespace around
            either identifier is ignored.
        output_prefix: the path prefix for the output files.  Each pair is
            analysed with the prefix ``<output_prefix>.<kit_one>-<kit_two>``.

    Returns:
        One entry per input pair, in input order.  An entry is ``None`` when
        ``one_to_one_api`` returned ``None`` for that pair.

    Raises:
        ValueError: if an entry of ``kits`` does not consist of exactly two
            non-empty, comma-separated identifiers.
    '''

    # Parse every pair before any lookup or analysis starts, so a malformed
    # entry is reported immediately instead of part-way through the batch.
    pairs: List[List[str]] = []
    for kit_tuple in kits:
        names = [name.strip() for name in kit_tuple.split(',')]
        if len(names) != 2 or not all(names):
            raise ValueError(
                'expected two comma-separated kit names or numbers, '
                f'got {kit_tuple!r}')
        pairs.append(names)

    # Look the kits up once and share the mapping (keyed by kit number)
    # between all of the pairwise analyses.
    kits_dict: Dict[str, Kit] = {kit.number: kit for kit in find_kits()}

    results: List[Optional[OneToOneAutosomeResult]] = []
    logging.info(f'processing {len(pairs)} kit pairs.')
    for i, (kit_one, kit_two) in enumerate(pairs, 1):
        logging.info(f'processing ({i}/{len(pairs)}) kits {kit_one} and {kit_two}.')
        tuple_output_prefix = Path(str(output_prefix) + f'.{kit_one}-{kit_two}')
        result = one_to_one_api(kit_one=kit_one,
                                kit_two=kit_two,
                                output_prefix=tuple_output_prefix,
                                kits=kits_dict)
        if result is None:
            logging.warning(f'No 1:1 autosomal match found for kits {kit_one} and {kit_two}.')
        # A missing result is kept as None so the output lines up with the input.
        results.append(result)

    logging.info('writing output.')
    summary: Path = Path(str(output_prefix) + '.summary.txt')
    write_metrics(summary, results)

    return results
Пример #2
0
def _one_to_many(
        kit: str,
        output: Path,
        max_matches: Optional[int],
        kits: Optional[Dict[str, Kit]] = None,
        driver: Optional[WebDriver] = None) -> List['OneToManyAutosomeResult']:
    '''Performs one-to-many autosomal analysis.

    Follows the one-to-many link for the kit, reads every table on the
    resulting page whose first header cell is ``Kit`` and converts its rows
    into results, which are then written with ``write_metrics``.

    Args:
        kit: the name or number of the kit to analyse.
        output: the output file.
        max_matches: the maximum # of matches to read from a results table,
            or None to read every row.
        kits: a mapping of kit number to kit, useful when performing many
            analyses.  When None the kits are listed with ``_ls``.
        driver: the web driver to use.  When None a driver is obtained from
            ``main_page()`` and closed before returning.

    Returns:
        A list of matches.

    Raises:
        IndexError: if ``kit`` is neither a known kit number nor the name of
            a known kit.
    '''
    results: List['OneToManyAutosomeResult'] = []

    owns_driver = driver is None
    _driver = main_page() if driver is None else driver

    try:
        if kits is None:
            kits = {k.number: k for k in _ls(_driver)}

        if kit not in kits:
            # Not a kit number, so treat the value as a kit name.
            numbers = [number for number, k in kits.items() if k.name == kit]
            if not numbers:
                raise IndexError(f'no kit found with name or number {kit!r}')
            kit = numbers[0]
    except Exception:
        # Do not leak a driver that was opened here; a caller-supplied driver
        # is left untouched at this stage.
        if owns_driver:
            _driver.close()
        raise

    try:
        url = f'OneToMany0Tier2.php?kit_num={kit}'
        page = _driver.find_element_by_xpath('//a[@href="' + url + '"]')
        page.click()

        root = lxml.html.fromstring(_driver.page_source)
        for table in root.xpath("//table"):
            rows = list(table.xpath('.//tr'))
            if not rows:
                continue

            # only tables whose first row starts with a "Kit" cell hold matches
            tds = rows[0].xpath('.//td')
            if not tds or tds[0].text != 'Kit':
                continue
            header = [td.text for td in tds]
            assert len(header) == 10, f'header: {header}'

            # row 0 is the header, so max_matches data rows end at index max_matches
            last_row: int = len(
                rows) if max_matches is None else max_matches + 1
            logging.info(f'Reading rows for {kit}')
            for row in rows[1:last_row]:
                columns = [maybe_href(td) for td in row.xpath('.//td')]
                assert len(columns) == 10, f'columns: {columns}'
                d = dict(zip(header, columns))

                result = OneToManyAutosomeResult(
                    kit_one=kits[kit],
                    kit_two=Kit(name=d['Name'],
                                number=d['Kit'],
                                email=d['Email'],
                                testing_company=d['Testing Company']),
                    largest_segment=float(d['Largest Seg']),
                    total_half_match_segments=float(d['Total cM']),
                    most_recent_common_ancestor=float(d['Gen']),
                    num_snps=int(d['Overlap']),
                    date_compared=d['Date Compared'])
                results.append(result)
            logging.info(f'Returning {len(results)} results.')
    except Exception:
        # Any failure while driving or parsing the page closes the driver,
        # including a caller-supplied one, before the error propagates.
        _driver.close()
        raise

    if owns_driver:
        _driver.close()

    write_metrics(output, results)

    return results
Пример #3
0
def _one_to_one(
        kit_one: str,
        kit_two: str,
        output_prefix: Path,
        kits: Optional[Dict[str, Kit]] = None,
        driver: Optional[WebDriver] = None
) -> Optional['OneToOneAutosomeResult']:
    '''Performs one-to-one autosomal analysis.

    Submits the two kits to the comparison form, reads the segment table
    (the table whose first header cell is ``Chr``) and the summary lines from
    the results page, then writes ``<output_prefix>.summary.txt`` and
    ``<output_prefix>.detailed.txt``.

    Args:
        kit_one: the first kit name or number
        kit_two: the second kit name or number
        output_prefix: the prefix for the output files.
        kits: a mapping of kit number to kit, useful when performing many 1:1
            analyses.  When None the kits are listed with ``_ls``.
        driver: the web driver to use.  When None a driver is obtained from
            ``main_page()`` and closed before returning.

    Returns:
        The analysis results.  A summary value that the results page does not
        report keeps its default (zero, ``-1.0`` for the most recent common
        ancestor, ``'none found'`` for the version) and ``segments`` is empty
        when no segment table is present, so a result is always returned.

    Raises:
        IndexError: if ``kit_one`` or ``kit_two`` is neither a known kit
            number nor the name of a known kit.
    '''
    owns_driver = driver is None
    _driver = main_page() if driver is None else driver

    try:
        if kits is None:
            kits = {k.number: k for k in _ls(_driver)}

        resolved: List[str] = []
        for wanted in (kit_one, kit_two):
            if wanted not in kits:
                # Not a kit number, so treat the value as a kit name.
                numbers = [number for number, k in kits.items()
                           if k.name == wanted]
                if not numbers:
                    raise IndexError(
                        f'no kit found with name or number {wanted!r}')
                wanted = numbers[0]
            resolved.append(wanted)
        kit_one, kit_two = resolved
    except Exception:
        # Do not leak a driver that was opened here; a caller-supplied driver
        # is left untouched at this stage.
        if owns_driver:
            _driver.close()
        raise

    try:
        url = 'v_compare1.php'
        page = _driver.find_element_by_xpath('//a[@href="' + url + '"]')
        page.click()

        kit1 = _driver.find_element_by_name('kit1')
        kit1.clear()
        kit1.send_keys(kit_one)

        kit2 = _driver.find_element_by_name('kit2')
        kit2.clear()
        kit2.send_keys(kit_two)

        submit = _driver.find_element_by_name('xsubmit')
        submit.click()

        # Read the page once; it is parsed both as HTML and line by line.
        page_source = _driver.page_source

        segments: List[SegmentResult] = []
        root = lxml.html.fromstring(page_source)
        for table in root.xpath("//table"):
            rows = list(table.xpath('.//tr'))
            if not rows:
                continue

            # check that the first column in the first row has value "Chr"
            tds = rows[0].xpath('.//td')
            if not tds or tds[0].text != 'Chr':
                continue

            for row in rows[1:]:
                columns = row.xpath('.//td')
                assert len(columns) == 5

                segment = SegmentResult(
                    chromosome=columns[0].text,
                    start=_comma_value_to_int(columns[1].text),
                    end=_comma_value_to_int(columns[2].text),
                    centimorgans=float(columns[3].text),
                    num_snps=_comma_value_to_int(columns[4].text))
                segments.append(segment)

        # Defaults used for any summary line the page does not contain.
        largest_segment: float = 0.0
        total_half_match_segments: float = 0.0
        pct_half_match_segments: float = 0.0
        most_recent_common_ancestor: float = -1.0
        shared_segments: int = 0
        num_snps: int = 0
        pct_snps_identical: float = 0.0
        version: str = 'none found'

        for line in page_source.split('\n'):
            line = line.strip().replace('<br>', '')

            if 'No shared DNA segments found' in line:
                assert len(segments) == 0
            elif 'Largest segment' in line:
                largest_segment = float(line.split(' = ')[1].split(' ')[0])
            elif 'Total Half-Match segments' in line:
                # the value after " = " is followed by a unit and then the
                # percentage in parentheses
                fields = line.split(' = ')[1].split(' ')
                total_half_match_segments = float(fields[0])
                pct_half_match_segments = float(
                    fields[2].replace('(', '').replace(')', ''))
            elif 'Estimated number of generations to MRCA' in line:
                most_recent_common_ancestor = float(
                    line.split(' = ')[1].split(' ')[0])
            elif 'shared segments found for this comparison' in line:
                shared_segments = int(line.split(' ')[0])
            elif 'SNPs used for this comparison' in line:
                num_snps = int(line.split(' ')[0])
            elif 'Pct SNPs are full identical' in line:
                pct_snps_identical = float(line.split(' ')[0])
            elif 'Ver:' in line:
                version = (line.replace('<font size="2">Ver: ', '')
                           .replace('</font>', '')
                           .replace(' ', '-'))

        # Every summary value has a non-None default, so a result is always
        # built here.
        final_result = OneToOneAutosomeResult(
            kit_one=kits[kit_one],
            kit_two=kits[kit_two],
            segments=segments,
            largest_segment=largest_segment,
            total_half_match_segments=total_half_match_segments,
            pct_half_match_segments=pct_half_match_segments,
            most_recent_common_ancestor=most_recent_common_ancestor,
            shared_segments=shared_segments,
            num_snps=num_snps,
            pct_snps_identical=pct_snps_identical,
            version=version)

    except Exception:
        # Any failure while driving or parsing the page closes the driver,
        # including a caller-supplied one, before the error propagates.
        _driver.close()
        raise

    if owns_driver:
        _driver.close()

    summary: Path = Path(str(output_prefix) + '.summary.txt')
    write_metric(summary, final_result)
    detailed: Path = Path(str(output_prefix) + '.detailed.txt')
    write_metrics(detailed, final_result.segments)

    return final_result