Exemplo n.º 1
0
def test_classify_manual_svtype_from_file():
    """Classify a two-sample MultiBedpe using a definition file.

    Both samples are parsed from the same module-level ``data`` text, so
    the per-patient count rows are expected to be identical. The classes
    are taken from ``example_definition.txt``; no condition callables are
    passed, which is why this test does not build a condition list.
    """
    bedpe1 = viola.read_bedpe(StringIO(data))
    bedpe2 = viola.read_bedpe(StringIO(data))
    ls_names = [
        'small_del', 'large_del', 'small_dup', 'large_dup', 'small_inv', 'tra'
    ]
    multibedpe = viola.MultiBedpe([bedpe1, bedpe2], ['bedpe1', 'bedpe2'])
    path = os.path.join(HERE, '../bedpe/data/example_definition.txt')
    result = multibedpe.classify_manual_svtype(definitions=path)

    # Per-record classification table, indexed by SV id for comparison.
    manual_sv_type = multibedpe.manual_sv_type
    manual_sv_type.set_index('id', inplace=True)
    manual_sv_type_expected = pd.read_csv(StringIO(data_expected),
                                          sep='\t',
                                          names=('id', 'value_idx',
                                                 'manual_sv_type'))
    manual_sv_type_expected.set_index('id', inplace=True)
    # check_like=True: row/column order is not part of the contract here.
    pd.testing.assert_frame_equal(manual_sv_type,
                                  manual_sv_type_expected,
                                  check_like=True)

    # Returned summary: one row per patient, one column per class plus
    # the trailing 'others' column.
    result_expected = pd.DataFrame([[2, 3, 1, 0, 2, 2, 2],
                                    [2, 3, 1, 0, 2, 2, 2]])
    result_expected.columns = ls_names + ['others']
    result_expected.columns.name = 'manual_sv_type'
    result_expected.index = ['bedpe1', 'bedpe2']
    result_expected.index.name = 'patients'
    pd.testing.assert_frame_equal(result, result_expected)
Exemplo n.º 2
0
def test_classify_manual_svtype_exclude_empty():
    """Classify a MultiBedpe that mixes populated and empty samples.

    With ``exclude_empty_cases=True`` the expected summary only carries
    rows for the two populated samples ('bedpe1' and 'bedpe2').
    """
    bedpe1, bedpe2 = (viola.read_bedpe(StringIO(data)) for _ in range(2))
    empty1, empty2 = (
        viola.read_bedpe(StringIO(data_empty)) for _ in range(2)
    )
    conditions = [small_del, large_del, small_dup, large_dup, small_inv, tra]
    class_names = [
        'small_del', 'large_del', 'small_dup', 'large_dup', 'small_inv', 'tra'
    ]
    sample_ids = ['bedpe1', 'empty1', 'bedpe2', 'empty2']
    multibedpe = viola.MultiBedpe([bedpe1, empty1, bedpe2, empty2],
                                  sample_ids)

    summary = multibedpe.classify_manual_svtype(
        ls_conditions=conditions,
        ls_names=class_names,
        exclude_empty_cases=True,
    )

    # Compare the per-record classification table, keyed by SV id.
    observed = multibedpe.manual_sv_type
    observed.set_index('id', inplace=True)
    expected = pd.read_csv(
        StringIO(data_expected),
        sep='\t',
        names=('id', 'value_idx', 'manual_sv_type'),
    ).set_index('id')
    pd.testing.assert_frame_equal(observed, expected, check_like=True)

    # Compare the returned per-patient count matrix.
    counts = [2, 3, 1, 0, 2, 2, 2]
    summary_expected = pd.DataFrame(
        [counts, counts],
        index=pd.Index(['bedpe1', 'bedpe2'], name='patients'),
        columns=pd.Index(class_names + ['others'], name='manual_sv_type'),
    )
    pd.testing.assert_frame_equal(summary, summary_expected)
Exemplo n.º 3
0
class TestReadBedpe:
    """Tests for ``viola.read_bedpe`` and a related parser helper.

    A two-record BEDPE text is parsed once in the class body and the
    resulting object is shared by the test methods through ``self.obj``.
    """

    # Tab-separated BEDPE fixture: one header line and two records.
    data = """chrom1\tstart1\tend1\tchrom2\tstart2\tend2\tname\tscore\tstrand1\tstrand2
chr1\t10\t13\tchr2\t20\t30\ttest1\t10\t+\t+
chr3\t100\t130\tchr3\t210\t230\ttest2\t30\t-\t+
"""
    # Parsed while the class body executes (i.e. when this module is
    # imported), not per test.
    b = StringIO(data)
    obj = viola.read_bedpe(b)

    def test_read_bedpe(self):
        """Smoke test: parsing the fixture must not raise."""
        b = StringIO(self.data)
        viola.read_bedpe(b)

    def test_svpos(self):
        """Compare the 'positions' table of the shared object to a literal."""
        expected_data = """id\tchrom1\tpos1\tchrom2\tpos2\tstrand1\tstrand2\tqual\tsvtype\tref\talt
test1\tchr1\t12\tchr2\t26\t+\t+\t10\tBND\tN\tN]chr2:26]
test2\tchr3\t116\tchr3\t221\t-\t+\t30\tDUP\tN\t<DUP>
        """
        df_svpos = self.obj.get_table('positions')
        df_expected = pd.read_csv(StringIO(expected_data), sep="\t")
        pd.testing.assert_frame_equal(df_svpos, df_expected)

    def test_create_alt_field_from_position(self):
        """Call ``create_alt_field_from_position`` on a two-row table."""
        test_data = """id\tchrom1\tpos1\tchrom2\tpos2\tstrand1\tstrand2\tsvtype\tref
test1\tchr1\t10\tchr2\t10\t+\t-\tBND\tN
test2\tchr1\t10\tchr1\t10\t+\t-\tDEL\tN
"""
        b = StringIO(test_data)
        df_svpos = pd.read_csv(b, sep="\t")
        # NOTE(review): `result` is never asserted on, so this only checks
        # that the call does not raise -- confirm whether an assertion is
        # missing here.
        result = viola.io.parser.create_alt_field_from_position(df_svpos)
def test_classify_manual_svtype():
    """Classify a single Bedpe using explicit conditions and class names.

    Checks both the per-record ``manual_sv_type`` table stored on the
    object and the per-class count Series returned by the call.
    """
    class_names = [
        'small_del', 'large_del', 'small_dup', 'large_dup', 'small_inv', 'tra'
    ]
    conditions = [small_del, large_del, small_dup, large_dup, small_inv, tra]

    bedpe = viola.read_bedpe(StringIO(data))
    counts = bedpe.classify_manual_svtype(ls_conditions=conditions,
                                          ls_names=class_names)

    # Per-record table, keyed by SV id; ordering is ignored (check_like).
    observed = bedpe.manual_sv_type
    observed.set_index('id', inplace=True)
    expected = pd.read_csv(
        StringIO(data_expected),
        sep='\t',
        names=('id', 'value_idx', 'manual_sv_type'),
    ).set_index('id')
    pd.testing.assert_frame_equal(observed, expected, check_like=True)

    # Returned counts: one entry per class plus the trailing 'others'.
    counts_expected = pd.Series(
        [2, 3, 1, 0, 2, 2, 2],
        index=class_names + ['others'],
        name='manual_sv_type',
    )
    pd.testing.assert_series_equal(counts, counts_expected)
Exemplo n.º 5
0
def test_remove_info():
    """Adding and then removing an info table must restore the object."""
    bedpe = viola.read_bedpe(StringIO(data))
    # Snapshot taken before any modification; used as the reference.
    pristine = bedpe.copy()

    extra_info = pd.DataFrame({
        'id': ['test1', 'test2'],
        'value_idx': [0, 0],
        'test': ['t', 'u'],
    })
    bedpe.add_info_table('test', extra_info)
    bedpe.remove_info_table('test')

    viola.testing.assert_bedpe_equal(bedpe, pristine)
Exemplo n.º 6
0
def test_to_bedpe():
    """Write BEDPE files and compare them with the stored expectations.

    Two files are written under ``HERE/data``: one with default options and
    one with ``custom_infonames=['svlen']``. Each is compared against its
    expected counterpart in the same directory.
    """
    bedpe = viola.read_bedpe(StringIO(data))
    out_default = os.path.join(HERE, 'data/output.bedpe')
    out_svlen = os.path.join(HERE, 'data/output.svlen.bedpe')

    bedpe.to_bedpe(out_default)
    bedpe.to_bedpe(out_svlen, custom_infonames=['svlen'])

    # shallow=False forces a content comparison; the default (shallow=True)
    # treats files with identical os.stat() signatures as equal without
    # reading them.
    assert filecmp.cmp(out_default,
                       os.path.join(HERE, 'data/bedpe_expected.bedpe'),
                       shallow=False)
    assert filecmp.cmp(out_svlen,
                       os.path.join(HERE, 'data/output.svlen.expected.bedpe'),
                       shallow=False)
Exemplo n.º 7
0
def generate_feature_matrix(input_dir, input_files, input_files_id, format_,
                            caller, svtype_col_name, as_breakpoint,
                            definitions, output):
    """
    Generate feature matrix from VCF or BEDPE files.

    Exactly one of ``input_dir`` / ``input_files`` must be given; when both
    or neither are supplied the function returns ``None`` without doing
    anything.

    Parameters
    ----------
    input_dir
        Directory handed to ``viola.read_bedpe_multi`` /
        ``viola.read_vcf_multi``.
    input_files
        Comma-separated string of file paths, each read individually.
    input_files_id
        Comma-separated string of sample names for ``input_files``; when
        ``None``, ``range(len(files))`` is used instead.
    format_
        ``'bedpe'`` selects the BEDPE readers; any other value selects the
        VCF readers.
    caller
        Forwarded as ``variant_caller`` to the VCF readers.
    svtype_col_name
        Forwarded to the BEDPE readers.
    as_breakpoint
        VCF only. Forwarded to ``read_vcf_multi``; for individual files,
        a truthy value applies ``breakend2breakpoint()`` to each VCF.
    definitions
        Forwarded to ``classify_manual_svtype``.
    output
        Destination passed to ``to_csv`` (tab-separated).
    """
    from_dir = input_dir is not None
    from_files = input_files is not None
    # Both sources given, or neither: nothing to do.
    if from_dir == from_files:
        return

    is_bedpe = format_ == 'bedpe'

    if from_dir:
        if is_bedpe:
            data = viola.read_bedpe_multi(input_dir,
                                          svtype_col_name=svtype_col_name)
        else:
            data = viola.read_vcf_multi(input_dir,
                                        variant_caller=caller,
                                        as_breakpoint=as_breakpoint)
    else:
        paths = input_files.split(',')
        if is_bedpe:
            loaded = [
                viola.read_bedpe(p, svtype_col_name=svtype_col_name)
                for p in paths
            ]
        elif as_breakpoint:
            loaded = [
                viola.read_vcf(p, variant_caller=caller).breakend2breakpoint()
                for p in paths
            ]
        else:
            loaded = [viola.read_vcf(p, variant_caller=caller) for p in paths]

        if input_files_id is None:
            sample_names = range(len(loaded))
        else:
            sample_names = input_files_id.split(',')
        # NOTE(review): VCF objects are also wrapped in MultiBedpe here --
        # confirm this is intended rather than a multi-VCF container.
        data = viola.MultiBedpe(loaded, sample_names)

    result = data.classify_manual_svtype(definitions=definitions)
    result.to_csv(output, sep='\t')
Exemplo n.º 8
0
def test_append_info():
    """An added info table must be registered in every internal lookup."""
    bedpe = viola.read_bedpe(StringIO(data))
    extra_info = pd.DataFrame({
        'id': ['test1', 'test2'],
        'value_idx': [0, 0],
        'test': ['t', 'u'],
    })

    bedpe.add_info_table('test', extra_info)

    # The table must be retrievable, unchanged, from both table registries.
    for registry in (bedpe._odict_alltables, bedpe._odict_df_info):
        pd.testing.assert_frame_equal(registry['test'], extra_info)
    assert 'test' in bedpe._ls_infokeys
def test_classify_manual_svtype_from_file():
    """Classify a single Bedpe using a definition file on disk.

    Checks both the per-record ``manual_sv_type`` table stored on the
    object and the per-class count Series returned by the call.
    """
    definition_path = os.path.join(HERE, 'data/example_definition.txt')
    bedpe = viola.read_bedpe(StringIO(data))
    counts = bedpe.classify_manual_svtype(definitions=definition_path)

    # Per-record table, keyed by SV id; ordering is ignored (check_like).
    observed = bedpe.manual_sv_type
    observed.set_index('id', inplace=True)
    expected = pd.read_csv(
        StringIO(data_expected),
        sep='\t',
        names=('id', 'value_idx', 'manual_sv_type'),
    ).set_index('id')
    pd.testing.assert_frame_equal(observed, expected, check_like=True)

    # Returned counts: one entry per class plus the trailing 'others'.
    class_names = [
        'small_del', 'large_del', 'small_dup', 'large_dup', 'small_inv', 'tra'
    ]
    counts_expected = pd.Series(
        [2, 3, 1, 0, 2, 2, 2],
        index=class_names + ['others'],
        name='manual_sv_type',
    )
    pd.testing.assert_series_equal(counts, counts_expected)
Exemplo n.º 10
0
def test_copy():
    """A copy must compare equal to the object it was made from."""
    original = viola.read_bedpe(StringIO(data))
    duplicate = original.copy()
    viola.testing.assert_bedpe_equal(original, duplicate)
Exemplo n.º 11
0
 def test_read_bedpe(self):
     """Smoke test: parsing ``self.data`` must not raise."""
     viola.read_bedpe(StringIO(self.data))
def test_classify_manual_svtype_from_article():
    """Call ``classify_manual_svtype`` with ``definitions="article"``.

    The test passes whether the call succeeds or raises ``TypeError``; any
    other exception propagates and fails the test.
    """
    sv_table = viola.read_bedpe(StringIO(data))
    try:
        sv_table.classify_manual_svtype(definitions="article")
    except TypeError:
        # NOTE(review): TypeError is deliberately tolerated here -- confirm
        # which situation this is meant to cover.
        pass
def test_read_bedpe_with_empty():
    """Smoke test: a MultiBedpe can be built when one sample is empty."""
    populated = [viola.read_bedpe(StringIO(data)) for _ in range(2)]
    empty = viola.read_bedpe(StringIO(data_empty))
    sample_ids = ['bedpe1', 'bedpe2', 'empty']
    # Construction itself is the check; nothing is asserted on the result.
    multibedpe = viola.MultiBedpe(populated + [empty], sample_ids)