Exemplo n.º 1
0
def get_report_kplets(limit_to=300, load_locations=None):

    _db = DbClass()

    _db.cmd = """select apc.*, s1.cnt, s1.wgt, s1.an
                from (
                        select ap.id ,count(*) as cnt, sum(w.weight) as wgt, group_concat(awf.name) as an
                        from archea_3plets ap
                        inner join archea_3plets_win10 apw on ap.id = apw.kplet_id
                        inner join archea_win10_files awf on apw.file_id = awf.id
                        inner join sources s on awf.source_id=s.id
                        inner join weights w on w.genome_id=s.genome_id
                        group by ap.id
                        having count(distinct s.genome_id)>1 ) s1
                inner join archea_3plets_codes apc on s1.id=apc.id
                order by s1.wgt desc
                limit 0,%d""" % limit_to

    out_list = []

    for row in _db.retrieve():
        id = row[0]
        kplet_codes = (row[1:4])
        if len(set(kplet_codes)) != 3:
            continue
        count = row[4]
        files = row[6].split(',')
        tmp_kplet = Kplet(id=id, codes=kplet_codes, count=count, files=files)
        out_list.append(tmp_kplet)

    _path = neighborhoods_path()
    if load_locations:
        [kplet.load_locations(_path) for kplet in out_list]

    return out_list
Exemplo n.º 2
0
def insert_source_duplets(kplets_pile, profile2id, source_id):

    sql_insert_kplet = """insert ignore into prok1402_all_adj_duplet (kplet_1, kplet_2, multidomain) values \n"""
    sql_insert_kplet_file = """insert into prok1402_all_adj_duplet_source (kplet_id, source_id) values \n"""

    for (duplet, fname, multidomain) in kplets_pile:

        terms = sorted(list(duplet))
        terms = [profile2id[_] for _ in terms]

        sql_insert_kplet += """(%s, %s, %d),\n""" % (terms[0], terms[1], multidomain)

        sql_insert_kplet_file += """((select id from prok1402_all_adj_duplet where """ + \
                                  """kplet_1=%s and kplet_2=%s and multidomain=%d), %d),\n""" % \
                                  (terms[0], terms[1], multidomain, source_id)

    sql_insert_kplet_file = sql_insert_kplet_file[:-2]
    sql_insert_kplet_file += ";"

    sql_insert_kplet = sql_insert_kplet[:-2]
    sql_insert_kplet += ";"

    _db = DbClass()

    _db.cmd = sql_insert_kplet
    _db.execute()

    _db.cmd = sql_insert_kplet_file
    _db.execute()

    _db.commit()
Exemplo n.º 3
0
def get_code_kplet(kplet_id):

    _db = DbClass()

    _db.cmd = """select kplet_1, kplet_2, kplet_3, kplet_4, kplet_5 from bacteria_5plets_codes where id = %s""" % kplet_id

    return _db.retrieve()[0]
Exemplo n.º 4
0
def org2src_src2files_map(files):

    _db = DbClass()

    _db.cmd = """select awf.name, s.name, g.name
                  from bacteria_win10_files awf
                  inner join sources s on awf.source_id=s.id
                  inner join genomes g on s.genome_id = g.id
                  where awf.name in ('%s')""" % "','".join(files)

    _org2src = {}
    _src2files = {}

    for row in _db.retrieve():
        parts = row
        [_file, _src, _org] = parts
        if _org in _org2src:
            _org2src[_org].update([_src])
        else:
            _org2src[_org] = set([_src])

        if _src in _src2files:
            _src2files[_src].update([_file])
        else:
            _src2files[_src] = set([_file])

    return _org2src, _src2files
Exemplo n.º 5
0
def insert_duplets(kplets_pile, profile2id, file2id):

    sql_insert_kplet = """insert ignore into prok1402_baited_duplet (kplet_1, kplet_2) values \n"""
    sql_insert_kplet_file = """insert ignore into prok1402_baited_duplet_file (kplet_id, file_id) values \n"""

    for (duplet, fname) in kplets_pile:

        terms = sorted(list(duplet))
        terms = [profile2id[_] for _ in terms]

        fname = os.path.basename(fname)
        file_id = file2id[fname]

        sql_insert_kplet += """(%s, %s),\n""" % (terms[0], terms[1])

        sql_insert_kplet_file += """((select id from prok1402_baited_duplet where """ + \
                                  """kplet_1=%s and kplet_2=%s), %d),\n""" % (terms[0], terms[1] ,file_id)

    sql_insert_kplet_file = sql_insert_kplet_file[:-2]
    sql_insert_kplet_file += ";"

    sql_insert_kplet = sql_insert_kplet[:-2]
    sql_insert_kplet += ";"

    _db = DbClass()

    _db.cmd = sql_insert_kplet
    _db.execute()

    _db.cmd = sql_insert_kplet_file
    _db.execute()

    _db.commit()
Exemplo n.º 6
0
def get_report_kplets(limit_to=300, load_locations=None):

    sql_cmd = """select apc.*, s1.cnt, s1.wgt, s1.an
                from (
                        select ap.id ,count(*) as cnt, sum(w.weight) as wgt, group_concat(awf.name) as an
                        from archea_4plets ap
                        inner join archea_4plets_win10 apw on ap.id = apw.kplet_id
                        inner join archea_win10_files awf on apw.file_id = awf.id
                        inner join sources s on awf.source_id=s.id
                        inner join weights w on w.genome_id=s.genome_id
                        group by ap.id
                        having count(distinct s.genome_id)>1 ) s1
                inner join archea_4plets_codes apc on s1.id=apc.id
                order by s1.wgt desc
                limit 0,%d""" % limit_to

    _db = DbClass()
    _db.cmd = sql_cmd
    out_list = []

    for row in _db.retrieve():
        id = row[0]
        kplet_codes = (row[1:5])
        if len(set(kplet_codes)) != 4:
            continue
        count = row[5]
        files = row[7].split(',')
        tmp_kplet = Kplet(id=id, codes=kplet_codes, count=count, files=files)
        out_list.append(tmp_kplet)

    _path = neighborhoods_path()
    if load_locations:
        [kplet.load_locations(_path) for kplet in out_list]

    return out_list
Exemplo n.º 7
0
def insert_kplet(profiles):
    profile2id = t.map_profile2id(profiles)

    _db = DbClass()

    sql_cmd = """insert into archea_4plets (kplet_1, kplet_2, kplet_3, kplet_4) values (%d, %d, %d, %d)"""
    sql_cmd = sql_cmd % (profile2id[profiles[0]], profile2id[profiles[1]], profile2id[profiles[2]], profile2id[profiles[3]])
    _db.cmd = sql_cmd
    _db.execute()
Exemplo n.º 8
0
def get_code_kplet(kplet_id):
    _sql_cmd = """SELECT cp1.code, cp2.code, cp3.code, cp4.code
                    FROM PatternQuest.archea_4plets ap
                    inner join cdd_profiles cp1 on cp1.id = ap.kplet_1
                    inner join cdd_profiles cp2 on cp2.id = ap.kplet_2
                    inner join cdd_profiles cp3 on cp3.id = ap.kplet_3
                    inner join cdd_profiles cp4 on cp4.id = ap.kplet_4
                    where ap.id = %d""" % kplet_id
    _db = DbClass()
    _db.cmd = _sql_cmd
    return _db.retrieve()
Exemplo n.º 9
0
def archea_kplet_ids2files(id_list):

    _sql_cmd = """select distinct awf.name
                  from archea_5plets ap
                  inner join archea_5plets_win10 apw on ap.id = apw.kplet_id
                  inner join archea_win10_files awf on apw.file_id = awf.id
                  where ap.id in (%s)""" % " , ".join(id_list)

    _db = DbClass()
    _db.cmd = _sql_cmd

    return [l[0] for l in _db.retrieve()]
Exemplo n.º 10
0
def map_profile2id(profile_list):

    _db = DbClass()
    if profile_list == 'all':
        sql_cmd = """select code, id from cdd_profiles"""
    else:
        sql_cmd = """select code, id from cdd_profiles where code in ('%s')"""
        sql_cmd = sql_cmd % "','".join(profile_list)

    _db.cmd = sql_cmd

    return {row[0]: int(row[1]) for row in _db.retrieve()}
Exemplo n.º 11
0
def get_multiple_kplets():

    _sql_cmd = """select  ap.id, count(*) cnt, group_concat(convert(apw.file_id, char(15))) as file_ids
                  from archea_5plets ap
                  inner join archea_5plets_win10 apw on ap.id = apw.kplet_id
                  group by ap.id
                  having count(*)>1
                  order by cnt desc"""
    _db = DbClass()
    _db.cmd = _sql_cmd

    return _db.retrieve()
Exemplo n.º 12
0
def map_baited_file2id():

    _db = DbClass()
    _db.cmd = """ select name, id from prok1402_baited_files"""
    rows = _db.retrieve()

    file2id = {}
    id2file = {}

    for row in rows:
        file2id[row[0]] = row[1]
        id2file[row[1]] = row[0]

    return file2id, id2file
Exemplo n.º 13
0
def get_kplet_id(profiles):
    profile2id = t.map_profile2id(profiles)

    _db = DbClass()

    sql_cmd = """select id from archea_4plets where kplet_1=%d and kplet_2=%d and kplet_3=%d and kplet_4=%d"""
    sql_cmd = sql_cmd % (profile2id[profiles[0]], profile2id[profiles[1]], profile2id[profiles[2]], profile2id[profiles[3]])

    _db.cmd = sql_cmd

    rows = _db.retrieve()

    if rows:
        return rows[0][0]
    return None
Exemplo n.º 14
0
def get_report_kplet(kplet_id, id2cdd, load_locations=None):

    _db = DbClass()
    _db.cmd = """SET group_concat_max_len=1500000;"""
    _db.execute()
    _db.cmd = """select ap.* ,count(*) as cnt, sum(w.weight) as wgt, group_concat(awf.name) as an
                from bacteria_5plets ap
                inner join bacteria_5plets_win10 apw on ap.id = apw.kplet_id
                inner join bacteria_win10_files awf on apw.file_id = awf.id
                inner join sources s on awf.source_id=s.id
                inner join weights w on w.genome_id=s.genome_id
                where ap.id=%d
                group by ap.id""" % kplet_id

    out_list = []

    for row in _db.retrieve():
        id = row[0]
        kplet_codes = [id2cdd[int(_id)] for _id in row[1:6]]

        if len(set(kplet_codes)) != 5:
            continue
        count = row[6]
        files = row[8].split(',')
        tmp_kplet = Kplet(id=id, codes=kplet_codes, count=count, files=files)
        out_list.append(tmp_kplet)

    _path = neighborhoods_path()
    if load_locations:
        [kplet.load_locations(_path) for kplet in out_list]

    return out_list[0]
Exemplo n.º 15
0
def get_report_kplets(id2cdd, limit_to=500, load_locations=None):

    _db = DbClass()
    _db.cmd = """SET group_concat_max_len=1500000"""
    _db.execute()

    _db.cmd = """select ap.* ,count(*) as cnt, sum(w.weight) as wgt, group_concat(awf.name) as an
                 from bacteria_3plets ap
                 inner join bacteria_3plets_win10 apw on ap.id = apw.kplet_id
                 inner join bacteria_win10_files awf on apw.file_id = awf.id
                 inner join sources s on awf.source_id=s.id
                 inner join weights w on w.genome_id=s.genome_id
                 group by ap.id
                 having count(distinct s.genome_id)>1
                 order by wgt desc
                 limit 0, %d""" % limit_to

    out_list = []

    for row in _db.retrieve():
        id = row[0]
        kplet_codes = ([id2cdd[int(_id)] for _id in row[1:4]])
        if len(set(kplet_codes)) != 3:
            continue
        count = row[4]
        weight = row[5]
        files = row[6].split(',')
        tmp_kplet = Kplet(id=id, codes=kplet_codes, count=count, files=files)
        out_list.append(tmp_kplet)

    _path = neighborhoods_path()
    if load_locations:
        [kplet.load_locations(_path) for kplet in out_list]

    return out_list
Exemplo n.º 16
0
def get_report_kplets(id2cdd, limit_to=500):

    _db = DbClass()
    _db.cmd = """SET group_concat_max_len=1500000"""
    _db.execute()

    _db.cmd = """select cp.*, count(distinct cwf.id) as cnt, group_concat(cwf.name) as files
                from crispr_2plets cp
                inner join crispr_2plets_wgs cpw on cp.id = cpw.kplet_id
                inner join crispr_wgs_files cwf on cpw.file_id = cwf.id
                group by cp.id
                having count(distinct cwf.id)>1
                order by cnt desc
                limit 0, %d""" % limit_to

    out_list = []

    for row in _db.retrieve():
        id = row[0]
        kplet_codes = ([id2cdd[int(_id)] for _id in row[1:3]])
        if len(set(kplet_codes)) != 2:
            continue
        count = row[3]
        files = row[4].split(',')
        tmp_kplet = Kplet(id=id, codes=kplet_codes, count=count, files=files)
        out_list.append(tmp_kplet)

    return out_list
Exemplo n.º 17
0
def file2src_src2org_map(files):

    _db = DbClass()
    _db.cmd = """select awf.name, s.name, g.name
                  from bacteria_win10_files awf
                  inner join sources s on awf.source_id=s.id
                  inner join genomes g on s.genome_id = g.id
                  where awf.name in ('%s')""" % "','".join(files)

    _src2org = {}
    _file2src = {}
    for row in _db.retrieve():
        parts = row
        [_file, _src, _org] = parts
        _src2org[_src] = _org
        _file2src[_file] = _src

    return _file2src, _src2org
Exemplo n.º 18
0
def get_archaea_kplets():

    sql_cmd = """select apc.*, s1.cnt, s1.wgt
                 from (
                        select ap.id ,count(*) as cnt, sum(w.weight) as wgt
                        from archea_5plets ap
                        inner join archea_5plets_win10 apw on ap.id = apw.kplet_id
                        inner join archea_win10_files awf on apw.file_id = awf.id
                        inner join sources s on awf.source_id=s.id
                        inner join weights w on w.genome_id=s.genome_id
                        group by ap.id ) s1
                 inner join archea_5plets_codes apc on s1.id=apc.id
                 order by s1.wgt desc"""

    _db = DbClass()
    _db.cmd = sql_cmd

    return _db.retrieve()
Exemplo n.º 19
0
def extract_baited_duplet_aggregates(duplet_type="adj"):

    if duplet_type == "adj":
        sql_cmd = """select d.id, 
                        p1.code as profile_1, 
                        p2.code as profile_2, 
                        count(*) as loci, 
                        sum(w.weight) as loci_weight, 
                        group_concat(CONVERT(df.file_id, CHAR(20)) separator ',') as files  
                    from prok1402_baited_adj_duplet d 
                    inner join prok1402_baited_adj_duplet_file df on d.id = df.kplet_id 
                    inner join cdd_profiles p1 on d.kplet_1 = p1.id 
                    inner join cdd_profiles p2 on d.kplet_2 = p2.id 
                    inner join prok1402_baited_files bf on bf.id = df.file_id 
                    inner join sources s on bf.source_id = s.id 
                    inner join genomes g on g.id = s.genome_id 
                    inner join weights w on w.genome_id = g.id 
                    group by d.id 
                    order by loci desc;"""
    else:
        sql_cmd = """select d.id, 
                        p1.code as profile_1, 
                        p2.code as profile_2, 
                        count(*) as loci, 
                        sum(w.weight) as loci_weight, 
                        group_concat(CONVERT(df.file_id, CHAR(20)) separator ',') as files 
                    from prok1402_baited_duplet d 
                    inner join prok1402_baited_duplet_file df on d.id = df.kplet_id
                    inner join cdd_profiles p1 on d.kplet_1 = p1.id
                    inner join cdd_profiles p2 on d.kplet_2 = p2.id
                    inner join prok1402_baited_files bf on bf.id = df.file_id
                    inner join sources s on bf.source_id = s.id
                    inner join genomes g on g.id = s.genome_id
                    inner join weights w on w.genome_id = g.id
                    group by d.id
                    order by loci desc;"""

    _db = DbClass()
    _db.cmd = sql_cmd

    rows = _db.retrieve()

    return rows
Exemplo n.º 20
0
def get_report_kplets(id2cdd, limit_to=500):

    _db = DbClass()
    _db.cmd = """SET group_concat_max_len=15000000"""
    _db.execute()

    _db.cmd = """select cp.*, count(distinct cwf.id) as cnt, group_concat(cwf.name) as files
                from crispr_5plets cp
                inner join crispr_5plets_wgs cpw on cp.id = cpw.kplet_id
                inner join crispr_wgs_files cwf on cpw.file_id = cwf.id
                group by cp.id
                having count(distinct cwf.id)>1
                order by cnt desc
                limit 0, %d""" % limit_to

    out_list = []

    for row in _db.retrieve():
        id = row[0]
        kplet_codes = ([id2cdd[int(_id)] for _id in row[1:6]])
        if len(set(kplet_codes)) != 5:
            continue
        count = row[6]
        files = row[7].split(',')
        tmp_kplet = Kplet(id=id, codes=kplet_codes, count=count, files=files)
        out_list.append(tmp_kplet)

    return out_list
Exemplo n.º 21
0
def get_report_kplets(id2cdd, limit_to=500, load_locations=None):

    _db = DbClass()
    _db.cmd = """SET group_concat_max_len=1500000"""
    _db.execute()

    _db.cmd = """select ap.* ,count(*) as cnt, sum(w.weight) as wgt, group_concat(awf.name) as an
                 from bacteria_3plets ap
                 inner join bacteria_3plets_win10 apw on ap.id = apw.kplet_id
                 inner join bacteria_win10_files awf on apw.file_id = awf.id
                 inner join sources s on awf.source_id=s.id
                 inner join weights w on w.genome_id=s.genome_id
                 group by ap.id
                 having count(distinct s.genome_id)>1
                 order by wgt desc
                 limit 0, %d""" % limit_to

    out_list = []

    for row in _db.retrieve():
        id = row[0]
        kplet_codes = ([id2cdd[int(_id)] for _id in row[1:4]])
        if len(set(kplet_codes)) != 3:
            continue
        count = row[4]
        weight = row[5]
        files = row[6].split(',')
        tmp_kplet = Kplet(id=id, codes=kplet_codes, count=count, files=files)
        out_list.append(tmp_kplet)

    _path = neighborhoods_path()
    if load_locations:
        [kplet.load_locations(_path) for kplet in out_list]

    return out_list
Exemplo n.º 22
0
def get_multiple_kplets():

    _db = DbClass()
    _db.cmd = "SET group_concat_max_len = 100000;"
    _db.execute()
    _db.cmd = """ select  ap.id, count(*) cnt, group_concat(convert(apw.file_id, char(15))) as file_ids
                  from bacteria_3plets ap
                  inner join bacteria_3plets_win10 apw on ap.id = apw.kplet_id
                  group by ap.id
                  having count(*)>1
                  order by cnt desc"""

    return _db.retrieve()
Exemplo n.º 23
0
def get_multiple_kplets():

    _db = DbClass()
    _db.cmd = "SET group_concat_max_len = 100000;"
    _db.execute()
    _db.cmd = """ select  ap.id, count(*) cnt, group_concat(convert(apw.file_id, char(15))) as file_ids
                  from bacteria_3plets ap
                  inner join bacteria_3plets_win10 apw on ap.id = apw.kplet_id
                  group by ap.id
                  having count(*)>1
                  order by cnt desc"""

    return _db.retrieve()
Exemplo n.º 24
0
def store_kplets(kplets, fname):

    _sql_kplet = """insert ignore into bacteria_4plets (kplet_1, kplet_2, kplet_3, kplet_4) values \n"""

    _sql_kplet_file = """insert ignore into bacteria_4plets_win10 (kplet_id, file_id) values \n"""

    for kplet in kplets:
        kplet = list(kplet)
        kplet.sort()
        kplet = tuple(kplet)
        _sql_kplet += (
            """((select id from cdd_profiles where code='%s'),""" +
            """(select id from cdd_profiles where code='%s'),""" +
            """(select id from cdd_profiles where code='%s'),""" +
            """(select id from cdd_profiles where code='%s')),\n""") % kplet

        _sql_kplet_file += (
            """((select id from bacteria_4plets where """ +
            """kplet_1=(select id from cdd_profiles where code='%s') """ +
            """and kplet_2=(select id from cdd_profiles where code='%s') """ +
            """and kplet_3=(select id from cdd_profiles where code='%s') """ +
            """and kplet_4=(select id from cdd_profiles where code='%s')),""" +
            """(select id from bacteria_win10_files where name='%s')),\n"""
        ) % (kplet + (fname, ))

    _sql_kplet = _sql_kplet[:-2]
    _sql_kplet += ';'

    _sql_kplet_file = _sql_kplet_file[:-2]
    _sql_kplet_file += ';'

    _db = DbClass()

    _db.cmd = _sql_kplet
    _db.execute()

    _db.cmd = _sql_kplet_file
    _db.execute()
Exemplo n.º 25
0
def get_code_kplet(kplet_id, id2cdd=None):

    _db = DbClass()

    if not id2cdd:
        _db.cmd = """select cp1.code, cp2.code
                from bacteria_2plets bp
                inner join cdd_profiles cp1 on cp1.id = bp.kplet_1
                inner join cdd_profiles cp2 on cp2.id = bp.kplet_2
                where bp.id = %d""" % kplet_id
        retval = _db.retrieve()[0]

    else:

        _db.cmd = """select kplet_1, kplet_2
                     from bacteria_2plets where id = %d""" % kplet_id

        retval = _db.retrieve()[0]
        retval = set([id2cdd[id] for id in retval])

    return retval
Exemplo n.º 26
0
def store_kplets_pile(kplets_pile, profile2id, file2id):

    kplet_table_name = "crispr_2plets"
    kplet_file_table_name = "crispr_2plets_wgs"

    _sql_kplet = """insert ignore into %s (kplet_1, kplet_2) values \n""" % kplet_table_name
    _sql_kplet_file = """insert ignore into %s (kplet_id, file_id) values \n""" % kplet_file_table_name

    for (kplets, fname) in kplets_pile:

        for kplet in kplets:
            kplet = list(kplet)
            kplet.sort()

            kplet = tuple([int(profile2id[k]) for k in kplet])

            _sql_kplet += """(%d, %d),\n""" % kplet

            _sql_kplet_file += ("""((select id from crispr_2plets where """ +
                                """kplet_1=%d and kplet_2=%d),""" +
                                """%d),\n""") % (kplet +
                                                 (int(file2id[fname]), ))

    _sql_kplet = _sql_kplet[:-2]
    _sql_kplet += ';'

    _sql_kplet_file = _sql_kplet_file[:-2]
    _sql_kplet_file += ';'

    _db = DbClass()

    _db.cmd = _sql_kplet
    _db.execute()
    _db.commit()

    _db.cmd = _sql_kplet_file
    _db.execute()
    _db.commit()
Exemplo n.º 27
0
def get_code_kplet(kplet_id):
    _sql_cmd = """select kplet_1, kplet_2, kplet_3, kplet_4, kplet_5 from archea_5plets_codes where id = %s""" % kplet_id
    _db = DbClass()
    _db.cmd = _sql_cmd
    return _db.retrieve()
Exemplo n.º 28
0
def store_kplets_pile(kplets_pile, cdd2id, file2id):

    _sql_kplet = """insert ignore into bacteria_3plets (kplet_1, kplet_2, kplet_3) values \n"""

    _sql_kplet_file = """insert ignore into bacteria_3plets_win10 (kplet_id, file_id) values \n"""

    for (kplets, fname) in kplets_pile:

        for kplet in kplets:
            kplet = list(kplet)
            kplet.sort()
            kplet = tuple([int(cdd2id[k]) for k in kplet])

            _sql_kplet += """(%d, %d, %d),\n""" % kplet

            _sql_kplet_file += ("""((select id from bacteria_3plets where """ +
                                """kplet_1=%d and kplet_2=%d and kplet_3=%d),""" +
                                """%d),\n""") % (kplet + (int(file2id[fname]),))

    _sql_kplet = _sql_kplet[:-2]
    _sql_kplet += ';'

    _sql_kplet_file = _sql_kplet_file[:-2]
    _sql_kplet_file += ';'

    _db = DbClass()

    _db.cmd = _sql_kplet
    _db.execute()
    _db.commit()

    _db.cmd = _sql_kplet_file
    _db.execute()
    _db.commit()
Exemplo n.º 29
0
def store_kplets_pile(kplets_pile, profile2id, file2id):

    kplet_table_name = "crispr_5plets"
    kplet_table_file_name = "crispr_5plets_wgs"

    _sql_kplet = """insert ignore into %s (kplet_1, kplet_2, kplet_3, kplet_4, kplet_5) values \n""" % kplet_table_name
    _sql_kplet_file = """insert ignore into %s (kplet_id, file_id) values \n""" % kplet_table_file_name

    for (kplets, fname) in kplets_pile:
        for kplet in kplets:
            kplet = list(kplet)
            kplet.sort()

            kplet = tuple([int(profile2id[k]) for k in kplet])

            _sql_kplet += """(%d, %d, %d, %d, %d),\n""" % kplet

            _sql_kplet_file += ("""((select id from crispr_5plets where """ +
                                """kplet_1=%d and kplet_2=%d and kplet_3=%d and kplet_4=%d and kplet_5=%d),""" +
                                """%d),\n""") % (kplet + (int(file2id[fname]),))

    _sql_kplet = _sql_kplet[:-2]
    _sql_kplet += ';'

    _sql_kplet_file = _sql_kplet_file[:-2]
    _sql_kplet_file += ';'

    _db = DbClass()

    _db.cmd = _sql_kplet
    _db.execute()
    _db.commit()

    _db.cmd = _sql_kplet_file
    _db.execute()
    _db.commit()
Exemplo n.º 30
0
def map_file_name2id():

    _db = DbClass()
    _db.cmd = """ select name, id from  bacteria_win10_files"""
    rows = _db.retrieve()
    return {row[0]: row[1] for row in rows}
Exemplo n.º 31
0
__author__ = 'hudaiber'

import sys
sys.path.append('../..')
from lib.db import DbClass

_db = DbClass()

# for table_name in ['crispr_4plets_wgs', 'crispr_3plets_wgs', 'crispr_2plets_wgs']:
for table_name in ['crispr_5plets_wgs']:
    print "Starting for table:", table_name
    print
    print "Loading the duplicates"
    _db.cmd = """SELECT count(*) as cnt, kplet_id, file_id
                FROM PatternQuest.%s
                group by kplet_id, file_id
                having count(*)>1
                order by cnt desc""" % table_name

    table_rows = [row for row in _db.retrieve()]

    print("Number of duplicate entries:", len(table_rows))
    fname = "duplicates_%s.txt" % table_name
    with open(fname, 'w') as f:
        [f.write("%d\t%d\t%d\n" % (row)) for row in table_rows]
    print "Duplicates saved in file:", fname
    continue
    sys.exit()
    print("Starting to remove duplicates")

    total_cnt = 0
Exemplo n.º 32
0
def map_file_id2name():

    _db = DbClass()
    _db.cmd = """select id, name from bacteria_win10_files"""
    return {str(l[0]): l[1] for l in _db.retrieve()}
Exemplo n.º 33
0
def store_kplets_pile(kplets_pile, cdd2id, file2id):

    _sql_kplet = """insert ignore into bacteria_3plets (kplet_1, kplet_2, kplet_3) values \n"""

    _sql_kplet_file = """insert ignore into bacteria_3plets_win10 (kplet_id, file_id) values \n"""

    for (kplets, fname) in kplets_pile:

        for kplet in kplets:
            kplet = list(kplet)
            kplet.sort()
            kplet = tuple([int(cdd2id[k]) for k in kplet])

            _sql_kplet += """(%d, %d, %d),\n""" % kplet

            _sql_kplet_file += (
                """((select id from bacteria_3plets where """ +
                """kplet_1=%d and kplet_2=%d and kplet_3=%d),""" +
                """%d),\n""") % (kplet + (int(file2id[fname]), ))

    _sql_kplet = _sql_kplet[:-2]
    _sql_kplet += ';'

    _sql_kplet_file = _sql_kplet_file[:-2]
    _sql_kplet_file += ';'

    _db = DbClass()

    _db.cmd = _sql_kplet
    _db.execute()
    _db.commit()

    _db.cmd = _sql_kplet_file
    _db.execute()
    _db.commit()
Exemplo n.º 34
0
def map_source2id():

    _db = DbClass()
    _db.cmd = """ SELECT id, name FROM PatternQuest.sources; """
    rows = _db.retrieve()
    return {row[1]: row[0] for row in rows}
__author__ = 'hudaiber'

import sys
sys.path.append('../..')
from lib.db import DbClass

_db = DbClass()

# for table_name in ['crispr_4plets_wgs', 'crispr_3plets_wgs', 'crispr_2plets_wgs']:
for table_name in ['crispr_5plets_wgs']:
    print "Starting for table:", table_name
    print
    print "Loading the duplicates"
    _db.cmd = """SELECT count(*) as cnt, kplet_id, file_id
                FROM PatternQuest.%s
                group by kplet_id, file_id
                having count(*)>1
                order by cnt desc""" % table_name

    table_rows = [row for row in _db.retrieve()]

    print("Number of duplicate entries:", len(table_rows))
    fname = "duplicates_%s.txt" % table_name
    with open(fname, 'w') as f:
        [f.write("%d\t%d\t%d\n"%(row)) for row in table_rows]
    print "Duplicates saved in file:", fname
    continue
    sys.exit()
    print("Starting to remove duplicates")

    total_cnt = 0
Exemplo n.º 36
0
def store_kplets_pile(prefix, kplets_pile, profile2id, file2id):

    kplet_table_name = "%s_5plets" % prefix
    kplet_table_file_name = "%s_5plets_files" % prefix

    _sql_kplet = """insert ignore into %s (kplet_1, kplet_2, kplet_3, kplet_4, kplet_5) values \n""" % kplet_table_name
    _sql_kplet_file = """insert ignore into %s (kplet_id, file_id) values \n""" % kplet_table_file_name

    for (kplets, fname) in kplets_pile:
        for kplet in kplets:
            kplet = list(kplet)
            kplet.sort()

            kplet = tuple([int(profile2id[k]) for k in kplet])

            _sql_kplet += """(%d, %d, %d, %d, %d),\n""" % kplet

            _sql_kplet_file += (
                """((select id from %s where """ +
                """kplet_1=%d and kplet_2=%d and kplet_3=%d and kplet_4=%d and kplet_5=%d),"""
                + """%d),\n""") % ((kplet_table_name, ) + kplet +
                                   (int(file2id[fname]), ))

    _sql_kplet = _sql_kplet[:-2]
    _sql_kplet += ';'

    _sql_kplet_file = _sql_kplet_file[:-2]
    _sql_kplet_file += ';'

    _db = DbClass()

    _db.cmd = _sql_kplet
    _db.execute()
    _db.commit()

    _db.cmd = _sql_kplet_file
    _db.execute()
    _db.commit()
Exemplo n.º 37
0
def store_kplets_pile(prefix, kplets_pile, profile2id, file2id):

    kplet_table_name = "%s_2plets" % prefix
    kplet_file_table_name = "%s_2plets_files" % prefix

    _sql_kplet = """insert ignore into %s (kplet_1, kplet_2) values \n""" % kplet_table_name
    _sql_kplet_file = """insert ignore into %s (kplet_id, file_id) values \n""" % kplet_file_table_name

    for (kplets, fname) in kplets_pile:
        
        for kplet in kplets:
            kplet = list(kplet)
            kplet.sort()

            kplet = tuple([int(profile2id[k]) for k in kplet])

            _sql_kplet += """(%d, %d),\n""" % kplet

            _sql_kplet_file += ("""((select id from %s where """ +
                                """kplet_1=%d and kplet_2=%d),""" +
                                """%d),\n""") % ((kplet_table_name,) + kplet + (int(file2id[fname]),))

    _sql_kplet = _sql_kplet[:-2]
    _sql_kplet += ';'

    _sql_kplet_file = _sql_kplet_file[:-2]
    _sql_kplet_file += ';'

    _db = DbClass()

    _db.cmd = _sql_kplet
    _db.execute()
    _db.commit()

    _db.cmd = _sql_kplet_file
    _db.execute()
    _db.commit()