Example #1
def getAgent(self):
    log_.info("Fetching agent list")
    self.cs.execute("SELECT sname,name,code from agent")
    agent_list = []
    for row in self.cs:
        agent_list.append(AGENT(toStr(row[0]), toStr(row[1]), row[2]))
    return agent_list
Example #2
import logging

import numpy as np

log_ = logging.getLogger(__name__)  # module logger; these excerpts assume it exists


def guess_sex(ref, data, sex_ratio_threshold=0.75):
    """Guess the sex of an individual by comparing coverage on
    heterogametic chromosomes. By convention, all chromosomes are
    assumed to be diploid unless their name starts with an `X` or `Z`.
    """
    ref["heterogametic"] = [v[0] in "XZxz" for v in ref.index.get_level_values('chrom')]
    data["heterogametic"] = [v[0] in "XZxz" for v in data.index.get_level_values('chrom')]

    n_sites = ref.groupby(ref.heterogametic).size()
    n_reads = data.groupby(data.heterogametic).apply(lambda df: np.sum(df.tref + df.talt))
    cov = n_reads / n_sites

    del data["heterogametic"]
    del ref["heterogametic"]

    # no heterogametic data observed; default to female
    if True not in cov.index:
        return "f"

    if cov[True] / cov[False] < sex_ratio_threshold:
        sex = "m"
        log_.info("guessing sex is male, X/A = %.4f/%.4f" % (cov[True], cov[False]))
    else:
        sex = "f"
        log_.info("guessing sex is female, X/A = %.4f/%.4f" % (cov[True], cov[False]))
    return sex
Example #3
def guess_sex(data, sex_ratio_threshold=0.8):
    """Guess the sex of an individual by comparing coverage on
    heterogametic chromosomes. By convention, all chromosomes are
    assumed to be diploid unless their name starts with an `X` or `Z`.
    """
    data["heterogametic"] = [
        v[0] in "XZxz" for v in data.index.get_level_values('chrom')
    ]
    cov = data.groupby(
        data.heterogametic).apply(lambda df: np.sum(df.tref + df.talt))
    cov = cov.astype(float)

    n_het = np.sum(data.heterogametic)
    n_hom = np.sum(~data.heterogametic)
    del data["heterogametic"]

    # no heterogametic data observed; default to female
    if True not in cov.index:
        return "f"

    # normalize read counts by the number of sites in each class
    cov[True] /= n_het
    cov[False] /= n_hom

    if cov[True] / cov[False] < sex_ratio_threshold:
        sex = "m"
        log_.info("guessing sex is male, X/A = %.4f/%.4f" %
                  (cov[True], cov[False]))
    else:
        sex = "f"
        log_.info("guessing sex is female, X/A = %.4f/%.4f" %
                  (cov[True], cov[False]))
    return sex
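
A minimal usage sketch for the version above, assuming `guess_sex` and the module context (numpy, pandas, `log_`) are in scope; the read counts are toy values:

import pandas as pd

idx = pd.MultiIndex.from_tuples(
    [("1", 100), ("1", 200), ("X", 100), ("X", 200)],
    names=["chrom", "pos"])
data = pd.DataFrame({"tref": [8, 6, 2, 1], "talt": [2, 4, 1, 1]}, index=idx)

print(guess_sex(data))  # X coverage is a quarter of autosomal coverage => "m"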
Example #4
def getUser(self):
    log_.info("Fetching user list")
    self.cs.execute("SELECT id,name,open_id,level,edit,address from user")
    userlist = []
    for row in self.cs:
        userlist.append(
            USER(row[0], toStr(row[1]), toStr(row[2]), row[3], row[4],
                 row[5]))
    return userlist
Example #5
import threading

def post_url():
    access_token, expires_in = get_token_info()
    print("token expires_in: %s" % expires_in)
    # re-arm the timer so the token is refreshed ~200 s before it expires
    timer = threading.Timer(expires_in - 200, post_url)
    timer.start()
    get_url_token[0] = "%s" % access_token
    print(access_token)
    post_url_freshing[0] = (
        'https://api.weixin.qq.com/cgi-bin/message/custom/send?access_token=%s'
        % access_token)
    log_.info("token refreshed, %s" % post_url_freshing[0])
    log_.info("refreshed token: %s" % get_url_token[0])
Example #6
def getTargetUser(self, open_id):
    try:
        log_.info("Fetching target user, open_id = " + open_id)
        # parameterized query avoids quoting and injection issues
        sql = "SELECT id,name,level,edit,address from user WHERE open_id = ?"
        self.cs.execute(sql, (open_id,))
        res = self.cs.fetchone()
        user = USER(res[0], toStr(res[1]), toStr(open_id), res[2], res[3],
                    res[4])
        return user
    except Exception as e:
        print(e)
        log_.warning("Failed to fetch user")
        return USER(9999, 'unknown user', open_id, 0, 0, 0)
Example #7
def updateAnswer(self, content):
    try:
        log_.info("Updating answer")
        key = content.split('号')[0] + '号'
        if key not in msg:
            return 'nomember'
        sql = "update msg set answer = ? where key = ?"
        self.cs.execute(sql, (content, key))
        self.conn.commit()
        msg[key].answer['answer'] = content
        return 'success'
    except Exception as e:
        print(e)
        log_.warning("Failed to update answer: %s" % e)
        return e
Example #8
import requests

def get_userInfo(open_id):
    url = ("https://api.weixin.qq.com/cgi-bin/user/info?access_token="
           + get_url_token[0] + "&openid=" + open_id + "&lang=zh_CN")
    log_.info("Trying to fetch user info: " + open_id)
    log_.info("url: " + url)
    res = requests.get(url)
    js = res.json()
    if "errmsg" not in js:
        return js["nickname"]
    else:
        print("Can not get user information")
        print(js)
        log_.error("Failed to fetch user info, %s" % js)
        print("used token: " + get_url_token[0])
        return "unknown user"
Example #9
def update_one_user(self, user_info):
    try:
        log_.info(
            "Updating one user: name: %s open_id: %s level %d edit %d address %d" %
            (user_info.name, user_info.open_id, user_info.level,
             user_info.can_edit, user_info.edit_address))
        sql = "replace into user(name,open_id,level,edit,address) values(?,?,?,?,?)"
        self.cs.execute(sql, (user_info.name, user_info.open_id,
                              user_info.level, user_info.can_edit,
                              user_info.edit_address))
        self.conn.commit()
        return True
    except Exception as e:
        print(e)
        log_.warning("Failed to update user: %s" % e)
        return False
Example #10
def refresh(self):
    log_.info("Refreshing cached lists")
    ul = self.getUser()
    user.clear()
    manager.clear()
    for u in ul:
        user[u.open_id] = u
        if u.level == 9:
            manager[u.name] = u.open_id
    self.getTableName()
    msg.clear()
    ml = self.getMsg()
    for m in ml:
        msg[m.key] = m
    agents.clear()  # drop stale entries, matching the other caches
    ag = self.getAgent()
    for a in ag:
        agents[a.code] = a
Example #11
def getTableName(self):
    try:
        log_.info("Fetching table structure")
        sql = "pragma table_info('msg')"
        self.cs.execute(sql)
        res = self.cs.fetchall()
        del title[:]  # reset the cached column list in place
        for rt in res:
            r = toStr(rt[1])
            # skip price columns (contain 'l'), default columns, and reserved ('预留') ones
            if 'l' in r or r in default_list or '预留' in r:
                continue
            title.append(r)
    except Exception as e:
        print(e)
        log_.critical("Failed to fetch table structure: %s" % e)
Example #12
def getMsg(self):
    log_.info("Fetching message list")
    cols = list(default_list) + list(title) + ["l%d" % i for i in range(10)]
    sql = "SELECT " + ",".join(cols) + " from msg"
    self.cs.execute(sql)
    msglist = []
    for row in self.cs:
        key = toStr(row[0])
        answer_dict = dict()
        price = Xstr(row[-10:])  # the trailing l0..l9 price columns
        for a in range(1, len(default_list)):
            answer_dict[default_list[a]] = toStr(row[a])
        for i in range(len(title)):
            answer_dict[title[i]] = toStr(row[i + len(default_list)])
        msglist.append(MSG(key, answer_dict, price))
    return msglist
Example #13
def getAnswer(self, content):
    try:
        log_.info("Fetching answer")
        ask = content.split('+')
        cols = ["answer"] + list(title) + ["l%d" % i for i in range(10)]
        sql = "SELECT " + ",".join(cols) + " from msg WHERE key = ?"
        self.cs.execute(sql, (ask[0],))
        res = self.cs.fetchone()
        answer_dict = {'answer': toStr(res[0])}
        price = Xstr(res[-10:])  # the trailing l0..l9 price columns
        for i, t in enumerate(title):
            answer_dict[t] = toStr(res[i + 1])
        return MSG(ask[0], answer_dict, price)
    except Exception as e:
        print(e)
        log_.warning("Failed to fetch answer: %s" % e)
        return e
Example #14
def filter_ref(ref, states, filter_delta=None, filter_pos=None, filter_map=None):
    n_states = len(states)

    if filter_delta is not None:
        kp = np.zeros(ref.shape[0], bool)
        for i, s1 in enumerate(states):
            for j in range(i + 1, n_states):
                s2 = states[j]
                f1 = np.nan_to_num(
                    ref[s1 + "_alt"] / (ref[s1 + "_alt"] + ref[s1 + "_ref"])
                )
                f2 = np.nan_to_num(
                    ref[s2 + "_alt"] / (ref[s2 + "_alt"] + ref[s2 + "_ref"])
                )
                delta = np.abs(f1 - f2)
                # keep a SNP if any pair of states differs by at least filter_delta
                kp = np.logical_or(kp, delta >= filter_delta)

        log_.info("filtering %s SNP due to delta", np.sum(1 - kp))
        ref = ref[kp]

    if filter_pos is not None:
        chrom = ref.index.get_level_values('chrom').factorize()[0]
        pos = ref.index.get_level_values('pos').values
        kp = nfp(chrom, pos, ref.shape[0], filter_pos)
        log_.info("filtering %s SNP due to pos filter", np.sum(1 - kp))
        ref = ref[kp]

    if filter_map is not None:
        chrom = ref.index.get_level_values('chrom').factorize()[0]
        pos = ref.index.get_level_values('map').values
        kp = nfp(chrom, pos, ref.shape[0], filter_map)
        log_.info("filtering %s SNP due to map filter", np.sum(1 - kp))
        ref = ref[kp]

    return ref
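
A minimal usage sketch for the delta filter above, assuming `filter_ref` and the module context (numpy, pandas, `log_`) are in scope; the panel and state names are toy values, and the `nfp` position/map filters are not exercised:

import pandas as pd

idx = pd.MultiIndex.from_tuples([("1", 100), ("1", 200)], names=["chrom", "pos"])
ref = pd.DataFrame({
    "AFR_alt": [0, 10], "AFR_ref": [10, 0],   # hypothetical source populations
    "NEA_alt": [10, 10], "NEA_ref": [0, 0],
}, index=idx)

# keeps only the first SNP, where the AFR/NEA allele frequencies differ by 1.0
filtered = filter_ref(ref, ["AFR", "NEA"], filter_delta=0.5)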
Example #15
def __init__(self):
    log_.info("Connecting to database")
    self.connect()
Example #16
def data2probs(
        df,
        IX,
        state_ids,
        cont_id=None,
        prior=None,
        cont_prior=(1e-8, 1e-8),
        ancestral=None,
):
    """create data structure that holds the genetic data

    creates an object of type `Probs` with the following entries:
    O : array[n_obs]: the number of alternative reads
    N : array[n_obs]: the total number of reads
    P_cont : array[n_obs]: the contaminant allele frequency
    lib[n_obs] : the library / read group of the observation
    alpha[n_snps, n_states] : the beta-prior for the alt allele
    beta[n_snps, n_states] : the beta-prior for the ref allele
    """

    alpha_ix = ["%s_alt" % s for s in state_ids]
    beta_ix = ["%s_ref" % s for s in state_ids]
    snp_ix_states = set(alpha_ix + beta_ix)

    if cont_id is not None:
        cont = "%s_alt" % cont_id, "%s_ref" % cont_id
        snp_ix_states.update(cont)
    if ancestral is not None:
        anc = "%s_alt" % ancestral, "%s_ref" % ancestral
        snp_ix_states.update(anc)

    snp_df = df[list(snp_ix_states)]
    snp_df = snp_df[~snp_df.index.get_level_values('snp_id').duplicated()]
    #snp_df = df[list(snp_ix_states)].groupby(df.index.names).first()
    n_snps = len(snp_df.index.get_level_values('snp_id'))
    n_states = len(state_ids)

    if prior is None:  # empirical bayes
        alpha = np.empty((n_snps, n_states))
        beta = np.empty((n_snps, n_states))
        if cont_id is not None:
            ca, cb = empirical_bayes_prior(snp_df[cont[0]], snp_df[cont[1]])

        if ancestral is None:
            for i, (a, b, s) in enumerate(zip(alpha_ix, beta_ix, state_ids)):
                pa, pb = empirical_bayes_prior(snp_df[a], snp_df[b])
                log_.info("[%s]EB prior [a=%.4f, b=%.4f]: " % (s, pa, pb))
                alpha[:, i] = snp_df[a] + pa
                beta[:, i] = snp_df[b] + pb
        else:
            anc_ref, anc_alt = ancestral + "_ref", ancestral + "_alt"
            ref_is_anc = (snp_df[anc_ref] > 0) & (snp_df[anc_alt] == 0)
            alt_is_anc = (snp_df[anc_alt] > 0) & (snp_df[anc_ref] == 0)
            ref_is_der, alt_is_der = alt_is_anc, ref_is_anc
            anc_is_unknown = (1 - alt_is_anc) * (1 - ref_is_anc) == 1
            for i, (a, b, s) in enumerate(zip(alpha_ix, beta_ix, state_ids)):
                pa, pb = empirical_bayes_prior(snp_df[a], snp_df[b])
                log_.info("[%s]EB prior0 [anc=%.4f, der=%.4f]: " % (s, pa, pb))
                alpha[:, i], beta[:, i] = snp_df[a], snp_df[b]
                alpha[anc_is_unknown, i] += pa
                beta[anc_is_unknown, i] += pb

                m_anc = pd.concat((ref_is_anc, alt_is_anc), axis=1)
                m_der = pd.concat((ref_is_der, alt_is_der), axis=1)
                ANC = np.array(snp_df[[b, a]])[m_anc]
                DER = np.array(snp_df[[b, a]])[m_der]

                pder, panc = empirical_bayes_prior(DER, ANC, True)
                log_.info("[%s]EB prior1 [anc=%.4f, der=%.4f]: " %
                          (s, panc, pder))
                alpha[alt_is_anc, i] += panc
                alpha[alt_is_der, i] += pder
                beta[ref_is_anc, i] += panc
                beta[ref_is_der, i] += pder

        P = Probs2(
            O=np.array(df.talt.values, np.uint8),  # uint8: counts assumed < 256
            N=np.array(df.tref.values + df.talt.values, np.uint8),
            P_cont=np.zeros_like(df.talt.values) if cont_id is None else
            np.array((df[cont[0]].values + ca) /
                     (df[cont[0]].values + df[cont[1]].values + ca + cb)),
            alpha=alpha[IX.diploid_snps],
            beta=beta[IX.diploid_snps],
            alpha_hap=alpha[IX.haploid_snps],
            beta_hap=beta[IX.haploid_snps],
            lib=np.array(df.lib),
        )
        return P

    else:
        if ancestral is None:
            pass
        else:
            # anc_ref, anc_alt = f"{ancestral}_ref", f"{ancestral}_alt"
            anc_ref, anc_alt = ancestral + "_ref", ancestral + "_alt"
            pa = df[anc_alt] + prior * (1 - 2 * np.sign(df[anc_alt]))
            pb = df[anc_ref] + prior * (1 - 2 * np.sign(df[anc_ref]))
        cont = "%s_alt" % cont_id, "%s_ref" % cont_id
        ca, cb = cont_prior

        alpha = np.array(snp_df[alpha_ix]) + prior
        beta = np.array(snp_df[beta_ix]) + prior
        P = Probs2(
            O=np.array(df.talt.values, np.uint8),  # uint8: counts assumed < 256
            N=np.array(df.tref.values + df.talt.values, np.uint8),
            P_cont=None if cont_id is None else np.array(
                (df[cont[0]] + ca) / (df[cont[0]] + df[cont[1]] + ca + cb)),
            alpha=alpha[IX.diploid_snps],
            beta=beta[IX.diploid_snps],
            alpha_hap=alpha[IX.haploid_snps],
            beta_hap=beta[IX.haploid_snps],
            lib=np.array(df.lib),
        )
        return P
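
`Probs2` is not defined in these excerpts; a minimal sketch consistent with the constructor calls above (the field names are taken from those calls, while the NamedTuple choice and the types are assumptions):

from typing import NamedTuple
import numpy as np

class Probs2(NamedTuple):
    O: np.ndarray          # alt read count per observation
    N: np.ndarray          # total read count per observation
    P_cont: np.ndarray     # contaminant allele frequency per observation
    alpha: np.ndarray      # alt-allele beta-prior, diploid SNPs
    beta: np.ndarray       # ref-allele beta-prior, diploid SNPs
    alpha_hap: np.ndarray  # alt-allele beta-prior, haploid SNPs
    beta_hap: np.ndarray   # ref-allele beta-prior, haploid SNPs
    lib: np.ndarray        # library / read group per observation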
Example #17
def init_pars(state_ids,
              sex=None,
              F0=0.001,
              tau0=1,
              e0=1e-2,
              c0=1e-2,
              est_inbreeding=False,
              init_guess=None,
              do_hap=True,
              **kwargs):
    """initialize parameters

    returns a pars object
    """
    homo = [s for s in state_ids]
    het = []
    hap = ["h%s" % s for s in homo]

    for i, s in enumerate(state_ids):
        for s2 in state_ids[i + 1:]:
            het.append(s + s2)
    gamma_names = homo + het
    if est_inbreeding:
        gamma_names.extend(hap)

    n_states = len(gamma_names)
    n_homo = len(homo)
    n_het = len(het)
    n_hap = len(hap)

    alpha0 = np.array([1 / n_states] * n_states)
    alpha0_hap = np.array([1 / n_hap] * n_hap)

    trans_mat = np.zeros((n_states, n_states)) + 2e-2
    trans_mat_hap = np.zeros((n_hap, n_hap)) + 2e-2

    np.fill_diagonal(trans_mat, 1 - (n_states - 1) * 2e-2)
    np.fill_diagonal(trans_mat_hap, 1 - (n_hap - 1) * 2e-2)
    cont = defaultdict(lambda: c0)
    error = defaultdict(lambda: e0)

    if init_guess is not None:
        # guess = [i for i, n in enumerate(gamma_names) if init_guess in n]
        guess = [i for i, n in enumerate(gamma_names) if n in init_guess]
        log_.info("starting with guess %s " % guess)
        trans_mat[:, guess] = trans_mat[:, guess] + 1
        trans_mat /= np.sum(trans_mat, 1)[:, np.newaxis]

    try:
        if len(F0) == n_homo:
            F = F0
        elif len(F0) == 1:
            F = F0 * n_homo
        else:
            F = [F0]
    except TypeError:
        F = [F0] * n_homo
    try:
        if len(tau0) == n_homo:
            tau = tau0
        elif len(tau0) == 1:
            tau = tau0 * n_homo
        else:
            tau = [tau0]
    except TypeError:
        tau = [tau0] * n_homo
    if do_hap:
        return ParsHD(
            alpha0,
            alpha0_hap,
            trans_mat,
            trans_mat_hap,
            cont,
            error,
            F,
            tau,
            gamma_names,
            sex=sex,
        )
    else:
        return Pars(alpha0,
                    trans_mat,
                    cont,
                    error,
                    F,
                    tau,
                    gamma_names,
                    sex=sex)
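
A minimal call sketch, assuming `init_pars` is in scope (state names hypothetical; `ParsHD`/`Pars` are the project's own containers):

# three hypothetical source states, default rate parameters
pars = init_pars(["AFR", "NEA", "DEN"], est_inbreeding=False, do_hap=True)
# gamma_names: ['AFR', 'NEA', 'DEN', 'AFRNEA', 'AFRDEN', 'NEADEN']
# scalar F0/tau0 expand to one value per homozygous state: [0.001]*3, [1]*3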
Example #18
def data2probs(
    df,
    IX,
    state_ids,
    cont_id=None,
    prior=None,
    cont_prior=(1e-8, 1e-8),
    ancestral=None,
    ancestral_prior=0
):
    """create data structure that holds the reference genetic data

    creates an object of type `Probs` with the following entries:
    O : array[n_obs]: the number of alternative reads
    N : array[n_obs]: the total number of reads
    P_cont : array[n_obs]: the contaminant allele frequency
    lib[n_obs] : the library / read group of the observation
    alpha[n_snps, n_states] : the beta-prior for the alt allele
    beta[n_snps, n_states] : the beta-prior for the ref allele


    input:

    df: merged reference and SNP data. has columns tref, talt with the read
    counts at each SNP, and "X_alt, X_ref" for each source pop
    IX: index object, with number of snps, number of reads, etc.
    state_ids: the references to keep
    prior: None for empirical bayes prior, otherwise prior to be added
    ancestral: ancestral allele
    """

    alt_ix = ["%s_alt" % s for s in state_ids]
    ref_ix = ["%s_ref" % s for s in state_ids]
    snp_ix_states = set(alt_ix + ref_ix)

    if cont_id is not None:
        cont = "%s_alt" % cont_id, "%s_ref" % cont_id
        snp_ix_states.update(cont)
    if ancestral is not None:
        anc = "%s_alt" % ancestral, "%s_ref" % ancestral
        snp_ix_states.update(anc)

    snp_df = df[list(snp_ix_states)]
    snp_df = snp_df[~snp_df.index.get_level_values('snp_id').duplicated()]
    #snp_df = df[list(snp_ix_states)].groupby(df.index.names).first()
    n_snps = len(snp_df.index.get_level_values('snp_id'))
    n_states = len(state_ids)


    if prior is None:  # empirical bayes, estimate from data
        alt_prior = np.empty((n_snps, n_states))
        ref_prior = np.empty((n_snps, n_states))
        if cont_id is not None:
            ca, cb = empirical_bayes_prior(snp_df[cont[0]], snp_df[cont[1]])

        if ancestral is None:
            for i, (a, b, s) in enumerate(zip(alt_ix, ref_ix, state_ids)):
                pa, pb = empirical_bayes_prior(snp_df[a], snp_df[b])
                log_.info("[%s]EB prior [a=%.4f, b=%.4f]: " % (s, pa, pb))
                alt_prior[:, i] = snp_df[a] + pa
                ref_prior[:, i] = snp_df[b] + pb
        else:
            anc_ref, anc_alt = f"{ancestral}_ref", f"{ancestral}_alt"

            #set up vectors stating which allele is ancestral
            ref_is_anc = (snp_df[anc_ref] > 0) & (snp_df[anc_alt] == 0)
            alt_is_anc = (snp_df[anc_alt] > 0) & (snp_df[anc_ref] == 0)
            ref_is_der, alt_is_der = alt_is_anc, ref_is_anc
            anc_is_unknown = (1 - alt_is_anc) * (1 - ref_is_anc) == 1

            for i, (alt_col, ref_col, s) in enumerate(zip(alt_ix, ref_ix, state_ids)):

                #1. set up base entries based on observed counts
                alt_prior[:, i] = snp_df[alt_col]
                ref_prior[:, i] = snp_df[ref_col]

                #2. where anc is unknown, add symmetric prior estimated from data
                pa, pb = empirical_bayes_prior(snp_df[alt_col], snp_df[ref_col])
                log_.info("[%s]EB prior0 [anc=%.4f, der=%.4f]: " % (s, pa, pb))
                alt_prior[anc_is_unknown, i] += pa
                ref_prior[anc_is_unknown, i] += pb

                #3. where anc is known, create indices
                m_anc = pd.concat((ref_is_anc, alt_is_anc), axis=1)
                m_der = pd.concat((ref_is_der, alt_is_der), axis=1)
                ANC = np.array(snp_df[[ref_col, alt_col]])[m_anc]
                DER = np.array(snp_df[[ref_col, alt_col]])[m_der]

                pder, panc = empirical_bayes_prior(DER, ANC, known_anc=True)
                panc += ancestral_prior
                log_.info("[%s]EB prior1 [anc=%.4f, der=%.4f]: " % (s, panc, pder))
                alt_prior[alt_is_anc, i] += panc 
                alt_prior[alt_is_der, i] += pder
                ref_prior[ref_is_anc, i] += panc
                ref_prior[ref_is_der, i] += pder

        assert np.all(df.tref.values + df.talt.values < 256)

        P = Probs2(
            O=np.array(df.talt.values, np.uint8),
            N=np.array(df.tref.values + df.talt.values, np.uint8),
            P_cont=np.zeros_like(df.talt.values)
            if cont_id is None
            else np.array(
                (df[cont[0]].values + ca) / (df[cont[0]].values + df[cont[1]].values + ca + cb)
            ),
            alpha=alt_prior[IX.diploid_snps],
            beta=ref_prior[IX.diploid_snps],
            alpha_hap=alt_prior[IX.haploid_snps],
            beta_hap=ref_prior[IX.haploid_snps],
            lib=np.array(df.lib),
        )
        return P
    else:

        """ancestral allele contribution to prior
        the ancestral allele adds one pseudocount to the data
        """
        if ancestral is None:
            prior_anc_alt, prior_anc_ref = np.zeros(1), np.zeros(1)
        else:
            anc_ref, anc_alt = f"{ancestral}_ref", f"{ancestral}_alt"
            prior_anc_alt = snp_df[anc_alt].to_numpy() * ancestral_prior
            prior_anc_ref = snp_df[anc_ref].to_numpy() * ancestral_prior

        cont = "%s_alt" % cont_id, "%s_ref" % cont_id
        ca, cb = cont_prior

        alt_prior = snp_df[alt_ix].to_numpy() + prior_anc_alt[:, np.newaxis] + prior
        ref_prior = snp_df[ref_ix].to_numpy() + prior_anc_ref[:, np.newaxis] + prior
        assert np.all(df.tref.values + df.talt.values < 256)
        P = Probs2(
            O=np.array(df.talt.values, np.uint8),
            N=np.array(df.tref.values + df.talt.values, np.uint8),
            P_cont=0.
            if cont_id is None
            else np.array(
                (df[cont[0]] + ca) / (df[cont[0]] + df[cont[1]] + ca + cb)
            ),
            alpha=alt_prior[IX.diploid_snps],
            beta=ref_prior[IX.diploid_snps],
            alpha_hap=alt_prior[IX.haploid_snps],
            beta_hap=ref_prior[IX.haploid_snps],
            lib=np.array(df.lib),
        )
        return P
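
A minimal, hypothetical call sketch for the fixed-prior branch above, assuming `data2probs` and a `Probs2` container are in scope; `IX` is mimicked by a bare namespace carrying just the two index arrays the function uses:

import numpy as np
import pandas as pd
from types import SimpleNamespace

# toy merged frame: two SNPs, one read each, a single source pop "AFR"
df = pd.DataFrame({
    "tref": [1, 0], "talt": [0, 1],
    "AFR_alt": [5, 2], "AFR_ref": [5, 8],
    "lib": ["lib1", "lib1"],
}, index=pd.Index([0, 1], name="snp_id"))

IX = SimpleNamespace(diploid_snps=np.array([0, 1]),
                     haploid_snps=np.array([], dtype=int))
P = data2probs(df, IX, ["AFR"], prior=0.5)  # flat beta(0.5, 0.5) pseudocounts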
Example #19
def close(self):
    log_.info("Closing database")
    self.conn.close()