def get_phi_from_database(tjid):
    """
    Fetch the personal information of one specimen from the database.

    Parameters
    ----------
    tjid : str
        specimen barcode

    Returns
    --------
    phi : PHI
        PHI object filled from the matching ``xy_specimen`` row.

    Notes
    -----
    When no row matches the barcode an error is logged and the process is
    terminated through ``exit(1)``.
    """
    conn = connect_database()
    try:
        with conn.cursor() as cur:
            # The barcode is bound as a query parameter instead of being
            # spliced into the SQL text, so quotes in the value cannot change
            # the statement.
            # NOTE(review): assumes the driver behind connect_database() uses
            # the DB-API "format" paramstyle (%s), as pymysql/psycopg2 do.
            sql = 'select barcode, name, gender, dob, phone_no, test_product ' \
                  'from xy_specimen where barcode=%s;'
            cur.execute(sql, (tjid,))
            res = cur.fetchone()
    finally:
        # Release the connection even when the query raises.
        conn.close()

    if res is None:
        logger.error('找不到样本%s的个人信息!' % tjid)
        exit(1)

    phi = PHI()
    phi.barcode = res[0]
    phi.name = ensure_unicode(res[1])
    phi.gender = ensure_unicode(res[2])
    phi.dob = res[3]
    phi.phone_no = res[4]
    phi.test_product = res[5]
    return phi
def read_physical_examination(fp):
    """
    Load physical examination records from a TSV file.

    Parameters
    -----------
    fp : str
        file path

    Returns
    ---------
    res : list
        list of PhysicalExaminationRecord objects, one per data row.
    """
    # (attribute name, column index, pass through ensure_unicode?)
    layout = (
        ('class0', 0, True),
        ('ksbm', 1, True),
        ('tjid', 2, False),
        ('sfxmmc', 3, True),
        ('item_code', 4, False),
        ('item_name', 5, True),
        ('check_result', 6, False),
        ('unit', 7, False),
        ('def_value', 8, True),
    )
    parsed = []
    for row in split_tsv_line(fp):
        entry = PhysicalExaminationRecord()
        for attr, column, decode in layout:
            text = row[column].strip()
            setattr(entry, attr, ensure_unicode(text) if decode else text)
        parsed.append(entry)
    return parsed
def get_questionnaire_result_from_database(tjid):
    """
    Fetch the questionnaire answers of one examinee from the database.

    Parameters
    -----------
    tjid : str
        specimen barcode

    Returns
    --------
    records : list
        list of QuestionnaireRecord objects; an empty list (after logging a
        warning) when the barcode has no questionnaire rows.
    """
    conn = connect_database()
    try:
        with conn.cursor() as cur:
            # Bind the barcode as a parameter rather than formatting it into
            # the SQL string.
            # NOTE(review): assumes the driver uses the DB-API "format"
            # paramstyle (%s) - confirm against connect_database().
            sql = 'select lbcode, lbname, qcode, question, answer ' \
                  'from xy_wenjuan where barcode=%s'
            cur.execute(sql, (tjid,))
            all_res = cur.fetchall()
    finally:
        # The connection was previously left open; close it on every path.
        conn.close()

    if not all_res:
        logger.warn('找不到样本%s的问卷调查信息!' % tjid)
        return []

    records = []
    for res in all_res:
        ques_record = QuestionnaireRecord()
        ques_record.tjid = tjid
        ques_record.lbbm = res[0]
        ques_record.lbmc = ensure_unicode(res[1])
        ques_record.qcode = res[2]
        ques_record.question = ensure_unicode(res[3])
        ques_record.answer = ensure_unicode(res[4])
        records.append(ques_record)
    return records
def read_gene_test_result(fp):
    """
    Load gene test results (produced by Genopipe) from a TSV file.

    Parameters
    ------------
    fp : str
        file path

    Returns
    ---------
    res : list
        list of GeneTestRecord objects, one per data row.
    """
    def _plain(text):
        return text

    def _lowered(text):
        return text.lower()

    # (attribute name, column index, conversion applied after strip())
    layout = (
        ('accession', 0, _plain),
        ('barcode', 1, _plain),
        ('product', 2, _lowered),
        ('category', 3, ensure_unicode),
        ('test_item', 4, ensure_unicode),
        ('risk_level', 5, ensure_unicode),
        ('algorithm', 6, _plain),
        ('risk_value', 7, _plain),
        ('snp_name', 8, _plain),
        ('genotype', 9, _plain),
        ('gene', 10, _plain),
        ('ref_alt', 11, _plain),
    )
    parsed = []
    for row in split_tsv_line(fp):
        entry = GeneTestRecord()
        for attr, column, convert in layout:
            setattr(entry, attr, convert(row[column].strip()))
        parsed.append(entry)
    return parsed
def kernel(A, B):
    '''
    Custom kernel function.

    Every row of A is compared with every row of B, giving a
    len(A) x len(B) matrix (list of lists) of scores.  Each row is expected
    to carry a token id in its first element.  A score is the sum of up to
    three contributions, each enabled by a name taken from the surrounding
    scope:

    * dependency-tree overlap (when ``syntax_feature_types`` is not None),
      weighted by ``syntactic_multiplier``;
    * semantic similarity of the tokens' noun synsets (when
      ``semantic_similarity`` is not None), weighted by
      ``semantic_multiplier``;
    * ``suffix_multiplier`` when ``include_suffix`` is set and both tokens
      have the same, non-None suffix.
    '''
    use_syntax = syntax_feature_types is not None
    use_semantics = semantic_similarity is not None

    matrix = []
    for row_a in A:
        id_a = int(row_a[0])
        word_a = u.ensure_unicode(features.get_token(id_a))

        # Left-hand side features are computed once per row of A.
        if use_syntax:
            syntax_a = features.get_features_idx(id_a, syntax_feature_types)
        if use_semantics:
            synsets_a = nouns_only(wordnet.synsets(word_a))
        if include_suffix:
            ending_a = features.get_suffix(word_a)

        scores = []
        for row_b in B:
            id_b = int(row_b[0])
            word_b = u.ensure_unicode(features.get_token(id_b))
            score = 0

            # Dependency tree kernel
            if use_syntax:
                syntax_b = features.get_features_idx(
                    id_b, syntax_feature_types)
                score += syntactic_multiplier * dict_dot(syntax_a, syntax_b)

            # Semantic similarity
            if use_semantics:
                synsets_b = nouns_only(wordnet.synsets(word_b))
                score += semantic_multiplier * max_similarity(
                    semantic_similarity, synsets_a, synsets_b,
                    information_content)

            # Matching suffixes
            if include_suffix:
                ending_b = features.get_suffix(word_b)
                if ending_a is not None and ending_a == ending_b:
                    score += suffix_multiplier

            scores.append(score)
        matrix.append(scores)

    return matrix
def eval_pair(self, a, b):
    '''
    Evaluate the kernel for two examples given by token id.

    The first element of each example is converted to an int, looked up
    through ``self.features.get_token`` and the resulting pair of tokens
    is handed to ``eval_pair_token``.
    '''
    tokens = [
        u.ensure_unicode(self.features.get_token(int(example[0])))
        for example in (a, b)
    ]
    return self.eval_pair_token(*tokens)
def to_mpris2(self):
    """
        Builds the MPRIS2 (xesam:/mpris: keyed) dictionary for this
        metadata.  Entries from the extra data are only copied when their
        key is listed in Metadata.MPRIS2_KEYS and was not already set.

        >>> mt = Metadata(title='Title', artist='Artist1, Artist2,Artist3',
        ...               album='Album', arturl='file:///art/url',
        ...               location='file:///path/to/file', length=123,
        ...               tracknum=456,
        ...               extra={ 'title': 'Fake Title',
        ...                       'xesam:album': 'Fake Album',
        ...                       'xesam:useCount': 780,
        ...                       'xesam:userRating': 1.0,
        ...                       'custom value': 'yoooooo',
        ...                       })
        >>> dict = mt.to_mpris2()
        >>> print dict['xesam:title']
        Title
        >>> print dict['xesam:artist']
        [dbus.String(u'Artist1'), dbus.String(u'Artist2'), dbus.String(u'Artist3')]
        >>> print dict['xesam:url']
        file:///path/to/file
        >>> print dict['mpris:artUrl']
        file:///art/url
        >>> print dict['mpris:length']
        123
        >>> print dict['xesam:trackNumber']
        456
        >>> print dict['xesam:userRating']
        1.0
        >>> 'custom value' in dict
        False
        >>> mt2 = Metadata.from_dict(dict)
        >>> print mt2.title
        Title
        >>> print mt2.artist
        Artist1, Artist2, Artist3
        >>> print mt2.album
        Album
        >>> print mt2.location
        file:///path/to/file
    """
    ret = dbus.Dictionary(signature='sv')

    # (attribute, MPRIS2 key) pairs for the plain string fields
    string_fields = (
        ('title', 'xesam:title'),
        ('album', 'xesam:album'),
        ('arturl', 'mpris:artUrl'),
        ('location', 'xesam:url'),
    )
    for attr, key in string_fields:
        value = getattr(self, attr)
        if value is None:
            continue
        ret[key] = dbus.String(utils.ensure_unicode(value))

    # The artist attribute is a comma separated string; MPRIS2 gets a list.
    if self.artist is not None:
        artists = []
        for name in self.artist.split(','):
            artists.append(dbus.String(name.strip()))
        ret['xesam:artist'] = artists

    if self.length >= 0:
        ret['mpris:length'] = dbus.Int64(self.length)
    if self.tracknum >= 0:
        ret['xesam:trackNumber'] = dbus.Int32(self.tracknum)

    for key, value in self._extra.items():
        if key not in Metadata.MPRIS2_KEYS or key in ret:
            continue
        ret[key] = value
    return ret
def handleMessages(self, msg, status):
    """
    Dispatch a received chat message to a built-in command or a script.

    Messages whose status is not ``Skype4Py.cmsReceived`` are ignored.
    If neither a built-in nor a script matches the command, a
    "don't know" reply is sent to the chat.

    :param msg: Skype4Py ChatMessage object
    :param status: Status of when skype received the message
    """
    if status != Skype4Py.cmsReceived:
        return

    chat = msg.Chat
    body = utils.ensure_unicode(msg.Body).encode("utf-8")
    command, arguments = self.parseCommands(body)
    script = self.scriptHandler.get_script_by_command(command)

    # Built-in commands take precedence over scripts.
    if command in self.builtins:
        builtin = self.builtins[command]
        builtin(arguments, msg, status, self.scriptHandler)
        logger.info('Running built in command %s with arguments %s'
                    % (command, arguments))
        return

    if not script:
        chat.SendMessage("Don't know what %s does!" % command)
        return

    def reply(result):
        chat.SendMessage(result)

    logger.info('Running command: %s with arguments: %s'
                % (command, arguments))
    script.run(arguments, reply)
def handleMessages(self, msg, status):
    """
    Route an incoming chat message to the matching command handler.

    Only messages with status ``Skype4Py.cmsReceived`` are processed.
    The message body is parsed into a command name and arguments; a
    built-in handler wins over a script, and an unknown command is
    answered with a notice in the chat.

    :param msg: Skype4Py ChatMessage object
    :param status: Status of when skype received the message
    """
    if status != Skype4Py.cmsReceived:
        return

    chat = msg.Chat
    text = utils.ensure_unicode(msg.Body).encode("utf-8")
    name, args = self.parseCommands(text)
    script = self.scriptHandler.get_script_by_command(name)
    is_builtin = name in self.builtins

    if is_builtin:
        self.builtins[name](args, msg, status, self.scriptHandler)
        logger.info(
            'Running built in command %s with arguments %s' % (name, args))
    elif script:
        def send_result(result):
            chat.SendMessage(result)

        logger.info(
            'Running command: %s with arguments: %s' % (name, args))
        script.run(args, send_result)
    else:
        chat.SendMessage("Don't know what %s does!" % name)
def get_physical_examination_from_database(tjid, product):
    """
    Fetch the physical examination records of one specimen from the database.

    Parameters
    -----------
    tjid : str
        specimen barcode
    product : str
        product name, like healthwise or cardiowise.

    Returns
    --------
    records : list
        list of PhysicalExaminationRecord objects that belong to the product;
        an empty list (after logging a warning) when the barcode has no rows.
    """
    conn = connect_database()
    try:
        with conn.cursor() as cur:
            # Bind the barcode as a parameter rather than formatting it into
            # the SQL string.
            # NOTE(review): assumes the driver uses the DB-API "format"
            # paramstyle (%s) - confirm against connect_database().
            sql = 'select itemcode, itemname, result, unit, defvalue, ' \
                  'class0, ksbm, orderitem ' \
                  'from xy_tijian Where barcode=%s'
            cur.execute(sql, (tjid,))
            all_res = cur.fetchall()
    finally:
        # The connection was previously left open; close it on every path.
        conn.close()

    if not all_res:
        logger.warn('找不到样本%s的体检信息!' % tjid)
        return []

    records = []
    for res in all_res:
        pe_record = PhysicalExaminationRecord()
        pe_record.tjid = tjid
        pe_record.item_code = res[0]
        pe_record.item_name = res[1]
        pe_record.check_result = res[2]
        pe_record.unit = res[3] if res[3] is not None else ''
        pe_record.def_value = ensure_unicode(convert_text2tex(res[4])) \
            if res[4] is not None else ''
        # class0 comes from column 5, so the None guard must test column 5
        # (it used to test column 4, the default-value column).
        pe_record.class0 = ensure_unicode(res[5]) if res[5] is not None else ''
        pe_record.ksbm = res[6]
        pe_record.sfxmmc = ensure_unicode(res[7]) if res[7] is not None else ''

        # todo, Hard code, should be modified in future.
        if product == 'healthwise' and pe_record.class0 != u'基本指标':
            continue
        if product == 'cardiowise' and pe_record.class0 \
                not in [u'基本指标', u'心血管病风险筛查']:
            continue
        records.append(pe_record)
    return records
def get_gene_test_result_from_database(tjid, test_product, phenotypes=None):
    """
    Fetch the gene test results of one specimen and product from the database.

    Parameters
    ------------
    tjid : str
        specimen barcode
    test_product : str
        product name, like healthwise. Compared against the lower-cased
        ``test_product`` column.
    phenotypes : list
        list of phenotype names; when given, only records whose test item is
        in this list are returned.

    Returns
    --------
    records : list
        list of GeneTestRecord objects; an empty list (after logging a
        warning) when nothing matches.
    """
    conn = connect_database()
    try:
        with conn.cursor() as cur:
            # Both values are bound as parameters rather than formatted into
            # the SQL string.
            # NOTE(review): assumes the driver uses the DB-API "format"
            # paramstyle (%s) - confirm against connect_database().
            sql = 'select barcode, test_item, gene, result, category ' \
                  'from gene_results ' \
                  'where barcode=%s and lower(test_product)=%s'
            cur.execute(sql, (tjid, test_product))
            all_res = cur.fetchall()
    finally:
        # Release the connection even when the query raises.
        conn.close()

    if not all_res:
        logger.warn('找不到样本%s的基因检测结果!' % tjid)
        return []

    records = []
    for res in all_res:
        gene_test_record = GeneTestRecord()
        gene_test_record.barcode = res[0]
        gene_test_record.test_item = ensure_unicode(res[1])
        gene_test_record.gene = res[2]
        gene_test_record.risk_level = ensure_unicode(res[3])
        gene_test_record.category = ensure_unicode(res[4])
        # No phenotype filter means every record is kept.
        if phenotypes is None or gene_test_record.test_item in phenotypes:
            records.append(gene_test_record)
    return records
def to_dict(self):
    """
        Returns the player information as a plain dict.

        Each public key is read from the matching underscore-prefixed
        attribute and passed through utils.ensure_unicode.
    """
    fields = ('name', 'appname', 'binname', 'cmd', 'icon')
    return {
        field: utils.ensure_unicode(getattr(self, '_' + field))
        for field in fields
    }
def precompute_parallel(self, examples, num_processes=12):
    """
    Use multiprocessing to precompute the kernel evaluation of all pairs
    in examples.

    Every unordered pair of examples, including each example paired with
    itself, is put on a work queue.  ``num_processes`` worker processes
    running ``self.precompute_worker`` consume the pairs, and the results
    are stored in ``self.cache`` keyed by the frozenset of the two tokens.
    """
    work_queue = iq.IterableQueue()
    result_queue = iq.IterableQueue()

    # Add all the example pairs to the work queue
    print('loading work onto queue')
    work_producer = work_queue.get_producer()
    # combinations_with_replacement yields n * (n + 1) / 2 pairs (self-pairs
    # included), so that is the total the progress display must use.
    num_examples = len(examples)
    num_combinations = num_examples * (num_examples + 1) // 2
    combinations = itertools.combinations_with_replacement(examples, 2)
    for i, (ex1, ex2) in enumerate(combinations):
        t4k.progress(i, num_combinations)
        work_producer.put((ex1, ex2))
    work_producer.close()

    # Start a bunch of workers
    for proc in range(num_processes):
        print('starting worker %d' % proc)
        p = multiprocessing.Process(
            target=self.precompute_worker,
            args=(work_queue.get_consumer(), result_queue.get_producer())
        )
        p.start()

    # Get a result consumer, which is the last endpoint.  No more endpoints
    # will be made from either queue, so close them
    print('starting to collect results')
    result_consumer = result_queue.get_consumer()
    result_queue.close()
    work_queue.close()

    # Get all the results and cache them
    for i, (ex1, ex2, dot) in enumerate(result_consumer):
        ex1_token = u.ensure_unicode(self.features.get_token(int(ex1[0])))
        ex2_token = u.ensure_unicode(self.features.get_token(int(ex2[0])))
        t4k.progress(i, num_combinations)
        self.cache[frozenset((ex1_token, ex2_token))] = dot
def clean_text(text: str):
    """Return *text* with standalone numbers removed and whitespace collapsed.

    Raises:
        TypeError: if *text* is not a ``str``.
        ValueError: if nothing is left after cleaning.
    """
    if not isinstance(text, str):
        raise TypeError(text)

    cleaned = utils.ensure_unicode(text)  # make sure it's proper unicode
    substitutions = (
        (r'\b\d+\b', ''),  # remove numbers
        (r'\s+', ' '),     # normalize whitespace
    )
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned, flags=re.UNICODE)
    cleaned = cleaned.strip()

    if not cleaned:
        raise ValueError('no words provided')
    return cleaned
def split_tsv_line(fp):
    """
    Iterate over the data rows of a tsv file.

    The first line is treated as a header and skipped.  Every following
    line is yielded as the list of its tab separated fields; the line
    ending is not removed.

    Parameters
    -----------
    fp : str
        file path
    """
    with open(fp) as handle:
        next(handle, None)  # skip header line (no-op on an empty file)
        for raw_line in handle:
            yield ensure_unicode(raw_line).split('\t')
def makeGitPseudoWebscrape(desc):
    """Build a webscrape-shaped dict from package description records.

    For every package name in *desc* the result holds a dict with the keys
    title, repository (defaulting to "git"), priority, user, URL, views and
    description (the description items joined with ", ").
    """
    webscrape = {}
    for name, meta in desc.items():
        webscrape[name] = {
            "title": ensure_unicode(meta["Title"][0]),
            "repository": meta.get("repository", "git"),
            "priority": meta["priority"],
            "user": meta["user"],
            "URL": meta["URL"][0],
            "views": meta["views"],
            "description": ", ".join(meta["description"]),
        }
    return webscrape
def get_doctor_conclusion_from_database(tjid, product):
    """
    Fetch the doctor's overall evaluation for one specimen and product.

    Parameters
    -----------
    tjid : str
        barcode
    product : str
        product name, like healthwise. Compared against the lower-cased
        ``test_product`` column.

    Returns
    --------
    conclusion : dict
        Keys ``genetic``, ``environment``, ``conclusion``, ``test_code`` and
        ``signature``.  The ``recommendation`` column is selected but is
        currently not copied into the result.

    Notes
    -----
    When no row matches an error is logged and the process is terminated
    through ``exit(1)``.
    """
    conn = connect_database()
    try:
        with conn.cursor() as cur:
            # Both values are bound as parameters rather than formatted into
            # the SQL string.
            # NOTE(review): assumes the driver uses the DB-API "format"
            # paramstyle (%s) - confirm against connect_database().
            sql = 'select genetic, environment, conclusion, recommendation, ' \
                  'test_code, signature from view_conclusions ' \
                  'where barcode=%s and lower(test_product)=%s'
            cur.execute(sql, (tjid, product))
            res = cur.fetchone()
    finally:
        # The connection was previously left open; close it on every path.
        conn.close()

    if res is None:
        logger.error('找不到样本%s的医生总结评价!' % tjid)
        exit(1)
    logger.debug(res)

    conclusion = {}
    conclusion['genetic'] = ensure_unicode(convert_text2tex(res[0]))
    conclusion['environment'] = ensure_unicode(convert_text2tex(res[1]))
    conclusion['conclusion'] = ensure_unicode(convert_text2tex(res[2]))
    # res[3] (recommendation) is intentionally left out, as before.
    conclusion['test_code'] = ensure_unicode(res[4])
    conclusion['signature'] = ensure_unicode(res[5])
    return conclusion
def to_mpris1(self):
    """
        Builds the MPRIS1 dictionary for this metadata.

        String fields are copied under their own names, the track number
        is sent as a string, and the length is exposed both as 'mtime'
        and, divided by 1000, as 'time'.  Extra entries are only copied
        when their key is listed in Metadata.MPRIS1_KEYS and not yet set.
    """
    ret = dbus.Dictionary(signature='sv')

    for attr in ('title', 'artist', 'album', 'arturl', 'location'):
        value = getattr(self, attr)
        if value is None:
            continue
        ret[attr] = dbus.String(utils.ensure_unicode(value))

    if self.tracknum >= 0:
        ret['tracknumber'] = dbus.String(self.tracknum)

    if self.length >= 0:
        # NOTE(review): '/' is integer division only on Python 2 ints.
        ret['time'] = dbus.UInt32(self.length / 1000)
        ret['mtime'] = dbus.UInt32(self.length)

    for key, value in self._extra.items():
        if key not in Metadata.MPRIS1_KEYS or key in ret:
            continue
        ret[key] = value
    return ret
def saveMetadata(pkgDescription, pkgWebscrape, conn):
    """Save metadata information we've scraped about packages into database
    tables: packages, citations, tags, staticdeps

    pkgDescription maps a package name to its DESCRIPTION fields (the
    values are read as lists, e.g. ``["1.0"]``); pkgWebscrape maps a package
    name to the scraped record (title, description, repository, ...).
    conn is a connection offering ``execute``/``executemany`` with ``?``
    placeholders.  One pass is made over every package in pkgWebscrape:

    1. the ``packages`` row is created if missing and then updated;
    2. citations (BibTeX based, or plain text with embedded DOIs) that are
       not yet stored are inserted into ``citations``;
    3. for packages that have a DESCRIPTION entry, the last version and the
       static dependencies are written.
    """
    for rec in pkgWebscrape:
        scraped = pkgWebscrape[rec]

        # Fix up the URL: github URLs may be in a weird format (api.github
        # instead of just github).  Start from an empty URL on every pass so
        # a package without a DESCRIPTION entry neither fails on an undefined
        # name nor inherits the URL of the previous package.
        url = ""
        if rec in pkgDescription:
            url = pkgDescription[rec].get("URL", [""])
            if not isinstance(url, str):
                url = url[0]
        if url == "":
            url = scraped.get("url", "")
        if "/api.github" in url:
            url = "http://github.com/" + scraped["user"] + "/" + rec

        # Create a unique record if does not exist in the packages table
        conn.execute("insert or ignore into packages (name) values (?)",
                     (rec,))

        # and now overwrite the existing record with the new information
        try:
            conn.execute(
                "update packages set " +
                "title=?, description=?, authors=?, repository=?, url=? "
                "where name=?;",
                (scraped["title"],
                 utils.ensure_unicode(scraped["description"]),
                 scraped["authors"] if "authors" in scraped else "",
                 scraped["repository"],
                 url,
                 rec))
        except Exception as e:
            # Best effort: report the failure and carry on with the package.
            print("%s %s" % (str(e), scraped))

        # Write whatever citation information we have into the citations
        # table
        if "bibtex_citations" in scraped and \
                len(scraped["bibtex_citations"]) > 0:
            bibtex = scraped["bibtex_citations"]
            citations = [cite.strip()
                         for cite in scraped["citation"].split("\n\n")
                         if cite.strip() != ""]
            for (index, bib) in enumerate(bibtex):
                doi = fixDoi(extractBibtexField(bib, "doi"))
                year = extractBibtexField(bib, "year")
                title = extractBibtexField(bib, "title")
                author = extractBibtexField(bib, "author")
                # BibTeX entries are paired with the text citations by
                # position, wrapping around when there are fewer texts.
                citetext = citations[index % len(citations)]
                # Match the citation regardless of the package version.
                citepattern = re.sub(r"R package version \d.*?,",
                                     "R package version %,", citetext)
                rows = conn.execute(
                    "select * from citations where package_name=? "
                    "and citation like ?;", (rec, citepattern))
                if len(list(rows)) == 0:
                    conn.execute(
                        "insert into citations (package_name, citation, "
                        "title, author, year, doi, canonical, doi_given) " +
                        " values (?,?,?,?,?,?,?,?)",
                        (rec, citetext, title, author, year, doi, True,
                         doi != ""))
        elif "citation" in scraped and \
                scraped["citation"] != "HTTP Error 404: Not Found":
            citations = [cite.strip()
                         for cite in scraped["citation"].split("\n\n")
                         if cite.strip() != ""]
            citations2 = []
            # Pair citations with their embedded DOIs, if they exist
            for cite in citations:
                dois = list(extractDoiFromCitation(cite))
                if len(dois) > 0:
                    for doi in dois:
                        citations2.append((cite, doi))
                else:
                    citations2.append((cite, ""))
            # Then write the pairs to the database
            for (cite, doi) in citations2:
                citepattern = re.sub(r"R package version \d.*?,",
                                     "R package version %,", cite)
                rows = conn.execute(
                    "select * from citations where package_name=? "
                    "and citation like ?;", (rec, citepattern))
                if len(list(rows)) == 0:
                    conn.execute(
                        "insert into citations (package_name, citation, "
                        "doi, canonical, doi_given) values (?,?,?,?,?)",
                        (rec, cite, fixDoi(doi), True, doi != ""))

        # Now add in any information gleaned from the DESCRIPTION file
        if rec in pkgDescription:
            try:
                version = pkgDescription[rec].get("Version", [""])
                # NOTE(review): basestring exists on Python 2 only.
                if isinstance(version, basestring):
                    version = ""
                elif len(version) == 0:
                    version = ""
                else:
                    version = version[0]
                conn.execute("update packages set " +
                             "lastversion=? where name=?;", (version, rec))
            except Exception:
                # The handler used to drop into pdb, which stalls unattended
                # runs; report the problem and continue instead.
                print("Could not write package version information")

            # Now save static dependencies
            imports = list(set(pkgDescription[rec].get("Imports", []) +
                               pkgDescription[rec].get("Depends", []) +
                               pkgDescription[rec].get("Requires", [])))
            imports = [i for i in imports if legalimport.match(i)]
            conn.executemany(
                "insert into staticdeps (package_name, depends_on) "
                "values (?,?);",
                [(rec, imp) for imp in imports])
def latex_table_line(self):
    """Return the elements joined with ' & ', logging the line at debug level."""
    cells = (ensure_unicode(element) for element in self.elements)
    joined = u' & '.join(cells)
    logger.debug(joined)
    return joined
def get_report_tex(phi, gt_res, ques_res, pe_res, conclusion):
    """
    Render the LaTeX source of a patient's comprehensive report.

    Gene test results are mandatory: without them an error is logged and
    the process exits with status 1.  Missing questionnaire or physical
    examination data only produces a warning and is passed on to the
    template through the ``has_ques`` / ``has_pe`` flags.

    Parameters
    -----------
    phi : PHI
        PHI object of the patient.
    gt_res : list
        gene test records of the patient, list of GeneTestRecord object.
    ques_res : list
        questionnaire records of the patient, list of QuestionnaireRecord
        object.
    pe_res : list
        physical examination records the patient, list of
        PhysicalExaminationRecord object.
    conclusion : dict
        conclusion from doctor

    Returns
    --------
    tex : str
        tex code string
    """
    if not gt_res:
        logger.error('缺少基因检测结果信息!')
        exit(1)

    has_ques = bool(ques_res)
    if not has_ques:
        logger.warn('没有找到此样本相关的问卷调查信息!')

    has_pe = bool(pe_res)
    if not has_pe:
        logger.warn('没有找到此样本相关的体检指标信息!')

    # Bucket the questionnaire records by category; the category is the part
    # of lbmc after the last '-'.
    answers_by_category = {}
    for known_category in question_class:
        answers_by_category[ensure_unicode(known_category)] = []
    for answer in ques_res:
        if answer.lbmc is None:
            continue
        category = ensure_unicode(answer.lbmc.split('-')[-1])
        if category in question_class:
            answers_by_category[category].append(answer)

    grouped_examinations = group_physical_examination_records(pe_res)
    template = latex_env.get_template(base_template)
    return template.render(
        phi=phi,
        gene_test_records=group_gene_test_result(gt_res),
        ques_res=answers_by_category,
        group_pe=grouped_examinations,
        conclusion=conclusion,
        report_date=datetime.datetime.now().strftime('%Y-%m-%d'),
        has_ques=has_ques,
        has_pe=has_pe,
    )
def has_suffix(self):
    """Return True when the original and the root differ, ignoring case."""
    original = utils.ensure_unicode(self.original).lower()
    root = utils.ensure_unicode(self.root).lower()
    return original != root
import socket
from time import ctime

import utils

HOST = ''
PORT = 21567
BUFSIZE = 1024
ADDR = (HOST, PORT)

# TCP server: accepts one client at a time and answers every chunk it
# receives with the same text prefixed by a timestamp.
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
try:
    sock.bind(ADDR)
    sock.listen(5)

    while True:
        print("waiting for connection...")
        client_sock, addr = sock.accept()
        print("connection from: ", addr)

        try:
            while True:
                data = client_sock.recv(BUFSIZE)
                if not data:
                    # An empty read means the client closed the connection.
                    break
                msg = '[%s] %s' % (ctime(), utils.ensure_unicode(data))
                print('Received: ', msg)
                # sendall() keeps writing until the whole reply is out;
                # send() may stop after a partial write.
                client_sock.sendall(utils.ensure_bytes(msg))
        finally:
            # Close the client socket even if the exchange fails.
            client_sock.close()
finally:
    # The accept loop never ends on its own, so the listening socket is
    # released here when the loop is left through an exception such as
    # KeyboardInterrupt.
    sock.close()
import socket

import utils

HOST = 'localhost'
service = 'daytime'
port = socket.getservbyname(service)

# UDP client: sends one datagram to the local daytime service and prints
# the reply.
sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
try:
    sock.connect((HOST, port))
    sock.send(utils.ensure_bytes(service))
    # recvfrom() returns a (payload, sender_address) pair; only the payload
    # is text to decode.
    data, _sender = sock.recvfrom(1024)
    print(utils.ensure_unicode(data))
finally:
    # Release the socket even when the exchange fails.
    sock.close()
def handle(self):
    """Read one line from the client and write it back with a timestamp prefix."""
    print("...connection from: ", self.client_address)
    # The timestamp is taken before the line is read, as in the '%' tuple
    # this replaces.
    stamp = ctime()
    line = utils.ensure_unicode(self.rfile.readline()).strip()
    msg = '[%s] %s' % (stamp, line)
    print(msg)
    self.wfile.write(utils.ensure_bytes(msg))