def main():
    """Insert the ATC level-3 description into each CSV record read from stdin.

    The database connection is configured through the ``CHHOST``, ``CHUSER``,
    ``CHPWD``, ``CHPORT`` and ``CHDB`` environment variables.  Every input
    line is split on commas and its fourth field is looked up as ``level3``
    in the ``atc_classification`` table.  The output record consists of the
    first four input fields, the lower-cased description passed through
    ``Datahelper.get_normalised_phrase`` (only when a row was found), and
    finally the fifth input field.

    If the connection cannot be established a message is printed and the
    process exits with status 1.

    Returns:
        The number of rows fetched for the last input line (0 when stdin is
        empty).
    """
    dh = Datahelper()
    try:
        chembl = pymysql.connect(host=os.environ["CHHOST"],
                                 user=os.environ["CHUSER"],
                                 passwd=os.environ["CHPWD"],
                                 port=int(os.environ["CHPORT"]),
                                 db=os.environ["CHDB"])
    except Exception:
        print("%s %s" % ("Unexpected error:", sys.exc_info()))
        sys.exit(1)

    # The code is sent as a bound parameter so that text coming from stdin
    # can never be interpreted as SQL.
    query = ("select level3_description from atc_classification "
             "where level3 = %s limit 1")
    count = 0
    try:
        for line in sys.stdin:
            data = line.strip().split(',')
            atc_code = data[3]
            rdata = data[:4]
            # NOTE(review): count is reset for every line, so the value
            # returned only reflects the last line - confirm callers expect it.
            count = 0
            with chembl.cursor() as cursor:
                cursor.execute(query, (atc_code,))
                for row in cursor:
                    count += 1
                    rdata.append(dh.get_normalised_phrase(row[0].lower()))
            # NOTE(review): records whose code is not found are still written,
            # without the description column - confirm this is intended.
            rdata.append(data[4])
            print(','.join(rdata))
    finally:
        # Release the connection even when a malformed line aborts the loop.
        chembl.close()
    return count
def main(options):
    """Attach known synonyms to each CSV record read from stdin.

    The synonym table is loaded with ``load_synonyms`` from the file named
    by ``options.synfile``.  For every input line the second field is
    lower-cased and the keys returned by ``Datahelper.get_merge_key_list``
    for that phrase are tried in order; the first key present in the table
    wins.  A matched record is written as ``<field1>,<phrase>,<synonyms>``
    with the synonyms joined by ``|``; an unmatched record is written as
    ``<field1>,<phrase>``.

    If the synonym file cannot be loaded a message is printed and the
    process exits with status 1.

    Args:
        options: object exposing ``synfile``, the path of the synonym file.

    Returns:
        Tuple ``(count, mcount)``: lines read and lines that were matched.
    """
    count = 0
    mcount = 0
    dh = Datahelper()
    try:
        # The context manager closes the file as soon as it has been loaded.
        with open(options.synfile, "r") as fh:
            synonyms = load_synonyms(fh)
    except IOError as e:
        print("I/O error({0}): {1}".format(e.errno, e.strerror))
        sys.exit(1)
    except TypeError as e:
        print("%s %s" % ("Missing arguments ", e))
        sys.exit(1)
    except Exception:
        print("%s %s" % ("Unexpected error:", sys.exc_info()))
        sys.exit(1)

    for line in sys.stdin:
        count += 1
        data = line.strip().split(',')
        phrase = data[1].lower()
        for key in dh.get_merge_key_list(phrase):
            if key in synonyms:
                print("%s,%s,%s" % (data[0], phrase, '|'.join(synonyms[key])))
                mcount += 1
                break
        else:
            # No merge key was found in the synonym table.
            print("%s,%s" % (data[0], phrase))
    return count, mcount
def main():
    """Append a combined ATC description to each CSV record read from stdin.

    The database connection is configured through the ``CHHOST``, ``CHUSER``,
    ``CHPWD``, ``CHPORT`` and ``CHDB`` environment variables.  Every input
    line is split on commas and its first field is looked up as ``level3``
    in the ``atc_classification`` table.  When a row is found, the text
    ``<level1 description, lower-cased>: <normalised level2 + level3
    descriptions>`` is appended to the record; the record is then written
    back comma-joined.

    If the connection cannot be established a message is printed and the
    process exits with status 1.

    Returns:
        The number of rows fetched for the last input line (0 when stdin is
        empty).
    """
    # Imported locally so the block does not rely on module-level imports.
    import os
    import pymysql

    dh = Datahelper()
    try:
        chembl = pymysql.connect(host=os.environ["CHHOST"],
                                 user=os.environ["CHUSER"],
                                 passwd=os.environ["CHPWD"],
                                 port=int(os.environ["CHPORT"]),
                                 db=os.environ["CHDB"])
    except Exception:
        print("%s %s" % ("Unexpected error:", sys.exc_info()))
        sys.exit(1)

    # The code is sent as a bound parameter so that text coming from stdin
    # can never be interpreted as SQL.
    query = ("select level1_description, level2_description, "
             "level3_description from atc_classification "
             "where level3 = %s limit 1")
    count = 0
    try:
        for line in sys.stdin:
            data = line.strip().split(',')
            atc_code = data[0]
            # NOTE(review): count is reset for every line, so the value
            # returned only reflects the last line - confirm callers expect it.
            count = 0
            with chembl.cursor() as cursor:
                cursor.execute(query, (atc_code,))
                for row in cursor:
                    count += 1
                    lower_levels = row[1].lower() + " " + row[2].lower()
                    descr = (row[0].lower() + ": "
                             + dh.get_normalised_phrase(lower_levels))
                    data.append(descr)
            # NOTE(review): records whose code is not found are still written,
            # without a description - confirm this is intended.
            print(','.join(data))
    finally:
        # Release the connection even when a malformed line aborts the loop.
        chembl.close()
    return count
def main(options):
    """Write a phenotype definition line for each known code read from stdin.

    The code list is loaded with ``load_code_list`` from the file named by
    ``options.bnfcodes``.  After the fixed header
    ``pheno,PHENOTYPE,Category,type`` has been written, every input line is
    split on commas; when its first field is present in the code list, the
    looked-up text is passed through ``Datahelper.get_normalised_phrase``
    and ``Datahelper.make_pheno_string``, and the record is written with
    three extra fields: the code again, the phenotype string and the
    literal ``BINARY``.

    If the code list cannot be loaded a message is printed and the process
    exits with status 1.

    Args:
        options: object exposing ``bnfcodes``, the path of the code list.

    Returns:
        1 if the last input line had a known code, otherwise 0.
    """
    dh = Datahelper()
    try:
        # The context manager closes the file as soon as it has been loaded.
        with open(options.bnfcodes, "r") as fh:
            code_lookup = load_code_list(fh)
    except Exception:
        print("%s %s" % ("Unexpected error:", sys.exc_info()))
        sys.exit(1)

    print("pheno,PHENOTYPE,Category,type")
    count = 0
    for line in sys.stdin:
        data = line.strip().split(',')
        # NOTE(review): count is reset for every line, so the value returned
        # only reflects the last line - confirm callers expect it.
        count = 0
        if data[0] in code_lookup:
            count += 1
            pheno_string = dh.get_normalised_phrase(code_lookup[data[0]])
            pheno_string = dh.make_pheno_string(pheno_string)
            data.append(data[0])
            data.append(pheno_string)
            data.append("BINARY")
            # NOTE(review): lines with an unknown code produce no output -
            # confirm this is intended.
            print(','.join(data))
    return count
def main():
    """Print the excluded words known to ``Datahelper``, four per line.

    The words returned by ``Datahelper.get_excluded_words`` are written to
    stdout comma-separated in groups of four; a final shorter group is
    written when the number of words is not a multiple of four.

    If the ``Datahelper`` cannot be constructed a message is printed and the
    process exits with status 1.
    """
    try:
        dh = Datahelper()
    except IOError as e:
        print("I/O error({0}): {1}".format(e.errno, e.strerror))
        print("%s %s" % ("I/O error:", sys.exc_info()))
        sys.exit(1)
    except TypeError as e:
        print("%s %s" % ("Missing arguments ", e))
        sys.exit(1)
    except Exception:
        print("%s %s" % ("Unexpected error:", sys.exc_info()))
        sys.exit(1)

    group = []
    for wd in dh.get_excluded_words():
        group.append(wd)
        if len(group) == 4:
            print(",".join(group))
            group = []
    if group:
        # Flush the trailing group of fewer than four words.
        print(",".join(group))
def main():
    """Collapse tab-separated synonym rows from stdin into one record per molregno.

    Requires the input to be sorted on the first field (the molregno).  Each
    input line supplies ``molregno``, a word or phrase, and a synonym type.
    The phrase is passed through ``Datahelper.get_normalised_phrase`` and
    recorded as ``<type>:<phrase>``; duplicates within a group are dropped.
    One line is written per molregno:
    ``<syn1>|<syn2>|...|MOLREGNO:<molregno>``.

    Nothing is written when stdin is empty.
    """
    last_molno = ""
    related_synonyms = []
    dh = Datahelper()
    for line in sys.stdin:
        data = line.strip().split('\t')
        # A change of molregno closes the group collected so far.
        if data[0] != last_molno and last_molno != "":
            print('|'.join(related_synonyms) + "|MOLREGNO:" + last_molno)
            related_synonyms = []
        last_molno = data[0]
        text = dh.get_normalised_phrase(data[1])
        stype = data[2]
        syn = stype + ":" + text
        if syn not in related_synonyms:
            related_synonyms.append(syn)

    # Output the last synonym group; the list is empty only when no input
    # line was read, in which case there is no group to report.
    if related_synonyms:
        print('|'.join(related_synonyms) + "|MOLREGNO:" + last_molno)
def main(options):
    """Reformat the ATC code and lower-case the name of each CSV record.

    Every line read from stdin is split on commas.  The first field is
    replaced by the result of ``Datahelper.format_atc_code`` called with
    ``int(options.codelen)``, the second field is lower-cased, and the
    record is written back to stdout comma-joined.

    Args:
        options: object exposing ``codelen``, convertible to ``int``.
    """
    helper = Datahelper()
    for raw in sys.stdin:
        fields = raw.strip().split(',')
        width = int(options.codelen)
        formatted = helper.format_atc_code(fields[0], width)
        lowered = fields[1].lower()
        print(','.join([formatted, lowered] + fields[2:]))
def main(options):
    """Reformat the ATC code held in the third field of each CSV record.

    Every line read from stdin is split on commas, its third field is
    replaced by the result of ``Datahelper.format_atc_code`` called with
    ``int(options.codelen)``, and the record is written back to stdout
    comma-joined.

    Args:
        options: object exposing ``codelen``, convertible to ``int``.

    Returns:
        The number of lines read.
    """
    helper = Datahelper()
    total = 0
    for total, raw in enumerate(sys.stdin, 1):
        fields = raw.strip().split(',')
        width = int(options.codelen)
        fields[2] = helper.format_atc_code(fields[2], width)
        print(','.join(fields))
    return total
def main(options):
    """Match medication phrases read from stdin against a classification system.

    The classification phrases are loaded from the file named by
    ``options.clsfile`` with ``Datahelper.load_cls_phrases`` (see also
    datahelper.py).  The first stdin line is treated as a header and
    discarded.  Every following line is split on commas: the second field
    is the primary phrase and, when the record has exactly three fields,
    the third supplies further ``|``-separated phrases.  All phrases are
    handed to ``Datahelper.match_all_phrases``.

    Current policy: one output line is written per matched code element, so
    a single input record can produce several lines; each carries the
    number of code elements as its last field.  A record without any match
    produces one line with ``NA`` in both code fields and a count of 0.

    If the classification file cannot be loaded a message is printed and
    the process exits with status 1.

    Args:
        options: object exposing ``clsfile``, the path of the
            classification phrases file.

    Returns:
        Tuple ``(count, match_count, miss_count)``: records read, records
        with at least one match, and records without a match.
    """
    count = 0
    match_count = 0
    miss_count = 0

    # Try to load the classification system codes file.
    try:
        # The context manager closes the file as soon as it has been loaded.
        with open(options.clsfile, "r") as fh:
            dh = Datahelper()
            dh.load_cls_phrases(fh)
    except IOError as e:
        print("I/O error({0}): {1}".format(e.errno, e.strerror))
        print("%s %s" % ("I/O error:", sys.exc_info()))
        sys.exit(1)
    except TypeError as e:
        print("%s %s" % ("Missing arguments ", e))
        sys.exit(1)
    except Exception:
        print("%s %s" % ("Unexpected error:", sys.exc_info()))
        sys.exit(1)

    # stdin carries the medications coding data; skip its header line.
    sys.stdin.readline()
    for line in sys.stdin:
        count += 1
        data = line.strip().split(',')
        all_phrases = [data[1]]
        if len(data) == 3:
            all_phrases += data[2].split('|')
        # The fourth returned value (the selected code) is not used here.
        code_array, match_data, last_match, _ = dh.match_all_phrases(all_phrases)
        if len(code_array) > 0:
            for code_elem in code_array:
                # Each element holds two '~'-separated parts, written in
                # reverse order.
                code_data = code_elem.split("~")
                print("%s,%s,%s,%s,%s,%s,%d" % (
                    data[0], data[1], last_match, '|'.join(match_data),
                    code_data[1], code_data[0], len(code_array)))
            # NOTE(review): counted once per matched record, not once per
            # output line - confirm against the intended statistics.
            match_count += 1
        else:
            print("%s,%s,%s,%s,%s,%s,0" % (
                data[0], data[1], last_match, '|'.join(match_data),
                "NA", "NA"))
            miss_count += 1
    return count, match_count, miss_count
def main():
    """Reformat the digit code and lower-case the name of each CSV record.

    The first stdin line is a header: it is stripped and echoed unchanged.
    Every following line is split on commas, its first field is replaced by
    the result of ``Datahelper.format_digit_code`` called with a length of
    3, its second field is lower-cased, and the record is written back to
    stdout comma-joined.

    Returns:
        The number of data lines read (the header is not counted).
    """
    helper = Datahelper()
    header = sys.stdin.readline().strip()
    print(header)
    total = 0
    for total, raw in enumerate(sys.stdin, 1):
        fields = raw.strip().split(',')
        formatted = helper.format_digit_code(fields[0], 3)
        lowered = fields[1].lower()
        print(','.join([formatted, lowered] + fields[2:]))
    return total
def main():
    """Swap the first two tab-separated fields of every line read from stdin.

    Each input line is stripped and split on TAB characters; the output
    line consists of the second field, a TAB, and the first field.  Any
    further fields are dropped.
    """
    for line in sys.stdin:
        data = line.strip().split('\t')
        print("%s\t%s" % (data[1], data[0]))
def main(options):
    """Write a phenotype definition line for each ATC code read from stdin.

    Accesses the CHEMBL database for each input line and uses the
    description from the ATC level given by ``options.level``.  The
    connection is configured through the ``CHHOST``, ``CHUSER``, ``CHPWD``,
    ``CHPORT`` and ``CHDB`` environment variables.

    After the fixed header ``pheno,PHENOTYPE,Category,type`` has been
    written, every input line is split on commas and its first field is
    looked up in the ``level<N>`` column of ``atc_classification``.  For a
    row that is found, the description is passed through
    ``Datahelper.get_normalised_phrase`` and ``Datahelper.make_pheno_string``
    and the record is written with three extra fields:
    ``<code>_<phenotype string>``, the phenotype string and the literal
    ``BINARY``.

    If the connection cannot be established a message is printed and the
    process exits with status 1.

    Args:
        options: object exposing ``level``, convertible to ``int``.

    Returns:
        The number of rows fetched for the last input line (0 when stdin is
        empty).
    """
    level = int(options.level)
    dh = Datahelper()
    try:
        chembl = pymysql.connect(host=os.environ["CHHOST"],
                                 user=os.environ["CHUSER"],
                                 passwd=os.environ["CHPWD"],
                                 port=int(os.environ["CHPORT"]),
                                 db=os.environ["CHDB"])
    except Exception:
        print("%s %s" % ("Unexpected error:", sys.exc_info()))
        sys.exit(1)

    # Only the integer level is formatted into the column names; the code
    # itself stays a bound parameter (%%s becomes the driver's %s placeholder)
    # so that text coming from stdin can never be interpreted as SQL.
    query = ("select level%d_description from atc_classification "
             "where level%d = %%s limit 1" % (level, level))

    print("pheno,PHENOTYPE,Category,type")
    count = 0
    try:
        for line in sys.stdin:
            data = line.strip().split(',')
            atc_code = data[0]
            # NOTE(review): count is reset for every line, so the value
            # returned only reflects the last line - confirm callers expect it.
            count = 0
            # The context manager closes the cursor after each lookup.
            with chembl.cursor() as cursor:
                cursor.execute(query, (atc_code,))
                for row in cursor:
                    count += 1
                    pheno_string = dh.get_normalised_phrase(row[0])
                    pheno_string = dh.make_pheno_string(pheno_string)
                    data.append(data[0] + "_" + pheno_string)
                    data.append(pheno_string)
                    data.append("BINARY")
                    # NOTE(review): codes without a database row produce no
                    # output - confirm this is intended.
                    print(','.join(data))
    finally:
        # Release the connection even when a malformed line aborts the loop.
        chembl.close()
    return count
def main(options):
    """Append classification-system codes to each CSV record read from stdin.

    The code table is loaded with ``load_cs_codes`` from the file named by
    ``options.codefile``.  The first stdin line is a header; it is echoed
    with an extra ``cs_code`` column.  Every following line is split on
    commas and its second field is looked up in the code table: one output
    line is written per code found, with the code as the last field.  A
    record whose second field is not in the table is written once with
    ``NA`` as the last field.

    If the code file cannot be loaded a message is printed and the process
    exits with status 1.

    Args:
        options: object exposing ``codefile``, the path of the code table.

    Returns:
        Tuple ``(count, match_count, miss_count)``: records read, code
        lines written for matched records, and records without a match.
    """
    count = 0
    match_count = 0
    miss_count = 0
    try:
        # The context manager closes the file as soon as it has been loaded.
        with open(options.codefile, "r") as fh:
            code_lookup = load_cs_codes(fh)
    except IOError as e:
        print("I/O error({0}): {1}".format(e.errno, e.strerror))
        sys.exit(1)
    except TypeError as e:
        print("%s %s" % ("Missing arguments ", e))
        sys.exit(1)
    except Exception:
        print("%s %s" % ("Unexpected error:", sys.exc_info()))
        sys.exit(1)

    hdr = sys.stdin.readline().strip()
    print("%s,%s" % (hdr, "cs_code"))
    for line in sys.stdin:
        count += 1
        data = line.strip().split(',')
        # Placeholder for the new column; overwritten for every code found.
        data.append("NA")
        if data[1] in code_lookup:
            for code in code_lookup[data[1]]:
                data[-1] = code
                match_count += 1
                print(",".join(data))
        else:
            miss_count += 1
            print(",".join(data))
    return count, match_count, miss_count