def update(logger):
    """Apply every SPL XML file found in UPDATE_DIR to the database.

    For each ``.xml`` file in ``UPDATE_DIR`` the file is parsed and its tags
    are extracted with ``get_tags``. A file is skipped when no tags are
    returned or when its setId is not known to rxnorm.

    If a ``structuredProductLabelMetadata`` row already exists for the setId,
    the file is copied to the master directory, the metadata columns and
    active moieties are refreshed, and the text of every section whose LOINC
    code maps to a table is updated (or inserted when the label had no entry
    for that section yet). Otherwise the file is copied to the master
    directory and loaded as a new label through ``run``.

    The module-level connection ``con`` is committed and closed once all
    files have been handled.

    Args:
        logger: logger used for progress and diagnostic messages; it is also
            passed on to ``get_tags`` and ``run``.
    """
    spls = [os.path.join(UPDATE_DIR, f)
            for f in os.listdir(UPDATE_DIR) if f.endswith(".xml")]
    total = len(spls)

    for cnt, spl in enumerate(spls):
        tree = parse(spl, parser=XMLParser(huge_tree=True))
        root = tree.getroot()

        tags = get_tags(root, spl, logger)
        if not tags:
            # Single-argument parenthesized print behaves the same as the
            # Python 2 print statement used elsewhere in this file.
            print("\nERROR: get_tags failed, most likely because a UNII could not be retrieved for all active moities. Please check the following spl: %s" % spl)
            continue

        set_id = tags['setId']
        if not setid_in_rxnorm(set_id):
            logger.info("SetId %s from file %s not found in rxnorm", set_id, spl)
            continue

        # Parameters are passed as a sequence, matching the lookup in run();
        # a bare string is not a valid parameter container for every driver
        # version.
        cursor.execute(
            "SELECT id FROM structuredProductLabelMetadata WHERE setId=%s",
            [set_id])
        row = cursor.fetchone()

        if row:
            rowid = row[0]
            copy_to_master_dir(spl, setid=set_id)

            for name, value in tags.items():
                if name == 'activeMoieties':
                    insert_active_moieties(value, tags["activeMoietyUNIIs"])
                    link_spl_to_active_moieties(set_id, value)
                elif name == "activeMoietyUNIIs":
                    # Consumed together with 'activeMoieties' above; it is
                    # not written as a metadata column.
                    continue
                else:
                    update_db("structuredProductLabelMetadata", name, value,
                              "id", rowid)

            for code in get_sections(root):
                # Bind the code as a parameter: interpolating it unquoted
                # lets MySQL evaluate a code such as 34067-9 as arithmetic,
                # so the lookup would match the wrong row or none at all.
                cursor.execute(
                    "SELECT table_name FROM loinc WHERE loinc=%s", [code])
                loinc_row = cursor.fetchone()
                if loinc_row is None:
                    logger.debug(
                        "LOINC code not for SPL section found in the database: %s. "
                        "This section will not be loaded for spl. "
                        "Try updating the LOINC codes and re-loading this SPL.",
                        code)
                    continue
                table = loinc_row[0]

                sect_elt, _ = get_section(root, code)
                # get_section_text walks the section recursively and returns
                # its accumulated text.
                all_text = get_section_text(sect_elt, "")

                # Update the section if this label already has an entry for
                # it, otherwise add a new entry.
                if has_entry(table, "splId", rowid):
                    update_db(table, "field", all_text, "splId", rowid)
                else:
                    insert_section_entry(table, rowid, all_text)

            logger.info("%s updated", set_id)
        else:
            # No existing metadata row: load the file as a new label.
            copy_to_master_dir(spl)
            run(logger, [spl])

        print_progress(cnt + 1, total, spl)

    con.commit()
    con.close()
def run(logger, spls, limit=None): count = 0 for splF in spls: print "\n Start parsing: {0}".format(splF) #tree = etree.ElementTree(file=splF) p = XMLParser(huge_tree=True) tree = parse(splF, parser=p) root = tree.getroot() tags = get_tags(root, splF, logger) #print "[DEBUG] tags: " + str(tags) if len(tags.keys()) == 0: print "\nERROR: get_tags failed, most likely because a UNII could not be retrieved for all active moities. Please check the following spl: %s" % splF continue if not setid_in_rxnorm(tags['setId']): logger.info("SetId {0} from file {1} not found in rxnorm".format(tags['setId'],splF)) continue ## check if there are deplicated setId cursor.execute("SELECT id FROM structuredProductLabelMetadata WHERE setId=%s",[tags['setId']]) idExists = cursor.fetchall() if idExists: print "\n duplicated setId %s in file %s" % (tags['setId'], splF) continue try: insert_active_moieties(tags['activeMoieties'], tags['activeMoietyUNIIs']) insertQuery = "INSERT INTO structuredProductLabelMetadata(setId, versionNumber, fullName, routeOfAdministration, genericMedicine, representedOrganization, effectiveTime, filename) VALUES(%s,%s,%s,%s,%s,%s,%s,%s)" values = (tags['setId'], tags['versionNumber'], tags['fullName'], tags['routeOfAdministration'], tags['genericMedicine'], tags['representedOrganization'], tags['effectiveTime'], tags['filename']) cursor.execute(insertQuery, values) link_spl_to_active_moieties(tags['setId'], tags['activeMoieties']) cursor.execute("SELECT id FROM structuredProductLabelMetadata WHERE setId=%s",[tags['setId']]) splId = cursor.fetchone()[0] #print "[DEBUG] insert active moieties - done" splSections = get_sections(root) #print "[DEBUG] splSections: " + str(splSections) for code in splSections: cursor.execute("SELECT table_name FROM loinc WHERE loinc='{0}'".format(code)) res = cursor.fetchone if res: table = cursor.fetchone()[0] else: logger.info("Filename: {0}\tSetId: {1}\t no table name from loinc".format(tags['filename'],tags['setId'])) continue 
(sectElt, codeElt) = get_section(root, code) #gag: Recursive function to retrieve text from a section allText = get_section_text(sectElt, "") cursor.execute("INSERT INTO `{0}`(splId, field) VALUES({1}, '{2}')".format(table, splId, allText.encode('utf8').replace("'","\\'"))) logger.info("Filename: {0}\tSetId: {1}\tadded".format(tags['filename'],tags['setId'])) count +=1 if limit is not None and count == limit: break if len(spls) > 1: print_progress(spls.index(splF)+1, len(spls), splF) con.commit() except mdb.Error, e: print "Error %d: %s" % (e.args[0], e.args[1]) con.rollback() con.commit() os.rename ("spls/{0}".format(splF),"problematic-spls/{0}".format(splF)) continue except: