def _xml_path_for(offloadFile, suffix):
    """Return the XML output path derived from offloadFile.

    A trailing ".txt" is swapped for `suffix`. Only the end of the name is
    touched, so a ".txt" occurring earlier in the path (for example in a
    directory name) is left alone.
    """
    if offloadFile.endswith(".txt"):
        return offloadFile[:-len(".txt")] + suffix
    # No ".txt" extension: append the suffix instead of returning the input
    # path unchanged, so the output path can never be the input path itself.
    return offloadFile + suffix


def generateXML(offloadFile, DEID):
    """Parse an offload file and save its entries as an XML file.

    Args:
        offloadFile: path of the offload text file; it is passed to
            organizeDATA.organizeANDparse() to obtain the entries.
        DEID: truthy to de-identify each entry's "contents" before saving,
            falsy to save the entries as parsed.

    Returns:
        The path handed to save2XML(): offloadFile with its trailing ".txt"
        replaced by ".DEID.xml" (de-identified) or ".noDEID.xml" (as parsed).
        When offloadFile does not end in ".txt" the suffix is appended.
    """
    entries = organizeDATA.organizeANDparse(offloadFile)

    if not DEID:
        XMLfile = _xml_path_for(offloadFile, ".noDEID.xml")
        save2XML(entries, XMLfile)
        return XMLfile

    # Imported here, on the de-identification path only, so the plain path
    # never imports the resource module.
    import importDEIDresources
    DEIDlists = importDEIDresources.getGazetteers()
    # Single-argument parenthesised form prints the same text under the
    # Python 2 print statement and the Python 3 print function.
    print("DEID resources imported")

    DEIDentries = []
    for entry in entries:
        # A separate local name keeps the DEID flag parameter from being
        # rebound to a DeIdentifier instance.
        deidentifier = DeIdentifier(entry["contents"], DEIDlists)
        entry["contentsDEID"] = resetControlCodes(deidentifier.deidentifyRAW())
        # The original text is removed from the entry (in place) so that only
        # the "contentsDEID" version is passed on to save2XML().
        del entry["contents"]
        DEIDentries.append(entry)

    DEIDXMLfile = _xml_path_for(offloadFile, ".DEID.xml")
    save2XML(DEIDentries, DEIDXMLfile)
    return DEIDXMLfile
# NOTE(review): the statement run on the next line is the tail of a definition
# whose header lies outside the visible text (it reads `words` and `index`,
# which are not defined here). Its nesting cannot be recovered from the
# collapsed line, so it is left exactly as found -- restore it from the
# original file rather than re-indenting by guesswork.
basket=["","","",words[index],"","",""] i=0 for x in xrange(index-3,index+4): if x >= 0: try: if words[x] != '': basket[i]=words[x].lower() except: basket[i]="" i+=1 print "===", basket
# Script entry point: de-identify the text supplied on standard input and
# print the result.
if __name__ == "__main__":
    # The usage banner is printed on every run, before any input is read.
    print ''' ### Commandline usage: echo "This is my text with a name, Marc, in it." | python DeIdentifier.py ### '''
    import sys
    '''Collect and read in all resources'''
    import importDEIDresources
    DEIDlists = importDEIDresources.getGazetteers()
    # All of standard input is read at once and handled as a single text.
    dataPoint=sys.stdin.read()
    DEID=DeIdentifier(dataPoint, DEIDlists)
    DEIDtext=DEID.deidentifyRAW()
    # The result of deidentifyRAW() is printed after an "output:" label.
    print "output:", DEIDtext