def parse_loop(debug_limit=None, verbose=False, fail_fast=False):
    """Parse every unparsed ``RawXmlBlob`` into the database, one at a time.

    Each pass fetches one blob whose ``parsed`` flag is false, flags it as
    parsed, replaces its ``activity`` with the result of
    ``parser.parse(blob.raw_xml)`` and commits.  A blob that raises while
    being processed is rolled back, reported on stderr, has its ``parsed``
    flag reset to false, and is skipped for the remainder of this call so
    that it cannot be fetched again straight away and stall the loop.

    Args:
        debug_limit: If not None, return once this many blobs have been
            attempted (failed attempts are counted as well).
        verbose: If true, print the number of blobs whose ``parsed`` flag is
            false before each attempt.  The figure includes blobs that
            already failed earlier in this call.
        fail_fast: If true, re-raise the first error after the failing
            blob's flag has been reset and committed.

    Returns:
        None.
    """
    attempted = 0
    # Ids of blobs that failed during this call.  They go back to
    # parsed == False in the database, so without this exclusion list the
    # same broken blob could be picked up again on every iteration.
    failed_ids = []
    while True:
        # "== False" builds a SQL expression here; it is not a Python
        # truthiness test, so it must not be rewritten as "is False"/"not".
        pending = RawXmlBlob.query.filter(RawXmlBlob.parsed == False)  # noqa: E712
        if verbose:
            print('%d blobs need to be parsed.' % pending.count())
        candidates = pending
        if failed_ids:
            candidates = pending.filter(~RawXmlBlob.id.in_(failed_ids))
        xmlblob = candidates.first()
        if xmlblob is None:
            return
        # Remember the id now so the error report below does not depend on
        # reloading the object after a rollback.
        blob_id = xmlblob.id
        try:
            # Lock this xmlblob so parallel threads don't pick it up
            xmlblob.parsed = True
            db.session.commit()
            # Drop the previous parse result (the original note says this
            # recursively deletes the associated Activity/Transaction/etc
            # objects; that relies on cascade settings not visible here).
            xmlblob.activity = None
            # Parse new objects into the db; the error list is not used.
            xmlblob.activity, _errors = parser.parse(xmlblob.raw_xml)
            db.session.commit()
        except Exception:
            db.session.rollback()
            sys.stderr.write("Could not parse xml blob id=%s\n" % blob_id)
            traceback.print_exc()
            # Release the "lock" so a later run can retry this blob.
            xmlblob.parsed = False
            db.session.commit()
            failed_ids.append(blob_id)
            if fail_fast:
                raise
        attempted += 1
        if (debug_limit is not None) and attempted >= debug_limit:
            return
def load_fix(fix_name):
    """Load the XML fixture ``fix_name`` into the database.

    Adds one ``CodelistSector`` row, then, for every ``iati-activity``
    element found in the fixture file, stores the element's serialized XML
    as a ``RawXmlBlob`` attached to a single ``IndexedResource`` and stores
    the activity that ``parser.parse`` builds from that blob, linked to the
    blob through ``parent_id``.

    Args:
        fix_name: Fixture name, resolved to a path by ``fixture_filename``.
    """
    # The code value can be anything; there just needs to be > 0
    # (presumably: at least one CodelistSector row must exist).
    sector = CodelistSector(code=47045)
    db.session.add(sector)

    resource = IndexedResource(id=u"TEST")
    tree = ET.parse(fixture_filename(fix_name))

    for node in tree.findall('iati-activity'):
        xml_text = ET.tostring(node, encoding='utf-8').decode('utf-8')
        raw_blob = RawXmlBlob(parent=resource, raw_xml=xml_text)
        db.session.add(raw_blob)
        # Commit right away so the blob has an id to link the activity to.
        db.session.commit()

        # The parser also returns an error list, which is not used here.
        parsed_activity, _errors = parser.parse(raw_blob.raw_xml)
        parsed_activity.parent_id = raw_blob.id
        db.session.add(parsed_activity)

    db.session.commit()