def producer_queue(queue, ranges):
    """Validate one page of stored responses and put the results on `queue`.

    Queries `Response.id` and `Response.cleaned_content` for rows whose
    `schemas` column is neither NULL nor the string '{}', runs
    `validate_in_memory` on each row's cleaned content, and puts one result
    dict per row onto `queue`.

    Each result dict has the keys:
      - "response_id": the row's id
      - "valid": True when the validator output does not contain 'Error at'
      - "validated_on": ISO-8601 string of the local time of validation
      - "errors": only present when the validator output is non-empty; the
        output split on blank lines, each piece stripped

    After the last result, the string 'STOP' is put on the queue
    (presumably the end-of-stream marker for the consumer -- confirm
    against the consuming side).

    :param queue: object with a `put(item)` method that receives the results
    :param ranges: indexable pair; `ranges[0]` is used as the query LIMIT
        and `ranges[1]` as the query OFFSET
    """
    page_size, page_offset = ranges[0], ranges[1]

    rows = (
        read_session.query(Response.id, Response.cleaned_content)
        .filter(
            and_(
                # `Response.schemas is not None` is a plain Python identity
                # test on the column object and is always True, so it never
                # reaches the database; `.isnot(None)` emits IS NOT NULL.
                Response.schemas.isnot(None),
                Response.schemas != '{}',
            )
        )
        # LIMIT/OFFSET without an ORDER BY does not guarantee a stable row
        # order, so separate pages could overlap or skip rows.
        .order_by(Response.id)
        .limit(page_size)
        .offset(page_offset)
        .all()
    )

    for response_id, cleaned_content in rows:
        stderr = validate_in_memory(cleaned_content)

        validated_data = {
            "response_id": response_id,
            "valid": 'Error at' not in stderr,
            "validated_on": datetime.now().isoformat(),
        }

        if stderr:
            # The validator output is treated as blank-line separated
            # messages; empty pieces are dropped.
            validated_data["errors"] = [
                s.strip() for s in stderr.split('\n\n') if s
            ]

        queue.put(validated_data)

    queue.put('STOP')
# set up the connection with open('local_rds.conf', 'r') as f: conf = json.loads(f.read()) reader = ResponseReader(conf) loader = Loader(conf) # get the set, validate, store outputs # but need to paginate because of ram issues for i in xrange(0, 668110, 25): print 'QUERYING {0}:{1}'.format(i, 25) for response in reader.read('', limit=25, offset=i): print response.source_url xml = response.cleaned_content stderr = validate_in_memory(xml) data = { "response_id": response.id, "valid": 'Error at' not in stderr, "validated_on": datetime.now() } if stderr: data.update({"errors": [s.strip() for s in stderr.split('\n\n')]}) print '\t{0}'.format(stderr[:100]) try: v = Validation() v.create(data) loader.load(v) except Exception as ex:
# set up the connection with open('local_rds.conf', 'r') as f: conf = json.loads(f.read()) reader = ResponseReader(conf) loader = Loader(conf) # get the set, validate, store outputs # but need to paginate because of ram issues for i in xrange(0, 668110, 25): print 'QUERYING {0}:{1}'.format(i, 25) for response in reader.read('', limit=25, offset=i): print response.source_url xml = response.cleaned_content stderr = validate_in_memory(xml) data = { "response_id": response.id, "valid": 'Error at' not in stderr, "validated_on": datetime.now() } if stderr: data.update({ "errors": [s.strip() for s in stderr.split('\n\n')] }) print '\t{0}'.format(stderr[:100]) try: v = Validation() v.create(data)