def producer_queue(queue, ranges):
    """Validate one page of responses and put the results on ``queue``.

    Queries ``Response.id`` and ``Response.cleaned_content`` for rows whose
    ``schemas`` value is neither NULL nor ``'{}'``, runs
    ``validate_in_memory`` on each row's cleaned content and puts one result
    dict per row on the queue. After the last row the string ``'STOP'`` is
    put on the queue.

    Each result dict has the keys ``response_id``, ``valid`` (False when the
    validator output contains ``'Error at'``) and ``validated_on`` (ISO 8601
    string of the current local time), plus ``errors`` (list of non-empty,
    stripped chunks of the validator output split on blank lines) when the
    validator produced any output.

    Args:
        queue: object with a ``put`` method; receives the result dicts and
            the final ``'STOP'`` marker.
        ranges: indexable pair; ``ranges[0]`` is passed to ``limit()`` and
            ``ranges[1]`` to ``offset()``.
    """
    page = (
        read_session.query(Response.id, Response.cleaned_content)
        .filter(and_(
            # ``Response.schemas is not None`` would be evaluated by Python
            # (always True for a mapped attribute) and never reach the SQL;
            # ``isnot(None)`` renders the intended ``IS NOT NULL``.
            Response.schemas.isnot(None),
            Response.schemas != '{}',
        ))
        # NOTE(review): no ORDER BY is applied, so page contents depend on
        # the database's row order -- confirm this is acceptable.
        .limit(ranges[0])
        .offset(ranges[1])
        .all()
    )

    for response_id, cleaned_content in page:
        stderr = validate_in_memory(cleaned_content)
        validated_data = {
            "response_id": response_id,
            "valid": 'Error at' not in stderr,
            "validated_on": datetime.now().isoformat(),
        }
        if stderr:
            # Messages are separated by blank lines; drop empty chunks.
            validated_data["errors"] = [
                s.strip() for s in stderr.split('\n\n') if s
            ]
        queue.put(validated_data)

    # Tell the consumer that this producer has no more items.
    queue.put('STOP')
# Example no. 2
# set up the connection
# The settings are read from local_rds.conf (parsed as JSON) and the same
# conf object is handed to both the reader and the loader.
with open('local_rds.conf', 'r') as f:
    conf = json.loads(f.read())
reader = ResponseReader(conf)

loader = Loader(conf)

# get the set, validate, store outputs
# but need to paginate because of ram issues
# Pages are 25 rows each; offsets run from 0 up to (not including) 668110.
# NOTE(review): 668110 is presumably the total row count when this was
# written -- confirm before re-running.
for i in xrange(0, 668110, 25):
    print 'QUERYING {0}:{1}'.format(i, 25)
    for response in reader.read('', limit=25, offset=i):
        print response.source_url

        xml = response.cleaned_content
        # The validator's return value is used as text below (substring test
        # and split); presumably it is the validator's stderr -- confirm.
        stderr = validate_in_memory(xml)

        # "valid" is False only when the output contains 'Error at'.
        data = {
            "response_id": response.id,
            "valid": 'Error at' not in stderr,
            "validated_on": datetime.now()
        }
        if stderr:
            # Split the output on blank lines into individual messages.
            data.update({"errors": [s.strip() for s in stderr.split('\n\n')]})
            # Echo only the first 100 characters of the output.
            print '\t{0}'.format(stderr[:100])

        # Build a Validation from the dict and pass it to the loader.
        try:
            v = Validation()
            v.create(data)
            loader.load(v)
        except Exception as ex:
            # NOTE(review): the handler body is missing from this excerpt.
# set up the connection
with open('local_rds.conf', 'r') as f:
    conf = json.loads(f.read())
reader = ResponseReader(conf)

loader = Loader(conf)

# get the set, validate, store outputs
# but need to paginate because of ram issues
for i in xrange(0, 668110, 25):
    print 'QUERYING {0}:{1}'.format(i, 25)
    for response in reader.read('', limit=25, offset=i):
        print response.source_url

        xml = response.cleaned_content
        stderr = validate_in_memory(xml)

        data = {
            "response_id": response.id,
            "valid": 'Error at' not in stderr,
            "validated_on": datetime.now()
        }
        if stderr:
            data.update({
                "errors": [s.strip() for s in stderr.split('\n\n')]
            })
            print '\t{0}'.format(stderr[:100])

        try:
            v = Validation()
            v.create(data)