Example #1
import datetime
import json
import os
import sys
from argparse import ArgumentParser
from operator import itemgetter
from random import randint
from time import sleep

# The config, pdfrateproxy and utility modules are project-specific and are
# assumed to be importable from the surrounding package; their exact import
# paths are not shown in this excerpt.


def main():
    sys.stdout.write('PDFrate Query Scheduler running! [{0}]\n'.format(
        datetime.datetime.now()))
    parser = ArgumentParser(description='PDFrate Query Scheduler')
    parser.parse_args()

    QUERY_DIR = config.get('pdfratequeryscheduler', 'query_dir')
    REPLY_DIR = config.get('pdfratequeryscheduler', 'reply_dir')

    queries = []
    max_priority = 0
    sys.stdout.write('Queries found: ')
    # Process all files in the QUERY_DIR
    for f in os.listdir(QUERY_DIR):
        f = os.path.join(QUERY_DIR, f)
        if not os.path.isfile(f) or os.path.splitext(f)[1] != '.json':
            continue
        try:
            # Use a context manager so the file handle is closed promptly
            with open(f, 'r') as query_file:
                queries.append(json.load(query_file))
        except Exception as ex:
            sys.stderr.write('Error reading query file \'{f}\': {ex}\n'.format(
                f=f, ex=ex))
            continue
        # Keep track of max priority
        queries[-1]['queryfile'] = f
        if queries[-1]['priority'] > max_priority:
            max_priority = queries[-1]['priority']
        #sys.stdout.write('{0}\n'.format(queries[-1]))

    # In case of no queries
    if not queries:
        sys.stdout.write('None\nExiting.\n')
        return
    sys.stdout.write('{0}\n'.format(len(queries)))

    # Filter for max priority queries
    sys.stdout.write('Max priority: {0}\n'.format(max_priority))
    if max_priority != 0:
        queries = [q for q in queries if q['priority'] == max_priority]
    # The oldest one is next
    top_query = min(queries, key=itemgetter('datetime'))
    del queries
    sys.stdout.write('Next query: {0}\n'.format(top_query))

    # Submit query to PDFrate and save the reply
    proxy = pdfrateproxy.PdfrateProxy()
    sleep_time = randint(
        0, int(config.get('pdfratequeryscheduler', 'sleep_time')))
    sys.stdout.write('Sleeping for {0} seconds...\n'.format(sleep_time))
    sleep(sleep_time)
    sys.stdout.write('Getting report...\n')
    reply = proxy.get_report(utility.file_sha256_hash(top_query['filename']))
    if reply['status'] == 'noreport':
        sys.stdout.write('No report, submitting file...\n')
        reply = proxy.submit_file(top_query['filename'])

    if top_query['get_metadata'] and reply['status'] == 'success':
        # Also get metadata
        file_hash = os.path.splitext(os.path.basename(
            top_query['queryfile']))[0]
        sys.stdout.write('Getting metadata...\n')
        metadata_reply = proxy.get_metadata(file_hash)
        reply['metadata'] = metadata_reply['metadata']
        reply['status'] = metadata_reply['status']

    reply_filename = os.path.join(REPLY_DIR,
                                  os.path.basename(top_query['queryfile']))
    reply['filename'] = top_query['filename']
    sys.stdout.write('Writing reply to disk...\n')
    with open(reply_filename, 'w') as reply_file:
        json.dump(reply, reply_file)
    # Remove query file
    sys.stdout.write('Removing query file...\n')
    os.remove(top_query['queryfile'])
    sys.stdout.write('Exiting.\n')
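
# Illustrative sketch (not part of the scheduler above): one way a submitter
# could produce the query files that main() consumes. The keys mirror the ones
# read above ('filename', 'priority', 'datetime', 'get_metadata'), and the
# query file name follows the <sha256>.json convention implied by the
# file_hash extraction in main(). The timestamp encoding is an assumption;
# any value that sorts chronologically works with itemgetter('datetime').
def write_query(query_dir, pdf_path, priority=0, get_metadata=False):
    sha256 = utility.file_sha256_hash(pdf_path)
    query = {
        'filename': pdf_path,
        'priority': priority,
        'datetime': datetime.datetime.now().isoformat(),  # assumed: ISO timestamps sort chronologically
        'get_metadata': get_metadata,
    }
    query_path = os.path.join(query_dir, '{0}.json'.format(sha256))
    with open(query_path, 'w') as query_file:
        json.dump(query, query_file)
    return query_path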
Example #2
    def _get_reply_for_file(self, filename):
        '''
        Given a file, generates the path to the reply file for it.
        '''
        sha256 = utility.file_sha256_hash(filename)
        return os.path.join(self.reply_dir, '{0}.json'.format(sha256))
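
    # Hypothetical companion sketch (not in the source): a caller could use
    # _get_reply_for_file to poll until the scheduler has written a reply for
    # a submitted file. The method name and polling interval are illustrative;
    # os, json and time.sleep are assumed available as in the scheduler above.
    def _wait_for_reply(self, filename, poll_interval=30):
        '''
        Block until a reply file exists for the given file, then load it.
        '''
        reply_path = self._get_reply_for_file(filename)
        while not os.path.exists(reply_path):
            sleep(poll_interval)
        with open(reply_path, 'r') as reply_file:
            return json.load(reply_file)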