def test_auth():
    """Check that Hapy exposes the credentials it was constructed with.

    The same username/password pair is given to both ``hapy.Hapy`` and a
    reference ``HTTPDigestAuth`` object, and ``h.auth`` must carry identical
    ``username`` and ``password`` attributes.
    """
    # One source of truth for the credentials, so both sides of each
    # assertion are guaranteed to be built from the same values.
    username = 'username'
    password = 'password'
    h = hapy.Hapy(BASE_URL, username=username, password=password)
    expected = requests.auth.HTTPDigestAuth(username, password)
    assert_equals(expected.username, h.auth.username)
    assert_equals(expected.password, h.auth.password)
def test_supply_timeout(mock_requests):
    """A timeout given to the constructor is forwarded to ``requests.post``.

    Builds a job through a client created with ``timeout=0.005`` and checks
    the keyword arguments of the resulting POST call on the mocked module.
    """
    client = hapy.Hapy(BASE_URL, timeout=0.005)

    # Canned response handed back by the mocked ``post`` call.
    response = Mock(status_code=303, request=Mock())
    mock_requests.post.return_value = response

    job_name = 'test_build_job'
    client.build_job(job_name)

    expected_call = {
        'url': 'https://localhost:8443/engine/job/%s' % job_name,
        'data': {'action': 'build'},
        'auth': None,
        'verify': False,
        'headers': {'accept': 'application/xml'},
        'allow_redirects': False,
        'timeout': 0.005,
    }
    mock_requests.post.assert_called_with(**expected_call)
def _target_in_crawl_window(target, when):
    """Return True if ``when`` falls strictly inside the target's crawl window.

    A ``None`` start or end date leaves that side of the window open.
    """
    start_iso = target["crawlStartDateISO"]
    if start_iso is not None and dateutil.parser.parse(start_iso) >= when:
        return False
    end_iso = target["crawlEndDateISO"]
    return end_iso is None or dateutil.parser.parse(end_iso) > when


def restart_job(frequency, start=None):
    """Restarts the job for a particular frequency.

    Fetches the W3ACT export for ``frequency``, keeps the Targets whose crawl
    window contains ``start``, stops the job of that name if it is listed and
    has a non-empty status, and then starts a fresh ``W3actJob`` (or, when
    ``args.test`` is set, only logs what would have been started).

    Args:
        frequency: Crawl frequency; also used as the Heritrix job name.
        start: Reference time used to filter Targets by crawl window.
            Defaults to the current UTC time, evaluated on each call.

    Any ``Exception`` raised while restarting is logged with its traceback
    and not re-raised.
    """
    # Resolve the default here: a ``datetime.utcnow()`` default in the
    # signature would be frozen at import time and reused by every call.
    if start is None:
        start = datetime.utcnow()

    logger.info("Restarting %s at %s" % (frequency, start))
    try:
        w = w3act(args.w3act_url, args.w3act_user, args.w3act_pw)
        export = w.get_ld_export(frequency)
        logger.debug("Found %s Targets in export." % len(export))

        targets = [t for t in export if _target_in_crawl_window(t, start)]
        logger.debug("Found %s Targets in date range." % len(targets))

        h = hapy.Hapy("https://%s:%s" % (args.host, args.port),
                      username=args.user, password=args.password)
        # NOTE(review): listjobs()/status() look like they belong to the
        # client this code used before hapy - confirm hapy.Hapy provides them.
        if frequency in h.listjobs() and h.status(frequency) != "":
            stop_running_job(frequency, h)

        #TODO: Automated QA
        job = W3actJob(targets, name=frequency, heritrix=h)
        if not args.test:
            logger.debug("Starting job %s with %s seeds." % (job.name, len(job.seeds)))
            job.start()
        else:
            logger.debug("Would start job %s with %s seeds." % (job.name, len(job.seeds)))
            logger.debug("Seeds:")
            for surl in job.seeds:
                logger.debug("- %s" % surl)
    except Exception:
        # Best-effort: report and carry on, but let KeyboardInterrupt and
        # SystemExit propagate instead of being swallowed.
        logger.error("%s: %s" % (frequency, str(sys.exc_info())))
        logger.error("%s: %s" % (frequency, traceback.format_exc()))
def _as_job_list(value):
    """Wrap a lone job entry in a list so callers can always iterate."""
    return value if isinstance(value, list) else [value]


def main(argv=None):
    """
    h3cc Command crawler control.

    Parses the command line, connects to a Heritrix3 instance through
    ``hapy.Hapy`` and carries out the single positional ``command`` against
    the job selected with ``-j/--job``.

    Args:
        argv: Optional extra arguments. When given they are appended to
            ``sys.argv``, which is what the parser actually reads.

    Returns:
        0 after the command has been handled (including an unrecognised
        command, which is only logged) or on ``KeyboardInterrupt``;
        2 if any other exception was raised.
    """
    if argv is None:
        argv = sys.argv
    else:
        sys.argv.extend(argv)

    program_name = os.path.basename(sys.argv[0])
    program_version = "v%s" % __version__
    program_build_date = str(__updated__)
    program_version_message = '%%(prog)s %s (%s)' % (program_version, program_build_date)
    program_shortdesc = __import__('__main__').__doc__
    program_license = '''%s

  Created by Andrew Jackson on %s.
  Copyright 2016 The British Library.

  Licensed under the Apache License 2.0
  http://www.apache.org/licenses/LICENSE-2.0

  Distributed on an "AS IS" basis without warranties
  or conditions of any kind, either express or implied.

USAGE
''' % (program_shortdesc, str(__date__))

    try:
        # Setup argument parser
        parser = ArgumentParser(description=program_license,
                                formatter_class=RawDescriptionHelpFormatter)
        parser.add_argument("-v", "--verbose", dest="verbose", action="count",
                            help="set verbosity level [default: %(default)s]",
                            default=0)
        parser.add_argument('-V', '--version', action='version',
                            version=program_version_message)
        parser.add_argument('-j', '--job', dest='job', default='frequent',
                            help="Name of job to operate upon. [default: %(default)s]")
        parser.add_argument('-H', '--host', dest='host', default='localhost',
                            help="Name of host to connect to. [default: %(default)s]")
        parser.add_argument('-P', '--port', dest='port', default='8443',
                            help="Secure port to connect to. [default: %(default)s]")
        parser.add_argument('-u', '--user', dest='user', type=str, default="heritrix",
                            help="H3 user to login with [default: %(default)s]")
        parser.add_argument('-p', '--password', dest='password', type=str,
                            default="heritrix",
                            help="H3 user password [default: %(default)s]")
        # The short and long flags must be separate arguments: without the
        # comma Python concatenates the adjacent literals into one bogus
        # option string such as '-q--query-url'.
        parser.add_argument('-q', '--query-url', dest='query_url', type=str,
                            default='http://www.bbc.co.uk/news',
                            help="URL to use for queries [default: %(default)s]")
        parser.add_argument('-l', '--query-limit', dest='query_limit', type=int,
                            default=10,
                            help="Maximum number of results to return from queries [default: %(default)s]")
        parser.add_argument(dest="command",
                            help="Command to carry out. One of: "
                                 + ", ".join(H3_SCRIPTS_JOB + H3_SCRIPTS_JOB_URL)
                                 + ". [default: %(default)s]",
                            metavar="command")

        # Process arguments
        args = parser.parse_args()

        # Up the logging
        verbose = args.verbose
        if verbose > 0:
            logger.setLevel(logging.DEBUG)

        # talk to h3:
        ha = hapy.Hapy("https://%s:%s" % (args.host, args.port),
                       username=args.user, password=args.password)
        job = args.job

        # Commands:
        command = args.command
        if command == "status":
            print(ha.get_info())
        elif command == "list-jobs":
            # The jobs value may be a single entry or a list of entries;
            # normalise so both shapes are handled.
            for j in _as_job_list(ha.get_info()['engine']['jobs']['value']):
                print(j['key'])
        elif command == "job-summary":
            for j in _as_job_list(ha.get_info()['engine']['jobs']['value']):
                if job == j['key']:
                    print(j)
        elif command == "job-build":
            ha.build_job(job)
        elif command == "job-launch":
            ha.launch_job(job)
        elif command == "job-resume":
            ha.launch_from_latest_checkpoint(job)
        elif command == "job-pause":
            ha.pause_job(job)
        elif command == "job-unpause":
            ha.unpause_job(job)
        elif command == "job-checkpoint":
            ha.checkpoint_job(job)
        elif command == "job-terminate":
            ha.terminate_job(job)
        elif command == "job-teardown":
            ha.teardown_job(job)
        elif command == "job-status":
            print(ha.get_job_info(job)['job']['statusDescription'])
        elif command == "job-info":
            print(ha.get_job_info(job))
        elif command == "job-info-json":
            print(json.dumps(ha.get_job_info(job), indent=4))
        elif command == "job-cxml":
            print(ha.get_job_configuration(job))
        elif command in H3_SCRIPTS_JOB:
            # Script commands map onto a '<command>.groovy' template.
            template = env.get_template('%s.groovy' % command)
            r = ha.execute_script(engine="groovy", script=template.render(), name=job)
            print(r[0])
        elif command in H3_SCRIPTS_JOB_URL:
            # Same as above, but the template is rendered with the query
            # URL and result limit taken from the command line.
            template = env.get_template('%s.groovy' % command)
            r = ha.execute_script(engine="groovy",
                                  script=template.render({
                                      "url": args.query_url,
                                      "limit": args.query_limit
                                  }),
                                  name=job)
            print(r[0])
        else:
            logger.error("Can't understand command '%s'" % command)

        return 0
    except KeyboardInterrupt:
        ### handle keyboard interrupt ###
        return 0
    except Exception as e:
        indent = len(program_name) * " "
        sys.stderr.write(program_name + ": " + repr(e) + "\n")
        sys.stderr.write(indent + "  for help use --help")
        logger.exception(e)
        return 2
while action not in info['job']['availableActions']['value']: time.sleep(1) info = h.get_job_info(job_name) # main try: name = sys.argv[1] except IndexError: print "Usage: run_job.py job-name" sys.exit(1) try: h = hapy.Hapy('https://localhost:8443', username='******', password='******') state = get_state(h, name) if cmp(state, "running"): print "error: job is still running" sys.exit(1) elif not cmp(state, "finished"): print "error: job is in unexpected state: %s" % state sys.exit(1) # job should be finished by now h.teardown_job(name) wait_for_state(h, name, "unbuilt") # now build it h.build_job(name)
def test_auth_no_username():
    """With only a password supplied, the client's ``auth`` must be None."""
    client = hapy.Hapy(BASE_URL, password='******')
    assert_is_none(client.auth)
def test_auth_no_password():
    """With only a username supplied, the client's ``auth`` must be None."""
    client = hapy.Hapy(BASE_URL, username='******')
    assert_is_none(client.auth)
def test_auth_nothing():
    """With no credentials supplied, the client's ``auth`` must be None."""
    client = hapy.Hapy(BASE_URL)
    assert_is_none(client.auth)
def test_url_normalise():
    """A trailing slash on the URL must not change the resulting ``base_url``."""
    without_slash = hapy.Hapy('http://localhost:8443')
    with_slash = hapy.Hapy('http://localhost:8443/')
    assert_equals(without_slash.base_url, with_slash.base_url)
def setup():
    """Create a client for ``BASE_URL`` and publish it as the module-level ``h``."""
    global h
    client = hapy.Hapy(BASE_URL)
    h = client