def setUp(self):
    self.conf = CONFIG
    self.db = {}
    self.input_queue = PriorityQueue()
    self.filtered_queue = PriorityQueue()
    self.filter = StatuslistFilter(self.conf, self.input_queue, self.filtered_queue, self.db)
def __init__(self, target, threads=100, mode=False):
    self.start_time = time.time()
    self.target = target.strip()
    self.threads = threads
    self.file = "subnames.txt"
    self.full_scan = mode
    self.ignore_intranet = False
    self.scan_count = self.found_count = 0
    self.console_width = getTerminalSize()[0] - 2
    self.resolvers = [dns.resolver.Resolver(configure=False) for _ in range(self.threads)]
    for _ in self.resolvers:
        _.lifetime = _.timeout = 10.0
    self.print_count = 0
    self.basedir = os.path.dirname(os.path.dirname(__file__))
    self.STOP_ME = False
    self._load_dns_servers()
    self._load_next_sub()
    self.queue = PriorityQueue()
    self.priority = 0
    self._load_sub_names()
    self.ip_dict = {}
    self.found_subs = set()
    self.ex_resolver = dns.resolver.Resolver(configure=False)
    self.ex_resolver.nameservers = self.dns_servers
    self.result_domains = []
    self.result_ips = []
def __init__(self, *params):
    (self.domain, self.options, self.process_num, self.dns_servers, self.next_subs,
     self.scan_count, self.found_count, self.queue_size_array, tmp_dir) = params
    self.dns_count = len(self.dns_servers)
    self.scan_count_local = 0
    self.found_count_local = 0
    self.resolvers = [dns.resolver.Resolver(configure=False) for _ in range(self.options.threads)]
    for r in self.resolvers:
        r.lifetime = r.timeout = 10.0
    self.queue = PriorityQueue()
    self.priority = 0
    self.ip_dict = {}
    self.found_subs = set()
    self.timeout_subs = {}
    self.count_time = time.time()
    self.outfile = open("%s/%s_part_%s.txt" % (tmp_dir, self.domain, self.process_num), "w")
    self.normal_names_set = set()
    self.load_sub_names()
    self.lock = RLock()
def __init__(self, target, subdomainfile=None):
    self.start_time = time.time()
    self.target = target.strip()
    self.ignore_intranet = config.ignore_intranet
    self.scan_count = self.found_count = 0
    self.console_width = getTerminalSize()[0] - 2
    self.threads = config.threads
    self.resolvers = [dns.resolver.Resolver(configure=False) for _ in range(self.threads)]
    for _ in self.resolvers:
        _.lifetime = _.timeout = 10.0
    self.print_count = 0
    self.STOP_ME = False
    try:
        self.full_scan = config.full_scan
    except:
        self.full_scan = False
    self.subdomainfile = subdomainfile if subdomainfile != None else config.subnamefile
    self.basedir = os.path.dirname(os.path.dirname(__file__))  # Teemo home dir
    self._load_dns_servers()
    self._load_next_sub()
    self.queue = PriorityQueue()
    self.priority = 0
    self._load_sub_names()
    self.ip_dict = {}
    self.found_subs = set()
    self.ex_resolver = dns.resolver.Resolver(configure=False)
    self.ex_resolver.nameservers = self.dns_servers
    self.result_lines = []
    self.result_domains = []
    self.result_ips = []
def __init__(self, **kwargs):
    self.connection_cls = kwargs.get('connection_cls', AWSConnection)
    self.aws_region = kwargs.get('aws_region')
    self.aws_access_key_id = kwargs.get('aws_access_key_id')
    self.aws_secret_access_key = kwargs.get('aws_secret_access_key')
    self.log_group_name = kwargs.get('log_group_name')
    self.log_stream_name = kwargs.get('log_stream_name')
    self.watch = kwargs.get('watch')
    self.color_enabled = kwargs.get('color_enabled')
    self.output_stream_enabled = kwargs.get('output_stream_enabled')
    self.output_group_enabled = kwargs.get('output_group_enabled')
    self.start = self.parse_datetime(kwargs.get('start'))
    self.end = self.parse_datetime(kwargs.get('end'))
    self.pool_size = max(kwargs.get('pool_size', 0), 10)
    self.max_group_length = 0
    self.max_stream_length = 0
    self.publishers = []
    self.events_queue = Queue()
    self.raw_events_queue = PriorityQueue()
    self.publishers_queue = PriorityQueue()
    self.publishers = []
    self.stream_status = {}
    self.stream_max_timestamp = {}
    self.connection = self.connection_cls(
        self.aws_region,
        aws_access_key_id=self.aws_access_key_id,
        aws_secret_access_key=self.aws_secret_access_key
    )
def __init__(self, name, fsm_id, states, initial_state, tracer, channel_tracer,
             fsm_registry, fsm_id_seq, inventory, play_header, outputs):
    self.shutting_down = False
    self.is_shutdown = False
    self.fsm_registry = fsm_registry
    self.name = name
    self.fsm_id = fsm_id
    self.tracer = tracer
    self.channel_tracer = channel_tracer
    self.state = initial_state
    self.states = states
    self.inbox = PriorityQueue()
    self.message_buffer = Queue()
    self.self_channel = Channel(self, self, tracer, self.inbox)
    self.worker = AnsibleTaskWorker(tracer, next(fsm_id_seq), inventory, play_header)
    self.worker_output_queue = Queue()
    self.worker.controller.outboxes['output'] = self.worker_output_queue
    self.worker.queue.put(Inventory(0, inventory))
    self.outboxes = dict(default=None)
    self.last_event = NULL_EVENT
    self.task_id_seq = count(0)
    self.failure_count = 0
    if outputs:
        self.outboxes.update({name: None for name in outputs})
    self.thread = gevent.spawn(self.receive_messages)
def __init__(self, target, options):
    self.start_time = time.time()
    self.target = target.strip()
    self.options = options
    self.ignore_intranet = options.i
    self.scan_count = self.found_count = 0
    self.console_width = getTerminalSize()[0] - 2
    self.resolvers = [dns.resolver.Resolver(configure=False) for _ in range(options.threads)]
    for _ in self.resolvers:
        _.lifetime = _.timeout = 10.0
    self.print_count = 0
    self.STOP_ME = False
    self._load_dns_servers()
    self._load_next_sub()
    self.queue = PriorityQueue()
    self.priority = 0
    self._load_sub_names()
    if options.taskid:
        self.taskid = options.taskid
    else:
        self.taskid = 0
    self.ip_dict = {}
    self.found_subs = set()
    self.sub_domain = []
    self.ex_resolver = dns.resolver.Resolver(configure=False)
    self.ex_resolver.nameservers = self.dns_servers
def __init__(self, target, options):
    self.start_time = time.time()
    self.target = target.strip()
    self.options = options
    self.ignore_intranet = options.i
    self.scan_count = self.found_count = 0
    self.console_width = getTerminalSize()[0] - 2
    self.resolvers = [dns.resolver.Resolver(configure=False) for _ in range(options.threads)]
    for _ in self.resolvers:
        _.lifetime = _.timeout = 10.0
    self.print_count = 0
    self.STOP_ME = False
    self._load_dns_servers()
    self._load_next_sub()
    self.queue = PriorityQueue()
    self.priority = 0
    self._load_sub_names()
    if options.output:
        outfile = options.output
    else:
        _name = os.path.basename(self.options.file).replace('subnames', '')
        if _name != '.txt':
            _name = '_' + _name
        outfile = target + _name if not options.full_scan else target + '_full' + _name
    self.outfile = open(outfile, 'w')
    self.ip_dict = {}
    self.found_subs = set()
    self.ex_resolver = dns.resolver.Resolver(configure=False)
    self.ex_resolver.nameservers = self.dns_servers
    self.result_lines = []
    self.result_domains = []
    self.result_ips = []
def __init__(self, target, options, process_num, dns_servers, cdns, next_subs,
             scan_count, found_count, queue_size_list, tmp_dir):
    self.target = target.strip()
    self.options = options
    self.process_num = process_num
    self.dns_servers = dns_servers
    self.cdns = cdns
    self.dns_count = len(dns_servers)
    self.next_subs = next_subs
    self.scan_count = scan_count
    self.scan_count_local = 0
    self.found_count = found_count
    self.found_count_local = 0
    self.queue_size_list = queue_size_list
    self.resolvers = [dns.resolver.Resolver(configure=False) for _ in range(options.threads)]
    for _r in self.resolvers:
        _r.lifetime = _r.timeout = 6.0
    self.queue = PriorityQueue()
    self.item_index = 0
    self.priority = 0
    self._load_sub_names()
    self.ip_dict = {}
    self.found_subs = set()
    self.ex_resolver = dns.resolver.Resolver(configure=False)
    self.ex_resolver.nameservers = dns_servers
    self.local_time = time.time()
    self.outfile = open('%s/%s_part_%s.txt' % (tmp_dir, target, process_num), 'w')
def __init__(self, l, func, num=20):
    self.queue = PriorityQueue()
    for item in l:
        self.queue.put(item)
    self.num = num
    self.func = func
    self.stop = False
    self.results = PriorityQueue()
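A minimal, self-contained sketch (not taken from the original project) of how a pool like the one above is typically driven with gevent: a fixed number of greenlets drain one PriorityQueue and push the `func(item)` results onto another.

import gevent
from gevent.queue import PriorityQueue, Empty

def run_pool(items, func, num=20):
    # Hypothetical driver: fill a priority queue, let `num` greenlets drain it,
    # and collect func(item) results on a second queue.
    queue, results = PriorityQueue(), PriorityQueue()
    for item in items:
        queue.put(item)

    def worker():
        while True:
            try:
                item = queue.get_nowait()
            except Empty:
                break
            results.put(func(item))

    gevent.joinall([gevent.spawn(worker) for _ in range(num)])
    return [results.get() for _ in range(results.qsize())]

# e.g. run_pool([3, 1, 2], lambda x: x * x) returns [1, 4, 9]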
def __init__(self, from_fsm, to_fsm, tracer, queue=None):
    if queue is None:
        self.queue = PriorityQueue()
    else:
        self.queue = queue
    self.from_fsm = from_fsm
    self.to_fsm = to_fsm
    self.tracer = tracer
def __init__(self):
    self.start_time = time.time()
    self.queue = PriorityQueue()
    self.history = []
    self.total_count = 0
    self.scan_count = 0
    self._load_target()
    self.outfile = open("log.log", 'w')
    self.console_width = getTerminalSize()[0] - 2
def __init__(self, start_requests):
    self.start_request = start_requests
    self.domain = tldextract.extract(self.start_request.url).domain
    self.request_queue = PriorityQueue()
    self.result = {
        start_requests.url: 0,
    }
    self.gl_list = []
    self.stop_flag = False
def __init__(self, zoomeye_results, threads_num):
    self.threads_num = threads_num
    self.targets = PriorityQueue()
    self.zoomeye_results = zoomeye_results
    self.result = []
    for zoomeye_result in zoomeye_results:
        self.targets.put(zoomeye_result)
    self.total = self.targets.qsize()
    self.pbar = tqdm(total=self.total, ascii=True)
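The constructor above pairs a PriorityQueue of targets with a tqdm progress bar; a self-contained sketch of the drain loop such a scanner usually runs (the names, addresses, and two-greenlet count here are illustrative, not the original methods):

import gevent
from gevent.queue import PriorityQueue, Empty
from tqdm import tqdm

targets = PriorityQueue()
for ip in ['203.0.113.1', '203.0.113.2', '203.0.113.3']:
    targets.put(ip)
pbar = tqdm(total=targets.qsize(), ascii=True)

def scan_worker():
    # Each greenlet pops targets until the queue is empty.
    while True:
        try:
            target = targets.get_nowait()
        except Empty:
            break
        # ... probe `target` here ...
        pbar.update(1)

gevent.joinall([gevent.spawn(scan_worker) for _ in range(2)])
pbar.close()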
def test__check_bulk(self):
    input_queue = PriorityQueue()
    queue = PriorityQueue()
    old_date_modified = datetime.now().isoformat()
    id_1 = uuid4().hex
    date_modified_1 = datetime.now().isoformat()
    id_2 = uuid4().hex
    date_modified_2 = datetime.now().isoformat()
    id_3 = uuid4().hex
    date_modified_3 = datetime.now().isoformat()
    db = MagicMock()
    bulk = {
        id_1: date_modified_1,
        id_2: date_modified_2,
        id_3: date_modified_3
    }
    priority_cache = {id_1: 1, id_2: 1, id_3: 1}
    return_value = {
        u'docs': [
            {
                u'_type': u'Tender',
                u'_source': {u'dateModified': date_modified_1},
                u'_index': u'bridge_tenders',
                u'_version': 1,
                u'found': True,
                u'_id': id_1
            },
            {
                u'_type': u'Tender',
                u'_source': {u'dateModified': old_date_modified},
                u'_index': u'bridge_tenders',
                u'_version': 1,
                u'found': True,
                u'_id': id_2
            },
            {
                u'found': False,
                u'_type': u'Tender',
                u'_id': id_3,
                u'_index': u'bridge_tenders'
            }
        ]
    }
    db.mget.return_value = return_value
    elastic_filter = BasicElasticSearchFilter(self.config, input_queue, queue, db)
    self.assertEqual(queue.qsize(), 0)
    elastic_filter._check_bulk(bulk, priority_cache)
    self.assertEqual(queue.qsize(), 2)
def __init__(self, target, options):
    # Set up the priority queue
    self.queue = PriorityQueue()
    self.priority = 0
    # Basic settings taken from the arguments
    self.target = target.strip()
    self.options = options
    self.ignore_intranet = options.get('ignore_intranet')
    # Derive the result file name from the main domain
    outfile_name = options.get('file') if options.get('file') else target
    # Whether to use the large (full) dictionaries
    if self.options.get('subnames_full'):
        outfile_name += '_sfull'
    if self.options.get('next_sub_full'):
        outfile_name += '_nfull'
    self.fname = 'results/' + outfile_name + '.txt'
    self.outfile = open('results/' + outfile_name + '.txt', 'wb')
    self.outfile_ips = open('results/' + outfile_name + '_ip.txt', 'w')
    # Set up the DNS resolvers (one per configured thread)
    # QUESTION: still unclear why configure=False instead of the usual /etc/resolv.conf configuration??
    self.resolvers = [dns.resolver.Resolver(configure=False) for _ in range(options.get('threads'))]
    for _ in self.resolvers:
        '''
        dns.resolver.Resolver: http://www.dnspython.org/docs/1.14.0/dns.resolver.Resolver-class.html
        dns.resolver.Resolver.lifetime: The total number of seconds to spend trying to get an answer to the question.
        dns.resolver.Resolver.timeout: The number of seconds to wait for a response from a server, before timing out.
        '''
        # QUESTION: what is the difference between lifetime and timeout?
        _.lifetime = _.timeout = 10.0
    # Load the DNS server list
    self._load_dns_servers()
    # self.ex_resolver is a fallback resolver used when an exception occurs
    self.ex_resolver = dns.resolver.Resolver(configure=False)
    self.ex_resolver.nameservers = self.dns_servers
    self.logfile = open('results/' + target + '_log.txt', 'a')
    # set subdomain dict set
    self._load_next_sub()
    self._load_sub_names()
    # set initial parameters
    self.start_time = time.time()
    self.scan_count = 0
    self.found_count = 0
    # prefixes already verified to exist as subdomains
    self.STOP_ME = False
    self.ip_dict = {}
    self.found_subs = set()
def __init__(self, service):
    self.service = service
    self._queue = PriorityQueue()
    # done keeps the tasks that have been extracted from the queue
    # so we can inspect them later
    # keep the done tasks on disk, not in memory.
    # now we use the filesystem, but we could plug any key-value store or database behind;
    # check TaskStorageBase to see the interface your storage needs to have
    # to be used to store tasks
    # self._done = TaskStorageFile(self)
    self._done = TaskStorageSqlite(self)
    # pointer to current task
    self._current = None
    self._current_mu = Semaphore()
def Channel(from_fsm, to_fsm, tracer, queue=None):
    if settings.instrumented:
        return _Channel(from_fsm, to_fsm, tracer, queue)
    if queue is not None:
        return queue
    else:
        return PriorityQueue()
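Whichever branch runs, callers use the returned object through the queue interface; a small, self-contained illustration of that contract, using the bare PriorityQueue the uninstrumented branch returns:

from gevent.queue import PriorityQueue

ch = PriorityQueue()  # what Channel(...) returns when settings.instrumented is falsy and queue is None
ch.put((5, 'low priority message'))
ch.put((0, 'high priority message'))
assert ch.get() == (0, 'high priority message')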
def main(n_processor=3, n_picker=3, queue_size=10, *a, **kw):
    """
    Queue processor simulator.

    Parameters
    ----------
    n_processor : int
        Number of processors working simultaneously
    n_picker : int
        Number of pickers working simultaneously
    queue_size : int
        Maximum allowed size of queue

    Returns
    ----------
    None
    """
    pages = page_generator()
    global queue
    queue = PriorityQueue(maxsize=queue_size)
    spawn_list = []
    for i in range(n_processor):
        greenlet = gevent.spawn(page_processor, 'Processor {0}'.format(i + 1), pages, next_step, queue)
        spawn_list.append(greenlet)
    for j in range(n_picker):
        greenlet = gevent.spawn(pick_page, 'Picker {0}'.format(j + 1), pages, queue)
        spawn_list.append(greenlet)
    gevent.joinall(spawn_list)
    print('####################### END #######################')
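The page_generator/page_processor/pick_page/next_step helpers spawned above are not shown; a hypothetical picker/processor pair along these lines would match the call signatures used in main (the bodies are illustrative only):

import random
import gevent

def pick_page(name, pages, queue):
    # Producer: queue (priority, page) pairs; put() blocks once queue_size items are pending.
    for page in pages:
        queue.put((random.randint(1, 10), page))
        gevent.sleep(0)

def page_processor(name, pages, next_step, queue):
    # Consumer: always take the lowest-priority item first and pass it to the next stage.
    while True:
        priority, page = queue.get()
        next_step(page)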
def __init__(self, target, options, process_num, dns_servers, next_subs,
             scan_count, found_count, queue_size_list, tmp_dir):
    self.target = target.strip()
    self.options = options
    self.process_num = process_num
    self.dns_servers = dns_servers
    self.dns_count = len(dns_servers)
    self.next_subs = next_subs
    self.scan_count = scan_count
    self.scan_count_local = 0
    self.found_count = found_count
    self.found_count_local = 0
    self.queue_size_list = queue_size_list
    self.resolvers = [dns.resolver.Resolver(configure=False) for _ in range(options.threads)]
    for _r in self.resolvers:
        _r.lifetime = _r.timeout = 6.0
    self.queue = PriorityQueue()
    self.item_index = 0
    self.priority = 0
    self._load_sub_names()
    self.ip_dict = {}
    self.found_subs = set()
    self.ex_resolver = dns.resolver.Resolver(configure=False)
    self.ex_resolver.nameservers = dns_servers
    self.local_time = time.time()
    self.outfile = open('%s/%s_part_%s.txt' % (tmp_dir, target, process_num), 'w')
def __init__(self, crawler):
    self._crawler = crawler
    self.proxy_pool = Queue()
    self._proxy_lock = RLock()
    max_connections = crawler.max_connections
    self._request_queue = PriorityQueue()
    self._request_semaphore = BoundedSemaphore(max_connections)
def worker():
    q = PriorityQueue()
    q.put(Job(5, 'mid job'))
    q.put(Job(10, 'low job'))
    q.put(Job(1, 'high job'))
    while not q.empty():
        job = q.get()
        print(job)
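The example above assumes a `Job` type whose instances sort by priority; a minimal sketch of such a class (hypothetical, not part of the original snippet) — gevent's PriorityQueue orders items with `<`, so the class only needs to be comparable:

import functools

@functools.total_ordering
class Job(object):
    def __init__(self, priority, description):
        self.priority = priority
        self.description = description

    def __eq__(self, other):
        return self.priority == other.priority

    def __lt__(self, other):
        return self.priority < other.priority

    def __repr__(self):
        return 'Job(%r, %r)' % (self.priority, self.description)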
def __init__(self, target, options):
    self.start_time = time.time()
    self.target = target.strip()
    self.options = options
    self.scan_count = self.found_count = 0
    self.console_width = os.get_terminal_size()[0] - 2
    # create dns resolver pool ~ workers
    self.resolvers = [dns.resolver.Resolver(configure=False) for _ in range(options.threads)]
    for resolver in self.resolvers:
        resolver.lifetime = resolver.timeout = 10.0
    self.print_count = 0
    self.STOP_ME = False
    # load dns servers and check whether these dns servers work fine
    self._load_dns_servers()
    # load sub names
    self.subs = []       # subs in file
    self.goodsubs = []   # checks ok for further exploitation
    self._load_subname('dict/subnames.txt', self.subs)
    # load sub.sub names
    self.subsubs = []
    self._load_subname('dict/next_sub.txt', self.subsubs)
    # results will be saved to target.txt
    global path
    path = os.path.join("results", target)
    if not os.path.exists(path):
        os.makedirs(path)
    self.outfile = open('%s/%s.txt' % (path, target), 'w')
    self.ip_dict = set()
    # self.found_sub = set()
    # task queue
    self.queue = PriorityQueue()
    for sub in self.subs:
        self.queue.put(sub)
class HttpTest(object):
    def __init__(self, host, keyword, ips, timeout):
        self.threads = 100
        self.queue = PriorityQueue()
        self.host = host
        self.keyword = keyword
        self.result = []
        for ip in ips:
            self.queue.put(ip)
        self.num = self.queue.qsize()
        self.i = 0
        self.success = 0
        self.timeout = timeout
        self.filename = os.path.join(rootPath, "result", host + ".log")
        self.outfile = open(self.filename, 'w')

    def _scan(self, j):
        while not self.queue.empty():
            try:
                item = self.queue.get(timeout=3.0)
                if config.HTTPS_Support:
                    host, domain, port = item, self.host, 443
                else:
                    host, domain, port = item, self.host, 80
                html = httpServer((host, domain, port), self.timeout)
                if html is not None and self.keyword in html:
                    self.outfile.write(item + '\n')
                    self.outfile.flush()
                    self.success += 1
            except:
                pass
            finally:
                self.i += 1
                msg = '[*] %s found, %s scanned , %s groups left' % (self.success, self.i, self.num - self.i)
                print_msg(msg)
        time.sleep(1.0)

    def run(self):
        threads = [gevent.spawn(self._scan, i) for i in range(self.threads)]
        gevent.joinall(threads)
        msg = '[+] All Done. Success:%d Saved in:%s' % (self.success, self.filename)
        print_msg(msg, line_feed=True)
def __init__(self, fsm_registry, connector_registry, configuration):
    self.fsm_registry = fsm_registry
    self.connector_registry = connector_registry
    self.context = zmq.Context.instance()
    self.socket = self.context.socket(zmq.ROUTER)
    if 'bind_port' in configuration:
        self.socket_port = configuration.get('bind_port')
        self.socket.bind('tcp://{0}:{1}'.format(configuration.get('bind_address', '127.0.0.1'), self.socket_port))
    else:
        self.socket_port = self.socket.bind_to_random_port('tcp://{0}'.format(configuration.get('bind_address', '127.0.0.1')))
    logger.info('starting zmq_thread')
    self.zmq_thread = gevent.spawn(self.receive_external_messages)
    self.inbox_thread = gevent.spawn(self.receive_internal_messages)
    self.inbox = PriorityQueue()
    self.message_id_seq = count(0)
    self.client_id_seq = count(0)
    self.clients = dict()
def test__get_resource_item_from_queue(self):
    items_queue = PriorityQueue()
    item = (1, uuid.uuid4().hex)
    items_queue.put(item)

    # Success test
    worker = ResourceItemWorker(resource_items_queue=items_queue, config_dict=self.worker_config)
    self.assertEqual(worker.resource_items_queue.qsize(), 1)
    priority, resource_item = worker._get_resource_item_from_queue()
    self.assertEqual((priority, resource_item), item)
    self.assertEqual(worker.resource_items_queue.qsize(), 0)

    # Empty queue test
    priority, resource_item = worker._get_resource_item_from_queue()
    self.assertEqual(resource_item, None)
    self.assertEqual(priority, None)
    del worker
def test_add_to_retry_queue(self, mocked_logger):
    retry_items_queue = PriorityQueue()
    worker = AgreementWorker(config_dict=self.worker_config, retry_resource_items_queue=retry_items_queue)
    resource_item = {'id': uuid.uuid4().hex}
    priority = 1000
    self.assertEqual(retry_items_queue.qsize(), 0)

    # Add to retry_resource_items_queue
    worker.add_to_retry_queue(resource_item, priority=priority)
    self.assertEqual(retry_items_queue.qsize(), 1)
    priority, retry_resource_item = retry_items_queue.get()
    self.assertEqual((priority, retry_resource_item), (1001, resource_item))

    resource_item = {'id': 0}
    # Add to retry_resource_items_queue with status_code '429'
    worker.add_to_retry_queue(resource_item, priority, status_code=429)
    self.assertEqual(retry_items_queue.qsize(), 1)
    priority, retry_resource_item = retry_items_queue.get()
    self.assertEqual((priority, retry_resource_item), (1001, resource_item))

    priority = 1002
    worker.add_to_retry_queue(resource_item, priority=priority)
    sleep(worker.config['retry_default_timeout'] * 2)
    self.assertEqual(retry_items_queue.qsize(), 1)
    priority, retry_resource_item = retry_items_queue.get()
    self.assertEqual((priority, retry_resource_item), (1003, resource_item))

    worker.add_to_retry_queue(resource_item, priority=priority)
    self.assertEqual(retry_items_queue.qsize(), 0)
    mocked_logger.critical.assert_called_once_with(
        'Tender {} reached limit retries count {} and droped from '
        'retry_queue.'.format(resource_item['id'], worker.config['retries_count']),
        extra={
            'MESSAGE_ID': 'dropped_documents',
            'JOURNAL_TENDER_ID': resource_item['id']
        })
    del worker
def __init__(self, signer, message_broker, trader_client, fee_rate=None):
    self._sign = signer.sign
    self.address = signer.address
    self.swaps = dict()  # offer_hash -> CommitmentTuple
    self.trader_client = trader_client
    # FIXME fee_rate should be int representation (int(float_rate/uint32.max_int)) for CSAdvertisements
    self.fee_rate = fee_rate
    self.message_broker = message_broker
    self.refund_queue = PriorityQueue()  # type: (TransferReceipt, substract_fee <bool>)
    self.message_queue = Queue()  # type: (messages.Signed, recipient (str) or None)
def __init__(self, host_url, resource, auth=None, params={}, headers=None,
             retrievers_params=DEFAULT_RETRIEVERS_PARAMS, adaptive=False, with_priority=False):
    LOGGER.info(f'Init SyncClient for resource {resource}')
    self.host = host_url
    self.auth = auth
    self.resource = resource
    self.adaptive = adaptive
    self.headers = headers
    self.params = params
    self.retrievers_params = retrievers_params
    self.queue = PriorityQueue(maxsize=retrievers_params['queue_size'])
def __init__(self, factory, retry_max=3, retry_delay=.1, timeout=-1,
             max_lifetime=600., max_size=10, options=None):
    self.max_size = max_size
    self.pool = PriorityQueue()
    self.size = 0
    self.factory = factory
    self.retry_max = retry_max
    self.retry_delay = retry_delay
    self.timeout = timeout
    self.max_lifetime = max_lifetime
    if options is None:
        self.options = {}
    else:
        self.options = options
def test_run(self):
    result = self.mox.CreateMock(AsyncResult)
    env = Envelope('*****@*****.**', ['*****@*****.**'])
    env.parse('From: [email protected]\r\n\r\ntest test\r\n')
    queue = PriorityQueue()
    queue.put((1, result, env))
    self.sock.recv(IsA(int)).AndReturn('220 Welcome\r\n')
    self.sock.sendall('EHLO test\r\n')
    self.sock.recv(IsA(int)).AndReturn('250-Hello\r\n250 PIPELINING\r\n')
    self.sock.sendall('MAIL FROM:<*****@*****.**>\r\nRCPT TO:<*****@*****.**>\r\nDATA\r\n')
    self.sock.recv(IsA(int)).AndReturn('250 Ok\r\n250 Ok\r\n354 Go ahead\r\n')
    self.sock.sendall('From: [email protected]\r\n\r\ntest test\r\n.\r\n')
    self.sock.recv(IsA(int)).AndReturn('250 Ok\r\n')
    result.set(True)
    self.sock.sendall('QUIT\r\n')
    self.sock.recv(IsA(int)).AndReturn('221 Goodbye\r\n')
    self.sock.close()
    self.mox.ReplayAll()
    client = SmtpRelayClient(None, queue, socket_creator=self._socket_creator, ehlo_as='test')
    client._run()
class Actor(Greenlet):
    """Simple implementation of the Actor pattern"""

    def __init__(self):
        self.inbox = PriorityQueue()
        self._handlers = {ShutdownRequest: self.receive_shutdown}
        Greenlet.__init__(self)

    def receive(self, msg):
        """Dispatch a received message to the appropriate type handler"""
        # log.debug("Received a message: " + repr(msg))
        cls = msg.__class__
        if cls in self._handlers.keys():
            self._handlers[cls](msg)
        else:
            raise NotImplementedError()

    def receive_shutdown(self, msg):
        self.running = False

    def send(self, msg, priority=50):
        """Place a message into the actor's inbox"""
        self.inbox.put((priority, msg))

    def _run(self):
        """Run the Actor in a blocking event loop"""
        self.running = True
        while self.running:
            prio, msg = self.inbox.get()
            self.receive(msg)
            del msg
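A hedged usage sketch for the Actor above (the `Text` message type and `Printer` subclass are illustrative, not part of the original code): handlers are registered per message class, and `send()` enqueues `(priority, message)` tuples, so lower numbers are delivered first.

class Text(object):
    def __init__(self, body):
        self.body = body

class Printer(Actor):
    def __init__(self):
        Actor.__init__(self)
        self._handlers[Text] = self.receive_text

    def receive_text(self, msg):
        print(msg.body)

p = Printer()
p.start()
p.send(Text('printed first'), priority=1)
p.send(Text('printed second'), priority=50)
p.send(ShutdownRequest(), priority=99)  # handled last, stops the event loop
p.join()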
def __init__(self, target, options):
    self.start_time = time.time()
    self.target = target.strip()
    self.options = options
    self.client = MongoClient(connect=False)
    self.host_db = self.client['orangescan']['subdomain']['host']
    self.ip_db = self.client['orangescan']['subdomain']['ip']
    self.ignore_intranet = options.i
    self.scan_count = self.found_count = 0
    self.console_width = getTerminalSize()[0] - 2
    self.resolvers = [dns.resolver.Resolver(configure=False) for _ in range(options.threads)]
    for _ in self.resolvers:
        _.lifetime = _.timeout = 10.0
    self.print_count = 0
    self.STOP_ME = False
    self._load_dns_servers()
    self._load_next_sub()
    self.queue = PriorityQueue()
    self.priority = 0
    self._load_sub_names()
    self.ip_dict = {}
    self.found_subs = set()
    self.ex_resolver = dns.resolver.Resolver(configure=False)
    self.ex_resolver.nameservers = self.dns_servers
class StaticSmtpRelay(Relay):
    """Manages the relaying of messages to a specific ``host:port``. Connections
    may be recycled when possible, to send multiple messages over a single
    channel.

    :param host: Host string to connect to.
    :param port: Port to connect to.
    :param pool_size: At most this many simultaneous connections will be open to
                      the destination. If this limit is reached and no
                      connections are idle, new attempts will block.
    :param tls: Optional dictionary of TLS settings passed directly as
                keyword arguments to :class:`gevent.ssl.SSLSocket`.
    :param tls_required: If given and True, it should be considered a delivery
                         failure if TLS cannot be negotiated by the client.
    :param connect_timeout: Timeout in seconds to wait for a client connection
                            to be successful before issuing a transient failure.
    :param command_timeout: Timeout in seconds to wait for a reply to each SMTP
                            command before issuing a transient failure.
    :param data_timeout: Timeout in seconds to wait for a reply to message data
                         before issuing a transient failure.
    :param idle_timeout: Timeout in seconds after a message is delivered before
                         a QUIT command is sent and the connection terminated.
                         If another message should be delivered before this
                         timeout expires, the connection will be re-used. By
                         default, QUIT is sent immediately and connections are
                         never re-used.

    """

    def __init__(self, host, port=25, pool_size=None, client_class=None, **client_kwargs):
        super(StaticSmtpRelay, self).__init__()
        if client_class:
            self.client_class = client_class
        else:
            from slimta.relay.smtp.client import SmtpRelayClient
            self.client_class = SmtpRelayClient
        self.host = host
        self.port = port
        self.queue = PriorityQueue()
        self.pool = set()
        self.pool_size = pool_size
        self.client_kwargs = client_kwargs

    def _remove_client(self, client):
        self.pool.remove(client)
        if not self.queue.empty() and not self.pool:
            self._add_client()

    def _add_client(self):
        client = self.client_class((self.host, self.port), self.queue, **self.client_kwargs)
        client.start()
        client.link(self._remove_client)
        self.pool.add(client)

    def _check_idle(self):
        for client in self.pool:
            if client.idle:
                return
        if not self.pool_size or len(self.pool) < self.pool_size:
            self._add_client()

    def attempt(self, envelope, attempts):
        self._check_idle()
        result = AsyncResult()
        self.queue.put((1, result, envelope))
        return result.get()
class NetworkManager(object):
    """Network controller."""

    logger = logging.getLogger('Crawler.NetworkManager')

    def __init__(self, crawler):
        self._crawler = crawler
        self.proxy_pool = Queue()
        self._proxy_lock = RLock()
        max_connections = crawler.max_connections
        self._request_queue = PriorityQueue()
        self._request_semaphore = BoundedSemaphore(max_connections)

    def join(self):
        """Wait until every request in the queue has been sent."""
        while not self._request_queue.empty():
            # self._process_request_from_queue()
            gevent.sleep(5)

    def request(self, method, url, **kwargs):
        """Request a URL, blocking until done.

        :param method:
        :param url:
        :param kwargs: same as add_request
        :return:
        :rtype:
        :raise err:
        """
        # Build the default HTTP headers
        default_header = {
            'Accept': self._crawler.accept_mine,
            'Accept-Language': self._crawler.accept_language,
            'User-Agent': self._crawler.user_agent,
            'Connection': 'keep-alive',
            'Accept-Encoding': 'gzip, deflate'
        }
        # Fall back to the global headers when none are given
        kwargs['headers'] = kwargs.pop('headers', {})
        default_header.update(kwargs['headers'])
        kwargs['headers'] = default_header
        # Fall back to the global timeout when none is given
        kwargs['timeout'] = kwargs.pop('timeout', self._crawler.timeout)
        session = requests.Session()
        session.max_redirects = self._crawler.max_redirects
        kwargs['cookies'] = kwargs.pop('cookies', {})
        # Configure proxies
        kwargs['proxies'] = kwargs.pop('proxies', self._crawler.proxies)
        try_times = 0
        while try_times <= self._crawler.max_retries:
            try_times += 1
            try:
                self.logger.debug('[%s]>> %s' % (method.upper(), url))
                response = session.request(method, url, **kwargs)
                if self._crawler.retry_with_no_content and not response.content:
                    self.logger.warning('Page have no content.')
                    raise NoContent
                if self._crawler.retry_with_broken_content and '</html>' not in response.content:
                    self.logger.warning('Page content has been breaken.')
                    raise BreakenContent
                if response.status_code in self._crawler.do_not_retry_with_server_error_code:
                    self.logger.warning('Something wrong with server,but we DO NOT retry with it.')
                    raise ServerErrorWithoutRetry('Error Code:%s' % response.status_code)
                # Server returned a non-200 status
                if response.status_code != 200 and response.status_code not in self._crawler.ignore_server_error_code:
                    self._crawler.on_server_error(response)
                    # self.logger.warning('Something wrong with server.')
                    # raise ServerError, 'Error Code:%s' % response.status_code
            except (ConnectionError, Timeout, socket.timeout, socket.error, TryAgain,), err:
                # Ugly, but proxy failures do not raise a dedicated Exception
                if kwargs['proxies'] and any(
                        urlsplit(proxy).hostname in str(err.message)
                        for proxy in kwargs['proxies'].values()):
                    # The proxy seems broken, so switch to another one
                    self.logger.debug('Proxy %s seems go down.', kwargs['proxies'])
                    self.switch_proxy(kwargs['proxies'].values()[0])
                    # self._crawler.on_proxies_error(kwargs['proxies'][0])
                # Retry on retryable and blank-page errors; otherwise re-raise the exception
                if isinstance(err, ConnectionError) and not isinstance(err.message, MaxRetryError):
                    raise err
                sleep_time = self._crawler.sleep_seconds * try_times
                self.logger.debug(err)
                self.logger.info('Try again with %s after %s seconds' % (url, sleep_time))
                gevent.sleep(sleep_time)
            except BaseException, err:
                # TODO: not sure whether gevent timeouts can escape this handler; remove once stable.
                self.logger.error(type(err))
                self.logger.error(err)
            else:
class FactoryPool(object):

    def __init__(self, factory, maxsize=200, timeout=60):
        self.factory = factory
        self.maxsize = maxsize
        self.timeout = timeout
        self.clients = PriorityQueue(maxsize)
        # If there is a maxsize, prime the queue with empty slots.
        if maxsize is not None:
            for _ in xrange(maxsize):
                self.clients.put(EMPTY_SLOT)

    @contextlib.contextmanager
    def reserve(self):
        """Context-manager to obtain a Client object from the pool."""
        ts, client = self._checkout_connection()
        try:
            yield client
        finally:
            self._checkin_connection(ts, client)

    def _checkout_connection(self):
        # If there's no maxsize, no need to block waiting for a connection.
        blocking = self.maxsize is not None
        # Loop until we get a non-stale connection, or we create a new one.
        while True:
            try:
                ts, client = self.clients.get(blocking)
            except Empty:
                # No maxsize and no free connections, create a new one.
                # XXX TODO: we should be using a monotonic clock here.
                # see http://www.python.org/dev/peps/pep-0418/
                now = int(time.time())
                return now, self.factory()
            else:
                now = int(time.time())
                # If we got an empty slot placeholder, create a new connection.
                if client is None:
                    return now, self.factory()
                # If the connection is not stale, go ahead and use it.
                if ts + self.timeout > now:
                    return ts, client
                # Otherwise, the connection is stale.
                # Close it, push an empty slot onto the queue, and retry.
                if hasattr(client, 'disconnect'):
                    client.disconnect()
                self.clients.put(EMPTY_SLOT)
                continue

    def _checkin_connection(self, ts, client):
        """Return a connection to the pool."""
        if hasattr(client, '_closed') and client._closed:
            self.clients.put(EMPTY_SLOT)
            return
        # If the connection is now stale, don't return it to the pool.
        # Push an empty slot instead so that it will be refreshed when needed.
        now = int(time.time())
        if ts + self.timeout > now:
            self.clients.put((ts, client))
        else:
            if self.maxsize is not None:
                self.clients.put(EMPTY_SLOT)
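A short usage sketch for the pool above: `factory` is any zero-argument callable that builds a client, and `reserve()` checks one out and returns it on exit (the `FakeClient` here is a stand-in, not part of the original module).

class FakeClient(object):
    def disconnect(self):
        pass

pool = FactoryPool(FakeClient, maxsize=2, timeout=60)
with pool.reserve() as client:   # pops a slot, then builds or reuses a client
    assert isinstance(client, FakeClient)
# on exit the (timestamp, client) pair is pushed back and reused until it goes stale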
class SubNameBrute:

    def __init__(self, target, options):
        self.start_time = time.time()
        self.target = target.strip()
        self.options = options
        self.ignore_intranet = options.i
        self.scan_count = self.found_count = 0
        self.console_width = getTerminalSize()[0] - 2
        self.resolvers = [dns.resolver.Resolver(configure=False) for _ in range(options.threads)]
        for _ in self.resolvers:
            _.lifetime = _.timeout = 10.0
        self.print_count = 0
        self.STOP_ME = False
        self._load_dns_servers()
        self._load_next_sub()
        self.queue = PriorityQueue()
        self.priority = 0
        self._load_sub_names()
        if options.output:
            outfile = options.output
        else:
            _name = os.path.basename(self.options.file).replace('subnames', '')
            if _name != '.txt':
                _name = '_' + _name
            outfile = target + _name if not options.full_scan else target + '_full' + _name
        self.outfile = open(outfile, 'w')
        self.ip_dict = {}
        self.found_subs = set()
        self.ex_resolver = dns.resolver.Resolver(configure=False)
        self.ex_resolver.nameservers = self.dns_servers

    def _load_dns_servers(self):
        print '[+] Validate DNS servers ...'
        self.dns_servers = []
        pool = Pool(30)
        for server in open('dict/dns_servers.txt').xreadlines():
            server = server.strip()
            if server:
                pool.apply_async(self._test_server, (server,))
        pool.join()
        self.dns_count = len(self.dns_servers)
        sys.stdout.write('\n')
        print '[+] Found %s available DNS Servers in total' % self.dns_count
        if self.dns_count == 0:
            print '[ERROR] No DNS Servers available.'
            sys.exit(-1)

    def _test_server(self, server):
        resolver = dns.resolver.Resolver()
        resolver.lifetime = resolver.timeout = 10.0
        try:
            resolver.nameservers = [server]
            answers = resolver.query('public-dns-a.baidu.com')    # test lookup a existed domain
            if answers[0].address != '180.76.76.76':
                raise Exception('incorrect DNS response')
            try:
                resolver.query('test.bad.dns.lijiejie.com')    # Non-existed domain test
                with open('bad_dns_servers.txt', 'a') as f:
                    f.write(server + '\n')
                self._print_msg('[+] Bad DNS Server found %s' % server)
            except:
                self.dns_servers.append(server)
            self._print_msg('[+] Check DNS Server %s < OK > Found %s' % (server.ljust(16), len(self.dns_servers)))
        except:
            self._print_msg('[+] Check DNS Server %s <Fail> Found %s' % (server.ljust(16), len(self.dns_servers)))

    def _load_sub_names(self):
        self._print_msg('[+] Load sub names ...')
        if self.options.full_scan and self.options.file == 'subnames.txt':
            _file = 'dict/subnames_full.txt'
        else:
            if os.path.exists(self.options.file):
                _file = self.options.file
            elif os.path.exists('dict/%s' % self.options.file):
                _file = 'dict/%s' % self.options.file
            else:
                self._print_msg('[ERROR] Names file not exists: %s' % self.options.file)
                exit(-1)
        normal_lines = []
        wildcard_lines = []
        wildcard_list = []
        regex_list = []
        lines = set()
        with open(_file) as f:
            for line in f.xreadlines():
                sub = line.strip()
                if not sub or sub in lines:
                    continue
                lines.add(sub)
                if sub.find('{alphnum}') >= 0 or sub.find('{alpha}') >= 0 or sub.find('{num}') >= 0:
                    wildcard_lines.append(sub)
                    sub = sub.replace('{alphnum}', '[a-z0-9]')
                    sub = sub.replace('{alpha}', '[a-z]')
                    sub = sub.replace('{num}', '[0-9]')
                    if sub not in wildcard_list:
                        wildcard_list.append(sub)
                        regex_list.append('^' + sub + '$')
                else:
                    normal_lines.append(sub)
        pattern = '|'.join(regex_list)
        if pattern:
            _regex = re.compile(pattern)
            if _regex:
                for line in normal_lines[:]:
                    if _regex.search(line):
                        normal_lines.remove(line)
        for item in normal_lines:
            self.priority += 1
            self.queue.put((self.priority, item))
        for item in wildcard_lines:
            self.queue.put((88888888, item))

    def _load_next_sub(self):
        self._print_msg('[+] Load next level subs ...')
        self.next_subs = []
        _set = set()
        _file = 'dict/next_sub.txt' if not self.options.full_scan else 'dict/next_sub_full.txt'
        with open(_file) as f:
            for line in f:
                sub = line.strip()
                if sub and sub not in self.next_subs:
                    tmp_set = {sub}
                    while len(tmp_set) > 0:
                        item = tmp_set.pop()
                        if item.find('{alphnum}') >= 0:
                            for _letter in 'abcdefghijklmnopqrstuvwxyz0123456789':
                                tmp_set.add(item.replace('{alphnum}', _letter, 1))
                        elif item.find('{alpha}') >= 0:
                            for _letter in 'abcdefghijklmnopqrstuvwxyz':
                                tmp_set.add(item.replace('{alpha}', _letter, 1))
                        elif item.find('{num}') >= 0:
                            for _letter in '0123456789':
                                tmp_set.add(item.replace('{num}', _letter, 1))
                        elif item not in _set:
                            _set.add(item)
                            self.next_subs.append(item)

    def _print_msg(self, _msg=None, _found_msg=False):
        if _msg is None:
            self.print_count += 1
            if self.print_count < 100:
                return
            self.print_count = 0
            msg = '%s Found| %s Groups| %s scanned in %.1f seconds' % (
                self.found_count, self.queue.qsize(), self.scan_count, time.time() - self.start_time)
            sys.stdout.write('\r' + ' ' * (self.console_width - len(msg)) + msg)
        elif _msg.startswith('[+] Check DNS Server'):
            sys.stdout.write('\r' + _msg + ' ' * (self.console_width - len(_msg)))
        else:
            sys.stdout.write('\r' + _msg + ' ' * (self.console_width - len(_msg)) + '\n')
            if _found_msg:
                msg = '%s Found| %s Groups| %s scanned in %.1f seconds' % (
                    self.found_count, self.queue.qsize(), self.scan_count, time.time() - self.start_time)
                sys.stdout.write('\r' + ' ' * (self.console_width - len(msg)) + msg)
        sys.stdout.flush()

    @staticmethod
    def is_intranet(ip):
        ret = ip.split('.')
        if len(ret) != 4:
            return True
        if ret[0] == '10':
            return True
        if ret[0] == '172' and 16 <= int(ret[1]) <= 32:
            return True
        if ret[0] == '192' and ret[1] == '168':
            return True
        return False

    def put_item(self, item):
        num = item.count('{alphnum}') + item.count('{alpha}') + item.count('{num}')
        if num == 0:
            self.priority += 1
            self.queue.put((self.priority, item))
        else:
            self.queue.put((self.priority + num * 10000000, item))

    def _scan(self, j):
        self.resolvers[j].nameservers = [self.dns_servers[j % self.dns_count]]
        while not self.queue.empty():
            try:
                item = self.queue.get(timeout=1.0)[1]
                self.scan_count += 1
            except:
                break
            self._print_msg()
            try:
                if item.find('{alphnum}') >= 0:
                    for _letter in 'abcdefghijklmnopqrstuvwxyz0123456789':
                        self.put_item(item.replace('{alphnum}', _letter, 1))
                    continue
                elif item.find('{alpha}') >= 0:
                    for _letter in 'abcdefghijklmnopqrstuvwxyz':
                        self.put_item(item.replace('{alpha}', _letter, 1))
                    continue
                elif item.find('{num}') >= 0:
                    for _letter in '0123456789':
                        self.put_item(item.replace('{num}', _letter, 1))
                    continue
                elif item.find('{next_sub}') >= 0:
                    for _ in self.next_subs:
                        self.queue.put((0, item.replace('{next_sub}', _, 1)))
                    continue
                else:
                    sub = item
                if sub in self.found_subs:
                    continue
                cur_sub_domain = sub + '.' + self.target
                _sub = sub.split('.')[-1]
                try:
                    answers = self.resolvers[j].query(cur_sub_domain)
                except dns.resolver.NoAnswer, e:
                    answers = self.ex_resolver.query(cur_sub_domain)
                if answers:
                    self.found_subs.add(sub)
                    ips = ', '.join(sorted([answer.address for answer in answers]))
                    if ips in ['1.1.1.1', '127.0.0.1', '0.0.0.0']:
                        continue
                    if self.ignore_intranet and SubNameBrute.is_intranet(answers[0].address):
                        continue
                    try:
                        self.scan_count += 1
                        answers = self.resolvers[j].query(cur_sub_domain, 'cname')
                        cname = answers[0].target.to_unicode().rstrip('.')
                        if cname.endswith(self.target) and cname not in self.found_subs:
                            self.found_subs.add(cname)
                            cname_sub = cname[:len(cname) - len(self.target) - 1]    # new sub
                            self.queue.put((0, cname_sub))
                    except:
                        pass
                    if (_sub, ips) not in self.ip_dict:
                        self.ip_dict[(_sub, ips)] = 1
                    else:
                        self.ip_dict[(_sub, ips)] += 1
                    if ips not in self.ip_dict:
                        self.ip_dict[ips] = 1
                    else:
                        self.ip_dict[ips] += 1
                    if self.ip_dict[(_sub, ips)] > 3 or self.ip_dict[ips] > 6:
                        continue
                    self.found_count += 1
                    msg = cur_sub_domain.ljust(30) + ips
                    self._print_msg(msg, _found_msg=True)
                    self._print_msg()
                    self.outfile.write(cur_sub_domain.ljust(30) + '\t' + ips + '\n')
                    self.outfile.flush()
                    try:
                        self.resolvers[j].query('lijiejietest.' + cur_sub_domain)
                    except (dns.resolver.NXDOMAIN, dns.resolver.NoAnswer) as e:
                        self.queue.put((999999999, '{next_sub}.' + sub))
                    except:
                        pass
            except (dns.resolver.NXDOMAIN, dns.name.EmptyLabel) as e:
                pass
            except (dns.resolver.NoNameservers, dns.resolver.NoAnswer, dns.exception.Timeout) as e:
                pass
class WaveGenerator(object):

    def __init__(self, u):
        self.uid = u.uid
        self.access_token = u.access_token
        self.db = DB()
        self.results = PriorityQueue()

    def fetch_friends(self):
        friends = self.db.friends(self.uid)
        if not friends:
            friends = [User(x["id"], None, x["name"])
                       for x in unpage_par(fb.me.using(self.access_token).friends.get)]
            self.db.store_friends(self.uid, friends)
        return friends

    def rt_listens_for(self, u):
        rt_results = users.subscribe(u.uid, self.access_token)
        for listen in rt_results:
            if not self.db.has_song(listen.sid):
                self.db.store_song(self.fetch_song(listen.sid))
            t = self.db.match_song(listen.sid)
            if t:
                self.results.put((1, {
                    "trackId": t.ztid,
                    "userId": u.uid,
                    "userName": u.uname,
                    "src": t.surl,
                    "songName": t.title,
                    "artistName": t.artist_name,
                    "artistPhoto": t.aimgurl,
                    "coverSrc": t.rimgurl,
                    "timestamp": listen.ts.strftime("%Y-%m-%dT%H:%M:%S+0000")
                }))

    def listens_for(self, u, num=50):
        last_ts, last_cts = self.db.last_listen(u.uid)
        #if not last_cts or (
        #        last_cts and datetime.utcnow() - last_ts > timedelta(seconds=300)):
        #    for listen in unpage_seq(
        #            fb[u.uid]["music.listens"].using(self.access_token).get, num):
        #        ts = fb_datetime(listen.get("end_time"))
        #        if last_ts and last_ts >= ts:
        #            break
        #        listen = Listen(
        #            lid=listen.get("id"),
        #            uid=u.uid,
        #            sid=listen.get("data", {}).get("song", {}).get("id"),
        #            ts=ts)
        #        self.db.store_listen(listen)
        #        yield listen
        #    self.db.update_cts(u.uid)
        time.sleep(2)
        for n, listen in enumerate(self.db.listens(u.uid, last_ts)):
            if n % 3 == 0:
                time.sleep(1)
            yield listen

    def fetch_song(self, sid):
        data = fb[sid].using(self.access_token).get()
        return Song(
            sid=sid,
            title=data.get("title"),
            artist_name=data.get("data", {}).get("musician", [{}])[0].get("name"),
            site_name=data.get("site_name"))

    def fetch_listens(self, u):
        for listen in self.listens_for(u):
            if not self.db.has_song(listen.sid):
                self.db.store_song(self.fetch_song(listen.sid))
            t = self.db.match_song(listen.sid)
            if t:
                self.results.put((10, {
                    "trackId": t.ztid,
                    "userId": u.uid,
                    "userName": u.uname,
                    "src": t.surl,
                    "songName": t.title,
                    "artistName": t.artist_name,
                    "artistPhoto": t.aimgurl,
                    "coverSrc": t.rimgurl,
                    "timestamp": listen.ts.strftime("%Y-%m-%dT%H:%M:%S+0000")
                }))

    def fetch(self):
        friends = self.fetch_friends()
        for f in friends:
            spawn(self.rt_listens_for, f)
            spawn(self.fetch_listens, f)

    def __call__(self):
        spawn(self.fetch)
        return self.results
class DatasetDownloader(object):
    _queue_item_type = namedtuple("queue_item",
                                  ("hour", "sleep_until", "filename", "expect_pressures", "bad_downloads"))

    def __init__(self, directory, ds_time, timeout=120, first_file_timeout=600,
                 bad_download_retry_limit=3, write_dataset=True, write_gribmirror=True,
                 deadline=None, dataset_host="ftp.ncep.noaa.gov",
                 dataset_path="/pub/data/nccf/com/gfs/prod/gfs.{0}/"):
        # set these ASAP for close() via __del__ if __init__ raises something
        self.success = False
        self._dataset = None
        self._gribmirror = None
        self._tmp_directory = None

        assert ds_time.hour in (0, 6, 12, 18)
        assert ds_time.minute == ds_time.second == ds_time.microsecond == 0

        if not (write_dataset or write_gribmirror):
            raise ValueError("Choose write_dataset or write_gribmirror (or both)")

        if deadline is None:
            deadline = max(datetime.now() + timedelta(hours=2),
                           ds_time + timedelta(hours=9, minutes=30))

        self.directory = directory
        self.ds_time = ds_time
        self.timeout = timeout
        self.first_file_timeout = first_file_timeout
        self.write_dataset = write_dataset
        self.write_gribmirror = write_gribmirror
        self.bad_download_retry_limit = bad_download_retry_limit
        self.deadline = deadline
        self.dataset_host = dataset_host
        self.dataset_path = dataset_path

        self.have_first_file = False
        self.files_complete = 0
        self.files_count = 0
        self.completed = Event()

        ds_time_str = self.ds_time.strftime("%Y%m%d%H")
        self.remote_directory = dataset_path.format(ds_time_str)

        self._greenlets = Group()
        self.unpack_lock = RLock()

        # Items in the queue are
        #   (hour, sleep_until, filename, ...)
        # so they sort by hour, and then if a not-found adds a delay to
        # a specific file, files from that hour without the delay
        # are tried first
        self._files = PriorityQueue()

        # areas in self.dataset.array are considered 'undefined' until
        # self.checklist[index[:3]] is True, since unpack_grib may
        # write to them, and then abort via ValueError before marking
        # updating the checklist if the file turns out later to be bad
        # the checklist also serves as a sort of final sanity check:
        # we also have "does this file contain all the records we think it
        # should" checklists; see Worker._download_file
        self._checklist = make_checklist()

    def open(self):
        logger.info("downloader: opening files for dataset %s", self.ds_time)
        self._tmp_directory = \
            tempfile.mkdtemp(dir=self.directory, prefix="download.")
        os.chmod(self._tmp_directory, 0o775)
        logger.debug("Temporary directory is %s", self._tmp_directory)

        if self.write_dataset:
            self._dataset = \
                Dataset(self.ds_time, directory=self._tmp_directory, new=True)

        if self.write_gribmirror:
            fn = Dataset.filename(self.ds_time, directory=self._tmp_directory,
                                  suffix=Dataset.SUFFIX_GRIBMIRROR)
            logger.debug("Opening gribmirror (truncate and write) %s %s", self.ds_time, fn)
            self._gribmirror = open(fn, "w+")

    def download(self):
        logger.info("download of %s starting", self.ds_time)

        ttl, addresses = resolve_ipv4(self.dataset_host)
        logger.debug("Resolved to %s IPs", len(addresses))
        addresses = [inet_ntoa(x) for x in addresses]

        total_timeout = self.deadline - datetime.now()
        total_timeout_secs = total_timeout.total_seconds()
        if total_timeout_secs < 0:
            raise ValueError("Deadline already passed")
        else:
            logger.debug("Deadline in %s", total_timeout)

        self._add_files()
        self._run_workers(addresses, total_timeout_secs)

        if not self.completed.is_set():
            raise ValueError("timed out")
        if not self._checklist.all():
            raise ValueError("incomplete: records missing")

        self.success = True
        logger.debug("downloaded %s successfully", self.ds_time)

    def _add_files(self):
        filename_prefix = self.ds_time.strftime("gfs.t%Hz.pgrb2")

        for hour in Dataset.axes.hour:
            hour_str = "{0:02}".format(hour)
            for bit, exp_pr in (("f", Dataset.pressures_pgrb2f),
                                ("bf", Dataset.pressures_pgrb2bf)):
                self._files.put(self._queue_item_type(
                    hour, 0, filename_prefix + bit + hour_str, exp_pr, 0))
                self.files_count += 1

        logger.info("Need to download %s files", self.files_count)

    def _run_workers(self, addresses, total_timeout_secs):
        logger.debug("Spawning %s workers", len(addresses) * 2)

        # don't ask _join_all to raise the first exception it catches
        # if we're already raising something in the except block
        raising = False

        try:
            for worker_id, address in enumerate(addresses * 2):
                w = DownloadWorker(self, worker_id, address)
                w.start()
                w.link()
                self._greenlets.add(w)

            # worker unhandled exceptions are raised in this greenlet
            # via link(). They can appear in completed.wait and
            # greenlets.kill(block=True) only (the only times that this
            # greenlet will yield)
            self.completed.wait(timeout=total_timeout_secs)
        except:
            # includes LinkedCompleted - a worker should not exit cleanly
            # until we .kill them below
            logger.debug("_run_workers catch %s (will reraise)", sys.exc_info()[1])
            raising = True
            raise
        finally:
            # don't leak workers.
            self._join_all(raise_exception=(not raising))

    def _join_all(self, raise_exception=False):
        # we need the loop to run to completion and so have it catch and
        # hold or discard exceptions for later.
        # track the first exception caught and re-raise that
        exc_info = None

        while len(self._greenlets):
            try:
                self._greenlets.kill(block=True)
            except greenlet.LinkedCompleted:
                # now that we've killed workers, these are expected.
                # ignore.
                pass
            except greenlet.LinkedFailed as e:
                if exc_info is None and raise_exception:
                    logger.debug("_join_all catch %s (will reraise)", e)
                    exc_info = sys.exc_info()
                else:
                    logger.debug("_join_all discarding %s (already have exc)", e)

        if exc_info is not None:
            try:
                raise exc_info[1], None, exc_info[2]
            finally:
                # avoid circular reference
                del exc_info

    def _file_complete(self):
        self.files_complete += 1
        self.have_first_file = True
        if self.files_complete == self.files_count:
            self.completed.set()
        logger.info("progress %s/%s %s%%",
                    self.files_complete, self.files_count,
                    self.files_complete / self.files_count * 100)

    def close(self, move_files=None):
        if move_files is None:
            move_files = self.success

        if self._dataset is not None or self._gribmirror is not None or \
                self._tmp_directory is not None:
            if move_files:
                logger.info("moving downloaded files")
            else:
                logger.info("deleting failed download files")

        if self._dataset is not None:
            self._dataset.close()
            self._dataset = None
            if move_files:
                self._move_file()
            else:
                self._delete_file()

        if self._gribmirror is not None:
            self._gribmirror.close()
            self._gribmirror = None
            if move_files:
                self._move_file(Dataset.SUFFIX_GRIBMIRROR)
            else:
                self._delete_file(Dataset.SUFFIX_GRIBMIRROR)

        if self._tmp_directory is not None:
            self._remove_download_directory()
            self._tmp_directory = None

    def __del__(self):
        self.close()

    def _remove_download_directory(self):
        l = os.listdir(self._tmp_directory)
        if l:
            logger.warning("cleaning %s unknown file%s in temporary directory",
                           len(l), '' if len(l) == 1 else 's')
        logger.debug("removing temporary directory")
        shutil.rmtree(self._tmp_directory)

    def _move_file(self, suffix=''):
        fn1 = Dataset.filename(self.ds_time, directory=self._tmp_directory, suffix=suffix)
        fn2 = Dataset.filename(self.ds_time, directory=self.directory, suffix=suffix)
        logger.debug("renaming %s to %s", fn1, fn2)
        os.rename(fn1, fn2)

    def _delete_file(self, suffix=''):
        fn = Dataset.filename(self.ds_time, directory=self._tmp_directory, suffix=suffix)
        logger.warning("deleting %s", fn)
        os.unlink(fn)
class AWSLogs(object):

    ACTIVE = 1
    EXHAUSTED = 2
    WATCH_SLEEP = 2

    def __init__(self, **kwargs):
        self.connection_cls = kwargs.get('connection_cls', AWSConnection)
        self.aws_region = kwargs.get('aws_region')
        self.aws_access_key_id = kwargs.get('aws_access_key_id')
        self.aws_secret_access_key = kwargs.get('aws_secret_access_key')
        self.log_group_name = kwargs.get('log_group_name')
        self.log_stream_name = kwargs.get('log_stream_name')
        self.watch = kwargs.get('watch')
        self.color_enabled = kwargs.get('color_enabled')
        self.output_stream_enabled = kwargs.get('output_stream_enabled')
        self.output_group_enabled = kwargs.get('output_group_enabled')
        self.start = self.parse_datetime(kwargs.get('start'))
        self.end = self.parse_datetime(kwargs.get('end'))
        self.pool_size = max(kwargs.get('pool_size', 0), 10)
        self.max_group_length = 0
        self.max_stream_length = 0
        self.publishers = []
        self.events_queue = Queue()
        self.raw_events_queue = PriorityQueue()
        self.publishers_queue = PriorityQueue()
        self.stream_status = {}
        self.stream_max_timestamp = {}
        self.connection = self.connection_cls(
            self.aws_region,
            aws_access_key_id=self.aws_access_key_id,
            aws_secret_access_key=self.aws_secret_access_key
        )

    def _get_streams_from_patterns(self, log_group_pattern, log_stream_pattern):
        """Returns pairs of group, stream matching ``log_group_pattern`` and
        ``log_stream_pattern``."""
        for group in self._get_groups_from_pattern(log_group_pattern):
            for stream in self._get_streams_from_pattern(group, log_stream_pattern):
                yield group, stream

    def _get_groups_from_pattern(self, pattern):
        """Returns groups matching ``pattern``."""
        pattern = '.*' if pattern == 'ALL' else pattern
        reg = re.compile('^{0}'.format(pattern))
        for group in self.get_groups():
            if re.match(reg, group):
                yield group

    def _get_streams_from_pattern(self, group, pattern):
        """Returns streams in ``group`` matching ``pattern``."""
        pattern = '.*' if pattern == 'ALL' else pattern
        reg = re.compile('^{0}'.format(pattern))
        for stream in self.get_streams(group):
            if re.match(reg, stream):
                yield stream

    def _publisher_queue_consumer(self):
        """Consume ``publishers_queue`` api calls, run them and publish log
        events to ``raw_events_queue``.

        If ``nextForwardToken`` is present, register a new api call into
        ``publishers_queue`` using the timestamp of the latest event as its
        weight."""
        while True:
            try:
                _, (log_group_name, log_stream_name, next_token) = self.publishers_queue.get(block=False)
            except Empty:
                if self.watch:
                    gevent.sleep(self.WATCH_SLEEP)
                    continue
                else:
                    break

            response = self.connection.get_log_events(
                next_token=next_token,
                log_group_name=log_group_name,
                log_stream_name=log_stream_name,
                start_time=self.start,
                end_time=self.end,
                start_from_head=True
            )

            if not len(response['events']):
                self.stream_status[(log_group_name, log_stream_name)] = self.EXHAUSTED
                continue

            self.stream_status[(log_group_name, log_stream_name)] = self.ACTIVE

            for event in response['events']:
                event['group'] = log_group_name
                event['stream'] = log_stream_name
                self.raw_events_queue.put((event['timestamp'], event))
                self.stream_max_timestamp[(log_group_name, log_stream_name)] = event['timestamp']

            if 'nextForwardToken' in response:
                self.publishers_queue.put(
                    (response['events'][-1]['timestamp'],
                     (log_group_name, log_stream_name, response['nextForwardToken']))
                )

    def _get_min_timestamp(self):
        """Return the minimum timestamp available across all active streams."""
        pending = [self.stream_max_timestamp[k]
                   for k, v in self.stream_status.iteritems()
                   if v != self.EXHAUSTED]
        return min(pending) if pending else None

    def _get_all_streams_exhausted(self):
        """Return True if all streams are exhausted."""
        return all((s == self.EXHAUSTED for s in self.stream_status.itervalues()))

    def _raw_events_queue_consumer(self):
        """Consume events from ``raw_events_queue`` once all active streams
        have already published events up to ``_get_min_timestamp``, and
        register them in order into ``events_queue``."""
        while True:
            if self._get_all_streams_exhausted() and self.raw_events_queue.empty():
                if self.watch:
                    gevent.sleep(self.WATCH_SLEEP)
                    continue
                self.events_queue.put(NO_MORE_EVENTS)
                break

            try:
                timestamp, line = self.raw_events_queue.peek(timeout=1)
            except Empty:
                continue

            min_timestamp = self._get_min_timestamp()
            if min_timestamp and min_timestamp < timestamp:
                gevent.sleep(0.3)
                continue

            timestamp, line = self.raw_events_queue.get()

            output = [line['message']]
            if self.output_stream_enabled:
                output.insert(
                    0,
                    self.color(
                        line['stream'].ljust(self.max_stream_length, ' '),
                        'cyan'
                    )
                )
            if self.output_group_enabled:
                output.insert(
                    0,
                    self.color(
                        line['group'].ljust(self.max_group_length, ' '),
                        'green'
                    )
                )
            self.events_queue.put("{0}\n".format(' '.join(output)))

    def _events_consumer(self):
        """Print events from ``events_queue`` as soon as they are available."""
        while True:
            event = self.events_queue.get(True)
            if event == NO_MORE_EVENTS:
                break
            sys.stdout.write(event)
            sys.stdout.flush()

    def list_logs(self):
        self.register_publishers()

        pool = Pool(size=self.pool_size)
        pool.spawn(self._raw_events_queue_consumer)
        pool.spawn(self._events_consumer)

        if self.watch:
            pool.spawn(self.register_publishers_periodically)

        for i in xrange(self.pool_size):
            pool.spawn(self._publisher_queue_consumer)

        pool.join()

    def register_publishers(self):
        """Register publishers into ``publishers_queue``."""
        for group, stream in self._get_streams_from_patterns(self.log_group_name, self.log_stream_name):
            if (group, stream) in self.publishers:
                continue
            self.publishers.append((group, stream))
            self.max_group_length = max(self.max_group_length, len(group))
            self.max_stream_length = max(self.max_stream_length, len(stream))
            self.publishers_queue.put((0, (group, stream, None)))
            self.stream_status[(group, stream)] = self.ACTIVE
            self.stream_max_timestamp[(group, stream)] = -1

    def register_publishers_periodically(self):
        while True:
            self.register_publishers()
            gevent.sleep(2)

    def list_groups(self):
        """Lists available CloudWatch logs groups."""
        for group in self.get_groups():
            print group

    def list_streams(self, *args, **kwargs):
        """Lists available CloudWatch logs streams in ``log_group_name``."""
        for stream in self.get_streams(*args, **kwargs):
            print stream

    def get_groups(self):
        """Returns available CloudWatch logs groups."""
        next_token = None
        while True:
            response = self.connection.describe_log_groups(next_token=next_token)

            for group in response.get('logGroups', []):
                yield group['logGroupName']

            if 'nextToken' in response:
                next_token = response['nextToken']
            else:
                break

    def get_streams(self, log_group_name=None):
        """Returns available CloudWatch logs streams in ``log_group_name``."""
        log_group_name = log_group_name or self.log_group_name
        next_token = None

        while True:
            response = self.connection.describe_log_streams(
                log_group_name=log_group_name,
                next_token=next_token
            )

            for stream in response.get('logStreams', []):
                yield stream['logStreamName']

            if 'nextToken' in response:
                next_token = response['nextToken']
            else:
                break

    def color(self, text, color):
        """Returns the colored version of ``text`` if ``color_enabled``."""
        if self.color_enabled:
            return colored(text, color)
        return text

    def parse_datetime(self, datetime_text):
        """Parse ``datetime_text`` into a ``datetime``."""
        if not datetime_text:
            return None

        ago_match = re.match(
            r'(\d+)\s?(m|minute|minutes|h|hour|hours|d|day|days|w|week|weeks)(?: ago)?',
            datetime_text
        )
        if ago_match:
            amount, unit = ago_match.groups()
            amount = int(amount)
            unit = {'m': 60, 'h': 3600, 'd': 86400, 'w': 604800}[unit[0]]
            date = datetime.now() + timedelta(seconds=unit * amount * -1)
        else:
            try:
                date = parse(datetime_text)
            except ValueError:
                raise exceptions.UnknownDateError(datetime_text)

        return int(date.strftime("%s")) * 1000
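# A self-contained sketch of the ordering that ``raw_events_queue`` relies on
# above: events from several streams are pushed as (timestamp, event) tuples
# and a single consumer pops them back in global timestamp order. The stream
# names and timestamps below are made up for illustration only.
from gevent.queue import PriorityQueue

raw_events = PriorityQueue()
raw_events.put((1400000002, {'stream': 'web-1', 'message': 'b'}))
raw_events.put((1400000001, {'stream': 'web-2', 'message': 'a'}))
raw_events.put((1400000003, {'stream': 'web-1', 'message': 'c'}))

while not raw_events.empty():
    timestamp, event = raw_events.get()
    print timestamp, event['stream'], event['message']   # prints in timestamp order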
def __init__(self, directory, ds_time, timeout=120,
             first_file_timeout=600, bad_download_retry_limit=3,
             write_dataset=True, write_gribmirror=True, deadline=None,
             dataset_host="ftp.ncep.noaa.gov",
             dataset_path="/pub/data/nccf/com/gfs/prod/gfs.{0}/"):
    # set these ASAP for close() via __del__ if __init__ raises something
    self.success = False
    self._dataset = None
    self._gribmirror = None
    self._tmp_directory = None

    assert ds_time.hour in (0, 6, 12, 18)
    assert ds_time.minute == ds_time.second == ds_time.microsecond == 0

    if not (write_dataset or write_gribmirror):
        raise ValueError("Choose write_dataset or write_gribmirror "
                         "(or both)")

    if deadline is None:
        deadline = max(datetime.now() + timedelta(hours=2),
                       ds_time + timedelta(hours=9, minutes=30))

    self.directory = directory
    self.ds_time = ds_time

    self.timeout = timeout
    self.first_file_timeout = first_file_timeout
    self.write_dataset = write_dataset
    self.write_gribmirror = write_gribmirror
    self.bad_download_retry_limit = bad_download_retry_limit

    self.deadline = deadline
    self.dataset_host = dataset_host
    self.dataset_path = dataset_path

    self.have_first_file = False

    self.files_complete = 0
    self.files_count = 0
    self.completed = Event()

    ds_time_str = self.ds_time.strftime("%Y%m%d%H")
    self.remote_directory = dataset_path.format(ds_time_str)

    self._greenlets = Group()
    self.unpack_lock = RLock()

    # Items in the queue are
    #     (hour, sleep_until, filename, ...)
    # so they sort by hour, and then if a not-found adds a delay to
    # a specific file, files from that hour without the delay
    # are tried first
    self._files = PriorityQueue()

    # areas in self.dataset.array are considered 'undefined' until
    # self.checklist[index[:3]] is True, since unpack_grib may
    # write to them, and then abort via ValueError before
    # updating the checklist if the file turns out later to be bad
    # the checklist also serves as a sort of final sanity check:
    # we also have "does this file contain all the records we think it
    # should" checklists; see Worker._download_file
    self._checklist = make_checklist()
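# A small standalone sketch (not part of the class above) of why the
# (hour, sleep_until, filename, ...) tuples described in the comment sort the
# way they do: tuples compare element by element, so files from an earlier
# forecast hour come first, and within one hour the entries without a retry
# delay come out before the delayed ones. Hours and filenames are hypothetical.
from gevent.queue import PriorityQueue

files = PriorityQueue()
files.put((6, 120.0, "file_hour06_delayed"))   # hour 6, not-found added a delay
files.put((3, 0.0, "file_hour03"))             # hour 3, no delay
files.put((6, 0.0, "file_hour06"))             # hour 6, no delay

while not files.empty():
    print files.get()
# -> (3, 0.0, 'file_hour03'), (6, 0.0, 'file_hour06'), (6, 120.0, 'file_hour06_delayed')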
class ConnectionPool(object):

    def __init__(self, factory, retry_max=3, retry_delay=.1,
                 timeout=-1, max_lifetime=600., max_size=10, options=None):
        self.max_size = max_size
        self.pool = PriorityQueue()
        self.size = 0
        self.factory = factory
        self.retry_max = retry_max
        self.retry_delay = retry_delay
        self.timeout = timeout
        self.max_lifetime = max_lifetime
        if options is None:
            self.options = {}
        else:
            self.options = options

    def too_old(self, conn):
        return time.time() - conn.get_lifetime() > self.max_lifetime

    def release_connection(self, conn):
        connected = conn.is_connected()
        if connected and not self.too_old(conn):
            self.pool.put((conn.get_lifetime(), conn))
        else:
            conn.invalidate()

    def get(self, **options):
        pool = self.pool

        # first let's try to find a matching one
        found = None
        if self.size >= self.max_size or pool.qsize():
            for priority, candidate in pool:
                if self.too_old(candidate):
                    # let's drop it
                    continue

                matches = candidate.matches(**options)
                if not matches:
                    # let's put it back
                    pool.put((priority, candidate))
                else:
                    found = candidate
                    break

        # we got one.. we use it
        if found is not None:
            return found

        # we build a new one and send it back
        tries = 0
        last_error = None

        while tries < self.retry_max:
            self.size += 1
            try:
                new_item = self.factory(**options)
            except Exception, e:
                self.size -= 1
                last_error = e
            else:
                # we should be connected now
                if new_item.is_connected():
                    return new_item

            tries += 1
            gevent.sleep(self.retry_delay)

        if last_error is None:
            raise MaxTriesError()
        else:
            raise last_error
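# A hedged usage sketch for ConnectionPool. FakeConn is a stand-in factory
# invented for illustration; the pool only requires that the objects it
# builds expose is_connected(), get_lifetime(), matches(**options) and
# invalidate(), which are the methods get()/release_connection()/too_old()
# call above.
import time

class FakeConn(object):
    def __init__(self, **options):
        self.options = options
        self.created = time.time()
        self.alive = True

    def is_connected(self):
        return self.alive

    def get_lifetime(self):
        return self.created

    def matches(self, **options):
        return self.options == options

    def invalidate(self):
        self.alive = False

pool = ConnectionPool(factory=FakeConn, max_size=5, max_lifetime=300.)

conn = pool.get(host='example.com', port=80)   # built via the factory on first use
pool.release_connection(conn)                  # back onto the PriorityQueue keyed by get_lifetime()
again = pool.get(host='example.com', port=80)  # matching, still-fresh connection is reused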
def __init__(self):
    self.inbox = PriorityQueue()
    self._handlers = {ShutdownRequest: self.receive_shutdown}
    Greenlet.__init__(self)
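# Sketch only: the __init__ above just wires a priority inbox to a
# type->handler table. A run loop in roughly this shape (hypothetical, not
# taken from the source) would pop queued (priority, message) pairs and
# dispatch each message to the handler registered for its type.
from gevent.queue import PriorityQueue

class ShutdownRequest(object):
    """Stand-in for the ShutdownRequest message type referenced above."""
    pass

class EchoActor(object):
    def __init__(self):
        self.inbox = PriorityQueue()
        self._handlers = {ShutdownRequest: self.receive_shutdown,
                          str: self.receive_str}
        self.running = True

    def receive_shutdown(self, message):
        self.running = False

    def receive_str(self, message):
        print message

    def run(self):
        while self.running:
            _, message = self.inbox.get()
            self._handlers[type(message)](message)

actor = EchoActor()
actor.inbox.put((1, 'first'))
actor.inbox.put((2, 'second'))
actor.inbox.put((9, ShutdownRequest()))
actor.run()   # prints 'first', 'second', then stops on ShutdownRequest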
class SubNameBrute:
    def __init__(self, target, options, process_num, dns_servers, next_subs,
                 scan_count, found_count, queue_size_list, tmp_dir):
        self.target = target.strip()
        self.options = options
        self.process_num = process_num
        self.dns_servers = dns_servers
        self.dns_count = len(dns_servers)
        self.next_subs = next_subs
        self.scan_count = scan_count
        self.scan_count_local = 0
        self.found_count = found_count
        self.found_count_local = 0
        self.queue_size_list = queue_size_list
        self.resolvers = [dns.resolver.Resolver(configure=False) for _ in range(options.threads)]
        for _r in self.resolvers:
            _r.lifetime = _r.timeout = 6.0
        self.queue = PriorityQueue()
        self.item_index = 0
        self.priority = 0
        self._load_sub_names()
        self.ip_dict = {}
        self.found_subs = set()
        self.ex_resolver = dns.resolver.Resolver(configure=False)
        self.ex_resolver.nameservers = dns_servers
        self.local_time = time.time()
        self.outfile = open('%s/%s_part_%s.txt' % (tmp_dir, target, process_num), 'w')

    def _load_sub_names(self):
        if self.options.full_scan and self.options.file == 'subnames.txt':
            _file = 'dict/subnames_full.txt'
        else:
            if os.path.exists(self.options.file):
                _file = self.options.file
            elif os.path.exists('dict/%s' % self.options.file):
                _file = 'dict/%s' % self.options.file
            else:
                print_msg('[ERROR] Names file not found: %s' % self.options.file)
                exit(-1)

        normal_lines = []
        wildcard_lines = []
        wildcard_list = []
        regex_list = []
        lines = set()
        with open(_file) as f:
            for line in f.xreadlines():
                sub = line.strip()
                if not sub or sub in lines:
                    continue
                lines.add(sub)

                if sub.find('{alphnum}') >= 0 or sub.find('{alpha}') >= 0 or sub.find('{num}') >= 0:
                    wildcard_lines.append(sub)
                    sub = sub.replace('{alphnum}', '[a-z0-9]')
                    sub = sub.replace('{alpha}', '[a-z]')
                    sub = sub.replace('{num}', '[0-9]')
                    if sub not in wildcard_list:
                        wildcard_list.append(sub)
                        regex_list.append('^' + sub + '$')
                else:
                    normal_lines.append(sub)

        if regex_list:
            pattern = '|'.join(regex_list)
            _regex = re.compile(pattern)
            for line in normal_lines[:]:
                if _regex.search(line):
                    normal_lines.remove(line)

        for item in normal_lines[self.process_num::self.options.process]:
            self.priority += 1
            self.queue.put((self.priority, item))

        for item in wildcard_lines[self.process_num::self.options.process]:
            self.queue.put((88888888, item))

    def put_item(self, item):
        num = item.count('{alphnum}') + item.count('{alpha}') + item.count('{num}')
        if num == 0:
            self.priority += 1
            self.queue.put((self.priority, item))
        else:
            self.queue.put((self.priority + num * 10000000, item))

    def _scan(self, j):
        self.resolvers[j].nameservers = [self.dns_servers[j % self.dns_count]]
        while not self.queue.empty():
            try:
                item = self.queue.get(timeout=3.0)[1]
                self.scan_count_local += 1
                if time.time() - self.local_time > 3.0:
                    self.scan_count.value += self.scan_count_local
                    self.scan_count_local = 0
                    self.queue_size_list[self.process_num] = self.queue.qsize()
            except Exception as e:
                break
            try:
                if item.find('{alphnum}') >= 0:
                    for _letter in 'abcdefghijklmnopqrstuvwxyz0123456789':
                        self.put_item(item.replace('{alphnum}', _letter, 1))
                    continue
                elif item.find('{alpha}') >= 0:
                    for _letter in 'abcdefghijklmnopqrstuvwxyz':
                        self.put_item(item.replace('{alpha}', _letter, 1))
                    continue
                elif item.find('{num}') >= 0:
                    for _letter in '0123456789':
                        self.put_item(item.replace('{num}', _letter, 1))
                    continue
                elif item.find('{next_sub}') >= 0:
                    for _ in self.next_subs:
                        self.queue.put((0, item.replace('{next_sub}', _, 1)))
                    continue
                else:
                    sub = item

                if sub in self.found_subs:
                    continue

                cur_sub_domain = sub + '.' + self.target
                _sub = sub.split('.')[-1]
                try:
                    answers = self.resolvers[j].query(cur_sub_domain)
                except dns.resolver.NoAnswer, e:
                    answers = self.ex_resolver.query(cur_sub_domain)

                if answers:
                    self.found_subs.add(sub)
                    ips = ', '.join(sorted([answer.address for answer in answers]))
                    if ips in ['1.1.1.1', '127.0.0.1', '0.0.0.0']:
                        continue
                    if self.options.i and is_intranet(answers[0].address):
                        continue

                    try:
                        self.scan_count_local += 1
                        answers = self.resolvers[j].query(cur_sub_domain, 'cname')
                        cname = answers[0].target.to_unicode().rstrip('.')
                        if cname.endswith(self.target) and cname not in self.found_subs:
                            self.found_subs.add(cname)
                            cname_sub = cname[:len(cname) - len(self.target) - 1]    # new sub
                            self.queue.put((0, cname_sub))
                    except:
                        pass

                    if (_sub, ips) not in self.ip_dict:
                        self.ip_dict[(_sub, ips)] = 1
                    else:
                        self.ip_dict[(_sub, ips)] += 1
                    if self.ip_dict[(_sub, ips)] > 30:
                        continue

                    self.found_count_local += 1
                    if time.time() - self.local_time > 3.0:
                        self.found_count.value += self.found_count_local
                        self.found_count_local = 0
                        self.queue_size_list[self.process_num] = self.queue.qsize()
                        self.local_time = time.time()

                    msg = cur_sub_domain.ljust(30) + ips
                    # print_msg(msg, line_feed=True)
                    self.outfile.write(cur_sub_domain.ljust(30) + '\t' + ips + '\n')
                    self.outfile.flush()
                    try:
                        self.resolvers[j].query('lijiejietest.' + cur_sub_domain)
                    except (dns.resolver.NXDOMAIN, dns.resolver.NoAnswer) as e:
                        self.queue.put((999999999, '{next_sub}.' + sub))
                    except:
                        pass

            except (dns.resolver.NXDOMAIN, dns.name.EmptyLabel) as e:
                pass
            except (dns.resolver.NoNameservers, dns.resolver.NoAnswer, dns.exception.Timeout) as e:
                pass
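# A standalone sketch of the placeholder expansion performed inline by _scan
# above: each {alphnum}/{alpha}/{num} token is replaced one position at a
# time, so a name such as 'db{num}' fans out into db0..db9 before being
# queued. Written as a separate helper purely for illustration; _scan
# interleaves this expansion with the PriorityQueue bookkeeping.
def expand(name):
    charsets = {'{alphnum}': 'abcdefghijklmnopqrstuvwxyz0123456789',
                '{alpha}': 'abcdefghijklmnopqrstuvwxyz',
                '{num}': '0123456789'}
    for token, charset in charsets.items():
        if token in name:
            results = []
            for letter in charset:
                results.extend(expand(name.replace(token, letter, 1)))
            return results
    return [name]

print expand('db{num}')    # ['db0', 'db1', ..., 'db9']
print expand('{alpha}1')   # ['a1', 'b1', ..., 'z1']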