Example #1
def inner_func(q: eventlet.Queue):
    a = MySingleton()
    b = MySingleton()
    q.put((
        a.uuid,
        b.uuid,
    ))
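A minimal runnable sketch of how the function above might be driven; MySingleton here is a hypothetical stand-in for the class the example assumes:

import eventlet
from eventlet.queue import Queue

class MySingleton(object):
    # hypothetical stand-in: always returns the same instance
    _instance = None

    def __new__(cls):
        if cls._instance is None:
            cls._instance = super(MySingleton, cls).__new__(cls)
            cls._instance.uuid = 'a-fixed-uuid'
        return cls._instance

q = Queue()
eventlet.spawn(inner_func, q)
uuid_a, uuid_b = q.get()  # blocks cooperatively until the green thread puts
assert uuid_a == uuid_b   # both names resolve to the same singleton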
Example #2
class Dispatcher(EventThread):
    def __init__(self):
        EventThread.__init__(self)  # initialize the base green thread (assumed no-arg constructor)
        self.proposer_queue = Queue()
        self.acceptor_queue = Queue()
    def _run(self):
        while True:
            # get the received raw string
            u = do_recv()
            m = Message()
            if not m.depack(u):
                log.debug('depack %s error', u)
                continue
            # check timestamp: drop messages older than the timeout
            if get_utc_time() - m.timestamp > conf.proposer_timeout:
                log.debug('received expired package, drop it')
                continue
            # check version
            if m.version != VERSION:
                log.debug('version mismatch')
                continue
            # check signature
            if m.signature != m.build_hmac_signature():
                log.info('message signature mismatch, may have been altered in transit')
                continue

            if m.method in (PREPARE, ACCEPT, LEARN, RENEW):
                self.acceptor_queue.put(m)
            elif m.method in (NACK, ACK, OUTDATE):
                self.proposer_queue.put(m)
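The dispatcher only routes; consumer green threads are expected to drain the two queues. A sketch of the consuming side (handle_acceptor_message is a hypothetical handler, not part of the example):

def acceptor_loop(dispatcher):
    # block cooperatively on the acceptor queue and process each message
    while True:
        m = dispatcher.acceptor_queue.get()
        handle_acceptor_message(m)  # hypothetical handler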
Example #3
File: handlers.py  Project: amv213/jumbo
    def subscribe(self, q: eventlet.Queue) -> NoReturn:
        """Green thread process waiting for NOTIFYs on the channel and feeding
        them to the queue.

        Args:
            q:  event queue through which to pipe NOTIFY events to the main
                thread.
        """

        # Subscribe to notification channel
        self.database.listen_on_channel(self.channel, self.key)

        # Infinite listening loop
        while True:

            # self.database.pool._used[self.key] is the connection object
            # corresponding to [key] in the connection pool

            # suspend this green thread and yield control until there is a
            # notification to read on the connection's socket
            trampoline(self.database.pool._used[self.key], read=True)

            # once there is a notification --> poll
            self.database.pool._used[self.key].poll()

            while self.database.pool._used[self.key].notifies:
                # extract notify:
                notify = self.database.pool._used[self.key].notifies.pop()
                # block until slot available in queue to insert Notify:
                q.put(notify)
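On the consuming side, the main green thread would typically spawn this listener and drain the queue, roughly as follows (a sketch; handler is an instance of the class above and process is a hypothetical callback):

q = eventlet.Queue()
eventlet.spawn(handler.subscribe, q)  # run the listener as a green thread
while True:
    notify = q.get()  # blocks cooperatively until a NOTIFY is piped through
    process(notify)   # hypothetical application callback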
Example #4
class WSTestGenerator(WebSocketView):
    def handle_websocket(self, ws):
        self._ws = ws
        # delegate to the parent class implementation
        return super(WSTestGenerator, self).handle_websocket(ws)

    def handler(self, ws):
        self.queue = Queue()
        while True:
            m = ws.wait()
            #            import ipdb; ipdb.set_trace()
            if m is None:
                break
            self.queue.put(m)
Example #6
    def take_action(self, parsed_args):
        self.log.debug('take_action(%s)', parsed_args)
        digits = self.app.client_manager.get_meta1_digits()
        workers_count = parsed_args.workers

        conf = {'namespace': self.app.client_manager.namespace}
        if parsed_args.proxy:
            conf.update({'proxyd_url': parsed_args.proxy})
        else:
            ns_conf = load_namespace_conf(conf['namespace'])
            proxy = ns_conf.get('proxy')
            conf.update({'proxyd_url': proxy})

        workers = list()
        with green.ContextPool(workers_count) as pool:
            pile = GreenPile(pool)
            prefix_queue = Queue(16)

            # Prepare some workers
            for i in range(workers_count):
                w = WarmupWorker(conf, self.log)
                workers.append(w)
                pile.spawn(w.run, prefix_queue)

            # Feed the queue
            trace_increment = 0.01
            trace_next = trace_increment
            sent, total = 0, float(count_prefixes(digits))
            for prefix in generate_prefixes(digits):
                sent += 1
                prefix_queue.put(prefix)
                # Display the progression
                ratio = float(sent) / total
                if ratio >= trace_next:
                    self.log.info("... %d%%", int(ratio * 100.0))
                    trace_next += trace_increment

            self.log.debug("Send the termination marker")
            prefix_queue.join()

        self.log.info("All the workers are done")
Example #7
    def test_connection_pooling(self):
        with patch('swift.common.memcached.socket') as mock_module:
            # patch socket, stub socket.socket, mock sock
            mock_sock = mock_module.socket.return_value

            # track clients waiting for connections
            connected = []
            connections = Queue()

            def wait_connect(addr):
                connected.append(addr)
                connections.get()
            mock_sock.connect = wait_connect

            memcache_client = memcached.MemcacheRing(['1.2.3.4:11211'],
                                                     connect_timeout=10)
            # sanity
            self.assertEquals(1, len(memcache_client._client_cache))
            for server, pool in memcache_client._client_cache.items():
                self.assertEquals(2, pool.max_size)

            # make 10 requests "at the same time"
            p = GreenPool()
            for i in range(10):
                p.spawn(memcache_client.set, 'key', 'value')
            for i in range(3):
                sleep(0.1)
                self.assertEquals(2, len(connected))
            # give out a connection
            connections.put(None)
            for i in range(3):
                sleep(0.1)
                self.assertEquals(2, len(connected))
            # finish up
            for i in range(8):
                connections.put(None)
            self.assertEquals(2, len(connected))
            p.waitall()
            self.assertEquals(2, len(connected))
Example #8
    def test_connection_pooling(self):
        with patch('swift.common.memcached.socket') as mock_module:
            # patch socket, stub socket.socket, mock sock
            mock_sock = mock_module.socket.return_value

            # track clients waiting for connections
            connected = []
            connections = Queue()
            errors = []

            def wait_connect(addr):
                connected.append(addr)
                sleep(0.1)  # yield
                val = connections.get()
                if val is not None:
                    errors.append(val)

            mock_sock.connect = wait_connect

            memcache_client = memcached.MemcacheRing(['1.2.3.4:11211'],
                                                     connect_timeout=10)
            # sanity
            self.assertEqual(1, len(memcache_client._client_cache))
            for server, pool in memcache_client._client_cache.items():
                self.assertEqual(2, pool.max_size)

            # make 10 requests "at the same time"
            p = GreenPool()
            for i in range(10):
                p.spawn(memcache_client.set, 'key', 'value')
            for i in range(3):
                sleep(0.1)
                self.assertEqual(2, len(connected))

            # give out a connection
            connections.put(None)

            # at this point, only one connection should have actually been
            # created, the other is in the creation step, and the rest of the
            # clients are not attempting to connect. we let this play out a
            # bit to verify.
            for i in range(3):
                sleep(0.1)
                self.assertEqual(2, len(connected))

            # finish up, this allows the final connection to be created, so
            # that all the other clients can use the two existing connections
            # and no others will be created.
            connections.put(None)
            connections.put('nono')
            self.assertEqual(2, len(connected))
            p.waitall()
            self.assertEqual(2, len(connected))
            self.assertEqual(0, len(errors),
                             "A client was allowed a third connection")
            connections.get_nowait()
            self.assertTrue(connections.empty())
Example #10
File: simrt.py  Project: b2rex/b2rex
class GreenletsThread(Thread):
    """
    Main thread for the program. If running stand alone this will be running
    as a greenlet instead.
    """
    def __init__(self, server_url, login_params):
        self.running = True
        self.agent = True
        self.cmd_out_queue = []
        self.cmd_in_queue = []
        self.out_queue = Queue()
        self.in_queue = Queue()
        self.server_url = server_url
        self.login_params = login_params
        Thread.__init__(self)

    def apply_position(self, obj_uuid, pos, rot=None):
        cmd = ['pos', obj_uuid, pos, rot]
        self.addCmd(cmd)

    def __getattr__(self, name):
        return ProxyFunction(name, self)

    def apply_scale(self, obj_uuid, scale):
        cmd = ['scale', obj_uuid, scale]
        self.addCmd(cmd)

    def run(self):
        agent = AgentManager(self.in_queue,
                   self.out_queue)
        error = agent.login(self.server_url, self.login_params)
        if error:
            self.out_queue.put(["error", str(error)])
            self.out_queue.put(["agentquit", str(error)])
            while self.out_queue.qsize():
                api.sleep(0.1)
        agent.logger.debug("Quitting")
        self.agent = agent
        self.running = False

    def addCmd(self, cmd):
        self.in_queue.put(cmd)

    def getQueue(self):
        out_queue = []
        while self.out_queue.qsize():
            out_queue.append(self.out_queue.get())
        return out_queue
Example #11
def run_stock_parser():
    symbol_q = Queue()
    price_q = Queue()

    stock_symbols = []
    with open('symbols.txt', 'r') as symfile:
        for n, line in enumerate(symfile):
            sym = line.strip()
            if sym:
                stock_symbols.append(sym)

    ncpu = cpu_count()

    pool = [spawn(read_stock_worker, symbol_q, price_q) for _ in range(ncpu * 2)]
    output = spawn(write_output_file, price_q)

    for symbol in stock_symbols:
        symbol_q.put(symbol)
    symbol_q.put(_sentinel)
    for p in pool:
        p.wait()
    price_q.put(_sentinel)
    output.wait()
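Note that a single sentinel is put for ncpu * 2 workers, so each worker has to pass the sentinel on before exiting, or its siblings would block forever. A sketch of the two helpers under that assumption (_sentinel, read_stock and the output format are placeholders):

_sentinel = object()

def read_stock_worker(symbol_q, price_q):
    while True:
        symbol = symbol_q.get()
        if symbol is _sentinel:
            symbol_q.put(_sentinel)  # pass it on so the other workers stop too
            break
        price_q.put((symbol, read_stock(symbol)))  # hypothetical fetch

def write_output_file(price_q):
    with open('prices.txt', 'w') as out:
        while True:
            item = price_q.get()
            if item is _sentinel:
                break
            out.write('%s,%s\n' % item)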
Example #13
File: ec.py  Project: fvennetier/oio-sds
class ECWriter(object):
    """
    Writes an EC chunk
    """
    def __init__(self, chunk, conn):
        self._chunk = chunk
        self._conn = conn
        self.failed = False
        self.bytes_transferred = 0
        self.checksum = hashlib.md5()

    @property
    def chunk(self):
        return self._chunk

    @property
    def conn(self):
        return self._conn

    @classmethod
    def connect(cls, chunk, sysmeta, reqid=None):
        raw_url = chunk["url"]
        parsed = urlparse(raw_url)
        chunk_path = parsed.path.split('/')[-1]
        h = {}
        h["transfer-encoding"] = "chunked"
        h[chunk_headers["content_id"]] = sysmeta['id']
        h[chunk_headers["content_path"]] = sysmeta['content_path']
        h[chunk_headers["content_chunkmethod"]] = sysmeta['chunk_method']
        h[chunk_headers["container_id"]] = sysmeta['container_id']
        h[chunk_headers["chunk_pos"]] = chunk["pos"]
        h[chunk_headers["chunk_id"]] = chunk_path
        h[chunk_headers["content_policy"]] = sysmeta['policy']
        h[chunk_headers["content_version"]] = sysmeta['version']
        if reqid:
            h['X-oio-req-id'] = reqid

        # in the trailer
        # metachunk_size & metachunk_hash
        h["Trailer"] = (chunk_headers["metachunk_size"],
                        chunk_headers["metachunk_hash"])
        with ConnectionTimeout(io.CONNECTION_TIMEOUT):
            conn = io.http_connect(
                parsed.netloc, 'PUT', parsed.path, h)
            conn.chunk = chunk
        return cls(chunk, conn)

    def start(self, pool):
        # we use eventlet Queue to pass data to the send coroutine
        self.queue = Queue(io.PUT_QUEUE_DEPTH)
        # spawn the send coroutine
        pool.spawn(self._send)

    def _send(self):
        # this is the send coroutine loop
        while True:
            # fetch input data from the queue
            d = self.queue.get()
            # use HTTP transfer encoding chunked
            # to write data to RAWX
            if not self.failed:
                # format the chunk
                to_send = "%x\r\n%s\r\n" % (len(d), d)
                try:
                    with ChunkWriteTimeout(io.CHUNK_TIMEOUT):
                        self.conn.send(to_send)
                        self.bytes_transferred += len(d)
                except (Exception, ChunkWriteTimeout) as e:
                    self.failed = True
                    msg = str(e)
                    logger.warn("Failed to write to %s (%s)", self.chunk, msg)
                    self.chunk['error'] = msg

            self.queue.task_done()

    def wait(self):
        # wait until all data in the queue
        # has been processed by the send coroutine
        if self.queue.unfinished_tasks:
            self.queue.join()

    def send(self, data):
        # do not send empty data because
        # this will end the chunked body
        if not data:
            return
        # put the data to send into the queue
        # it will be processed by the send coroutine
        self.queue.put(data)

    def finish(self, metachunk_size, metachunk_hash):
        parts = [
            '0\r\n',
            '%s: %s\r\n' % (chunk_headers['metachunk_size'],
                            metachunk_size),
            '%s: %s\r\n' % (chunk_headers['metachunk_hash'],
                            metachunk_hash),
            '\r\n'
        ]
        to_send = "".join(parts)
        self.conn.send(to_send)

    def getresponse(self):
        # read the HTTP response from the connection
        with Timeout(io.CHUNK_TIMEOUT):
            self.resp = self.conn.getresponse()
            return self.resp
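Putting the pieces together, a caller would drive an ECWriter roughly like this (a sketch; chunk, sysmeta, pool and the data iterator are assumed to come from the surrounding upload code):

writer = ECWriter.connect(chunk, sysmeta, reqid=reqid)
writer.start(pool)           # spawn the send coroutine in the green pool
for data in data_iterator:   # hypothetical source of chunk payload
    writer.send(data)
writer.wait()                # block until the queue has been drained
writer.finish(metachunk_size, metachunk_hash)  # send the trailers
resp = writer.getresponse()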
Example #14
File: ec.py  Project: uneidel/oio-sds
class EcChunkWriter(object):
    """
    Writes an EC chunk
    """
    def __init__(self, chunk, conn, write_timeout=None, **_kwargs):
        self._chunk = chunk
        self._conn = conn
        self.failed = False
        self.bytes_transferred = 0
        self.checksum = hashlib.md5()
        self.write_timeout = write_timeout or io.CHUNK_TIMEOUT
        # we use eventlet Queue to pass data to the send coroutine
        self.queue = Queue(io.PUT_QUEUE_DEPTH)

    @property
    def chunk(self):
        return self._chunk

    @property
    def conn(self):
        return self._conn

    @classmethod
    def connect(cls,
                chunk,
                sysmeta,
                reqid=None,
                connection_timeout=None,
                write_timeout=None,
                **_kwargs):
        raw_url = chunk["url"]
        parsed = urlparse(raw_url)
        chunk_path = parsed.path.split('/')[-1]
        hdrs = headers_from_object_metadata(sysmeta)
        if reqid:
            hdrs['X-oio-req-id'] = reqid

        hdrs[chunk_headers["chunk_pos"]] = chunk["pos"]
        hdrs[chunk_headers["chunk_id"]] = chunk_path

        # in the trailer
        # metachunk_size & metachunk_hash
        hdrs["Trailer"] = ', '.join(
            (chunk_headers["metachunk_size"], chunk_headers["metachunk_hash"],
             chunk_headers["chunk_hash"]))
        with green.ConnectionTimeout(connection_timeout
                                     or io.CONNECTION_TIMEOUT):
            conn = io.http_connect(parsed.netloc, 'PUT', parsed.path, hdrs)
            conn.chunk = chunk
        return cls(chunk, conn, write_timeout=write_timeout)

    def start(self, pool):
        """Spawn the send coroutine"""
        pool.spawn(self._send)

    def _send(self):
        """Send coroutine loop"""
        while True:
            # fetch input data from the queue
            data = self.queue.get()
            # use HTTP transfer encoding chunked
            # to write data to RAWX
            if not self.failed:
                # format the chunk
                to_send = "%x\r\n%s\r\n" % (len(data), data)
                try:
                    with green.ChunkWriteTimeout(self.write_timeout):
                        self.conn.send(to_send)
                        self.bytes_transferred += len(data)
                except (Exception, green.ChunkWriteTimeout) as exc:
                    self.failed = True
                    msg = str(exc)
                    logger.warn("Failed to write to %s (%s)", self.chunk, msg)
                    self.chunk['error'] = 'write: %s' % msg

            self.queue.task_done()

    def wait(self):
        """
        Wait until all data in the queue
        has been processed by the send coroutine
        """
        if self.queue.unfinished_tasks:
            self.queue.join()

    def send(self, data):
        # do not send empty data because
        # this will end the chunked body
        if not data:
            return
        # put the data to send into the queue
        # it will be processed by the send coroutine
        self.queue.put(data)

    def finish(self, metachunk_size, metachunk_hash):
        """Send metachunk_size and metachunk_hash as trailers"""
        parts = [
            '0\r\n',
            '%s: %s\r\n' % (chunk_headers['metachunk_size'], metachunk_size),
            '%s: %s\r\n' % (chunk_headers['metachunk_hash'], metachunk_hash),
            '%s: %s\r\n' %
            (chunk_headers['chunk_hash'], self.checksum.hexdigest()), '\r\n'
        ]
        to_send = "".join(parts)
        self.conn.send(to_send)

    def getresponse(self):
        """Read the HTTP response from the connection"""
        # As the server may buffer data before writing it to non-volatile
        # storage, we don't know if we have to wait while sending data or
        # while reading response, thus we apply the same timeout to both.
        with Timeout(self.write_timeout):
            return self.conn.getresponse()
Example #15
class Manager(object):
    """Class encapsulating Heroshi URL server state."""

    def __init__(self):
        self.active = False

        self.prefetch_queue = Queue(settings.prefetch['queue_size'])
        self.prefetch_thread = spawn(self.prefetch_worker)
        self.prefetch_thread.link(reraise_errors, greenthread.getcurrent())

        self.given_items = Cache()

        self.postreport_queue = Queue(settings.postreport['queue_size'])
        self.postreport_thread = spawn(self.postreport_worker)
        self.postreport_thread.link(reraise_errors, greenthread.getcurrent())

        self.storage_connections = eventlet.pools.Pool(max_size=settings.storage['max_connections'])
        self.storage_connections.create = StorageConnection

    def close(self):
        self.active = False
        self.prefetch_thread.kill()
        self.postreport_thread.kill()

    def ping_storage(self):
        with self.storage_connections.item() as storage:
            pass

    def get_from_prefetch_queue(self, size):
        result = []
        while len(result) < size:
            sleep()
            try:
                pack = self.prefetch_queue.get(timeout=settings.prefetch['get_timeout'])
            except eventlet.queue.Empty:
                break
            result.extend(pack)
        return result

    def prefetch_worker(self):
        if not self.active:
            sleep(0.01)
        while self.active:
            with self.storage_connections.item() as storage:
                docs = storage.query_new_random(settings.prefetch['single_limit'])
            if len(docs) == 0:
                sleep(10.)
                continue
            else:
                # Note: putting a *list* as a single item into queue
                self.prefetch_queue.put(docs)
        # and respawn again
        self.prefetch_thread = spawn(self.prefetch_worker)

    @log_exceptions
    def _postreport_worker(self):
        docs = []
        while len(docs) < settings.postreport['flush_size']: # inner accumulator loop
            try:
                item = self.postreport_queue.get(timeout=settings.postreport['flush_delay'])
            except eventlet.queue.Empty:
                break

            # Quick dirty duplicate filtering.
            #     Note that this code only finds dups in current "flush pack". `report_result` uses
            # `is_duplicate_report` which finds dups in whole `postreport_queue` but it can't find dups here.
            # Thus two dups searchers.
            #     It is still possible that at most 2 duplicate reports exist: one in `postreport_queue`
            # and one in current "flush pack". This is acceptable, because most of the dups are filtered out.
            for doc in docs:
                if item['url'] == doc['url']:
                    item = None
                    break
            if item is None:
                continue

            if 'result' not in item:
                # It's a link, found on some reported page.
                # Just add it to bulk insert, don't try to update any document here.
                docs.append(item)
                continue

            docs.append(item)

        if not docs:
            return

        with self.storage_connections.item() as storage:
            for doc in docs:
                content = doc.pop('content', None)

                storage.save(doc)

                if content is None:
                    continue
                headers = doc.get('headers') or {}
                content_type = headers.get('content-type', "application/octet-stream")

                storage.save_content(doc, content, content_type)

    def postreport_worker(self):
        if not self.active:
            sleep(0.01)

        while self.active:
            self._postreport_worker()

        # and respawn again
        self.postreport_thread = spawn(self.postreport_worker)

    @log_exceptions
    def crawl_queue(self, request):
        # cap the requested limit at the configured maximum
        limit = min(int(request.POST['limit']), settings.api['max_queue_limit'])

        time_now = datetime.datetime.now()

        doc_list = self.get_from_prefetch_queue(limit)
        for doc in doc_list:
            if isinstance(doc['visited'], basestring):
                doc['visited'] = datetime.datetime.strptime(doc['visited'], TIME_FORMAT)
            self.given_items.set(doc['url'], doc, settings.prefetch['cache_timeout'])

        def is_old(doc):
            """Predicate tells if page was never visited or visited long enough ago.

            Worker SHOULD NOT visit URI, if this function returns False.
            """
            if doc['visited'] is None:
                return True
            diff = time_now - doc['visited']
            return diff > datetime.timedelta(minutes=settings.api['min_revisit_minutes'])

        doc_list = filter(is_old, doc_list)

        def make_queue_item(doc):
            if isinstance(doc['visited'], datetime.datetime):
                doc['visited'] = doc['visited'].strftime(TIME_FORMAT)
            filter_fields = ('url', 'headers', 'visited',)
            return dict( (k,v) for (k,v) in doc.iteritems() if k in filter_fields )

        queue = map(make_queue_item, doc_list)
        return queue

    def is_duplicate_report(self, url):
        """Quick dirty duplicate searching."""

        for doc in self.postreport_queue.queue:
            if url == doc['url']:
                return True
        return False

    def force_append_links(self, links):
        # 1. remove duplicates
        links = set(links)

        # 2. put links into queue
        for url in links:
            new_doc = {'url': url, 'parent': None, 'visited': None}
            self.postreport_queue.put(new_doc)

    @log_exceptions
    def report_result(self, request):
        report = json.loads(request.body)

        # `report['links']` now used only to force insertion of new URLs into
        #   Heroshi crawling queue via bin/heroshi-append script.
        # So, if a more sophisticated way to append new URLs is to arise,
        #   remove this code.
        if report['url'] is None:
            self.force_append_links(report['links'])
            return

        if self.is_duplicate_report(report['url']):
            return

        # accept report into postreport_queue for later persistent saving
        try:
            doc = self.given_items[report['url']]
        except KeyError:
            self.postreport_queue.put(report)
        else:
            doc.update(report)
            self.postreport_queue.put(doc)

        return None
Example #16
def main():
    args = options()

    global ACCOUNT, PROXY, QUEUE, NS, VERBOSE, TIMEOUT
    global COUNTERS, ELECTIONS
    ACCOUNT = args.account
    NS = args.namespace
    VERBOSE = args.verbose
    TIMEOUT = args.timeout
    PROXY = ObjectStorageApi(NS)
    ELECTIONS = AtomicInteger()

    num_worker_threads = int(args.max_worker)
    print("Using %d workers" % num_worker_threads)

    total_objects = {'size': 0, 'files': 0, 'elapsed': 0}
    total_containers = {'size': 0, 'files': 0, 'elapsed': 0}

    for path in args.path:
        path = path.rstrip('/')
        if '/' in path:
            bucket, path = path.split('/', 1)
        else:
            bucket = path
            path = ""

        containers = []

        QUEUE = Queue()
        pool = eventlet.GreenPool(num_worker_threads)

        for i in range(num_worker_threads):
            pool.spawn(worker_objects)

        COUNTERS = AtomicInteger()
        _bucket = container_hierarchy(bucket, path)
        # we don't use placeholders, we use prefix path as prefix
        for entry in full_list(prefix=container_hierarchy(bucket, path)):
            name, _files, _size, _ = entry
            if name != _bucket and not name.startswith(_bucket + '%2F'):
                continue

            if _files:
                QUEUE.put(name)

            containers.append(name)

        # we have to wait all objects
        print("Waiting flush of objects")

        report = args.report

        while not QUEUE.empty():
            ts = time.time()
            while time.time() - ts < report and not QUEUE.empty():
                time.sleep(1)
            diff = time.time() - ts
            val = COUNTERS.reset()
            elections = ELECTIONS.reset()
            print("Objects: %5.2f / Size: %5.2f" %
                  (val[0] / diff, val[1] / diff),
                  "Elections failed: %5.2f/s total: %d" %
                  (elections[0] / diff, ELECTIONS.total()[0]),
                  " " * 20,
                  end='\r')
            sys.stdout.flush()

        print("Waiting end of workers")
        QUEUE.join()

        val = COUNTERS.total()
        total_objects['files'] += val[0]
        total_objects['size'] += val[1]
        total_objects['elapsed'] += COUNTERS.time()

        COUNTERS = AtomicInteger()

        QUEUE = Queue()
        for i in range(num_worker_threads):
            pool.spawn(worker_container)

        print("We have to delete", len(containers), "containers")

        for container in containers:
            QUEUE.put(container)

        while not QUEUE.empty():
            ts = time.time()
            while time.time() - ts < report and not QUEUE.empty():
                time.sleep(1)
            diff = time.time() - ts
            val = COUNTERS.reset()
            elections = ELECTIONS.reset()
            print("Containers: %5.2f" % (val[0] / diff),
                  "Elections failed: %5.2f/s total: %d" %
                  (elections[0] / diff, ELECTIONS.total()[0]),
                  " " * 20,
                  end='\r')
            sys.stdout.flush()

        QUEUE.join()
        val = COUNTERS.total()
        total_containers['files'] += val[0]
        total_containers['size'] += val[1]
        total_containers['elapsed'] += COUNTERS.time()

    print("""
Objects:
    - ran during {o[elapsed]:5.2f}
    - {o[files]} objects removed (size {size})
    - {o_file_avg:5.2f} objects/s ({o_size_avg} avg. size/s)
""".format(o=total_objects,
           size=show(total_objects['size'], True),
           o_file_avg=total_objects['files'] / total_objects['elapsed'],
           o_size_avg=show(total_objects['size'] / total_objects['elapsed'],
                           True)))

    print("""
Containers:
    - ran during {o[elapsed]:5.2f}
    - {o[files]} containers
    - {o_file_avg:5.2f} containers/s
""".format(o=total_containers,
           o_file_avg=total_containers['files'] / total_containers['elapsed']))

    print("Elections failed: %d" % ELECTIONS.total()[0])
Example #17
class Crawler(object):
    """
    A crawler will traverse all the pages of a site and process the content
    in a defined way.

    :param init_urls: the very first urls to start with.
    :param q: the queue that stores all urls to be crawled
    :param urls: a set stores all urls already crawled
    """

    def __init__(self, init_urls, max_workers=200):
        self.init_urls = init_urls
        self.max_workers = max_workers
        self.q = Queue()
        self.urls = set()
        self.s = requests.Session()
        self.root_hosts = set()
        for url in init_urls:
            self.q.put(url)
            self.urls.add(url)
            self.root_hosts.add(get_netloc(url))

    def url_allowed(self, url):
        """Check if the given url will be crawled.

        Currently, a url is crawled only if it belongs to the same host
        as one of init_urls.
        """
        return get_netloc(url) in self.root_hosts


    def save(self, response):
        """Save data at the given url."""
        raise NotImplementedError(
            "Please implement your own save logic in subclass.")

    def parse(self, response):
        self.save(response)

        new_links = set()

        for url in self.find_links(response):
            if url not in self.urls and self.url_allowed(url):
                new_links.add(url)
                self.urls.add(url)
                self.q.put(url)
        if len(new_links) != 0:
            print("Found %d new urls to crawl" % len(new_links))


    def fetch(self, url):
        """Fetch content of the url from network."""

        response = self.s.get(url)
        print("Getting content from %s, length: %d" % (url,
                                                       len(response.content)))
        return response

    def work(self, i):
        """Define the work process.

        Retrieve a url from queue, fetch the content from it,
        process it and get new urls to crawl.
        Continue the process until all pages are crawled.

        :param i: indicate the worker number
        """
        while True:
            url = self.q.get()
            print("Worker %d: Getting url %s from queue." % (i, url))
            response = self.fetch(url)
            self.parse(response)
            self.q.task_done()

    def run(self):
        """Start the crawling process.

        This is the main entrance for our crawler. It will start several
        workers, crawling in parallel.
        """
        pool = eventlet.GreenPool()
        start = time.time()
        for i in range(self.max_workers):
            pool.spawn(self.work, i)

        self.q.join()
        end = time.time()

        print("Finished crawling, takes %s seconds." % str(end - start))
        print("Have fun hacking!")
Example #18
class Interpreter(object):
    '''
    The class responsible for keeping track of the execution of the
    state machine.
    '''
    def __init__(self):
        self.running = True
        self.configuration = OrderedSet()
        
        self.internalQueue = Queue()
        self.externalQueue = Queue()
        
        self.statesToInvoke = OrderedSet()
        self.historyValue = {}
        self.dm = None
        self.invokeId = None
        self.parentId = None
        self.logger = None
    
    
    def interpret(self, document, invokeId=None):
        '''Initializes the interpreter given an SCXMLDocument instance'''
        
        self.doc = document
        self.invokeId = invokeId
        
        transition = Transition(document.rootState)
        transition.target = document.rootState.initial
        transition.exe = document.rootState.initial.exe
        
        self.executeTransitionContent([transition])
        self.enterStates([transition])
        
    
    
    def mainEventLoop(self):
        while self.running:
            enabledTransitions = None
            stable = False
                
            # now take any newly enabled null transitions and any transitions triggered by internal events
            while self.running and not stable:
                enabledTransitions = self.selectEventlessTransitions()
                if not enabledTransitions:
                    if self.internalQueue.empty(): 
                        stable = True
                    else:
                        internalEvent = self.internalQueue.get() # this call returns immediately if no event is available
                        
                        self.logger.info("internal event found: %s", internalEvent.name)
                        
                        self.dm["__event"] = internalEvent
                        enabledTransitions = self.selectTransitions(internalEvent)

                if enabledTransitions:
                    self.microstep(enabledTransitions)
                # eventlet.greenthread.sleep()
            eventlet.greenthread.sleep()

            for state in self.statesToInvoke:
                for inv in state.invoke:
                    inv.invoke(inv)
            self.statesToInvoke.clear()
            
            if not self.internalQueue.empty():
                continue
            
            externalEvent = self.externalQueue.get() # this call blocks until an event is available
            
            if externalEvent.name == "cancel.invoke.%s" % self.dm.sessionid:
                continue
            
            self.logger.info("external event found: %s", externalEvent.name)
            
            self.dm["__event"] = externalEvent
            
            for state in self.configuration:
                for inv in state.invoke:
                    if inv.invokeid == externalEvent.invokeid:  # event is the result of an <invoke> in this state
                        self.applyFinalize(inv, externalEvent)
                    if inv.autoforward:
                        inv.send(externalEvent)
            
            enabledTransitions = self.selectTransitions(externalEvent)
            if enabledTransitions:
                self.microstep(enabledTransitions)
            
              
        # if we get here, we have reached a top-level final state or some
        # external entity has set running to False
        self.exitInterpreter()

    def exitInterpreter(self):
        statesToExit = sorted(self.configuration, key=exitOrder)
        for s in statesToExit:
            for content in s.onexit:
                self.executeContent(content)
            for inv in s.invoke:
                self.cancelInvoke(inv)
            self.configuration.delete(s)
            if isFinalState(s) and isScxmlState(s.parent):
                if self.invokeId and self.parentId and self.parentId in self.dm.sessions:
                    self.send(["done", "invoke", self.invokeId], s.donedata(), self.invokeId, self.dm.sessions[self.parentId].interpreter.externalQueue)   
                self.logger.info("Exiting interpreter")
                dispatcher.send("signal_exit", self, final=s.id)
                return
        
        dispatcher.send("signal_exit", self, final=None)
            
        
    def selectEventlessTransitions(self):
        enabledTransitions = OrderedSet()
        atomicStates = filter(isAtomicState, self.configuration)
        atomicStates = sorted(atomicStates, key=documentOrder)
        for state in atomicStates:
            done = False
            for s in [state] + getProperAncestors(state, None):
                if done: break
                for t in s.transition:
                    if not t.event and self.conditionMatch(t): 
                        enabledTransitions.add(t)
                        done = True
                        break
        filteredTransitions = self.filterPreempted(enabledTransitions)
        return filteredTransitions
    
    
    def selectTransitions(self, event):
        enabledTransitions = OrderedSet()
        atomicStates = filter(isAtomicState, self.configuration)
        atomicStates = sorted(atomicStates, key=documentOrder)

        for state in atomicStates:
            done = False
            for s in [state] + getProperAncestors(state, None):
                if done: break
                for t in s.transition:
                    if t.event and nameMatch(t.event, event.name.split(".")) and self.conditionMatch(t):
                        enabledTransitions.add(t)
                        done = True
                        break
                    
        filteredTransitions = self.filterPreempted(enabledTransitions)
        return filteredTransitions
    
    
    def preemptsTransition(self, t, t2):
        
        if self.isType1(t): return False
        elif self.isType2(t) and self.isType3(t2): return True
        elif self.isType3(t): return True
        
        return False
    
    def getCommonParallel(self, states):
        ancestors = set(getProperAncestors(states[0], None))
        
        for s in states[1:]:
            ancestors = ancestors.intersection(getProperAncestors(s, None))
        
        if ancestors:
            return sorted(ancestors, key=exitOrder)[0]
    
    
    def isType1(self, t):
        return not t.target
    
    def isType2(self, t):
        source = t.source if t.type == "internal" else t.source.parent
        p = self.getCommonParallel([source] + self.getTargetStates(t.target))
        return not isScxmlState(p)
            
    
    def isType3(self, t):
        return not self.isType2(t) and not self.isType1(t)
    
    
    def filterPreempted(self, enabledTransitions):
        filteredTransitions = []
        for t in enabledTransitions:
            # does any t2 in filteredTransitions preempt t? if not, add t to filteredTransitions
            if not any(map(lambda t2: self.preemptsTransition(t2, t), filteredTransitions)):
                filteredTransitions.append(t)
        
        return OrderedSet(filteredTransitions)
    
    
    def microstep(self, enabledTransitions):
        self.exitStates(enabledTransitions)
        self.executeTransitionContent(enabledTransitions)
        self.enterStates(enabledTransitions)
        self.logger.info("new config: {" + ", ".join([s.id for s in self.configuration if s.id != "__main__"]) + "}")
    
    
    def exitStates(self, enabledTransitions):
        statesToExit = OrderedSet()
        for t in enabledTransitions:
            if t.target:
                tstates = self.getTargetStates(t.target)
                if t.type == "internal" and isCompoundState(t.source) and all(map(lambda s: isDescendant(s,t.source), tstates)):
                    ancestor = t.source
                else:
                    ancestor = self.findLCA([t.source] + tstates)
                
                for s in self.configuration:
                    if isDescendant(s,ancestor):
                        statesToExit.add(s)
        
        for s in statesToExit:
            self.statesToInvoke.delete(s)
        
        statesToExit.sort(key=exitOrder)
        
        for s in statesToExit:
            for h in s.history:
                if h.type == "deep":
                    f = lambda s0: isAtomicState(s0) and isDescendant(s0,s)
                else:
                    f = lambda s0: s0.parent == s
                self.historyValue[h.id] = filter(f,self.configuration) #+ s.parent 
        for s in statesToExit:
            for content in s.onexit:
                self.executeContent(content)
            for inv in s.invoke:
                self.cancelInvoke(inv)
            self.configuration.delete(s)
    
        
    def cancelInvoke(self, inv):
        inv.cancel()
    
    def executeTransitionContent(self, enabledTransitions):
        for t in enabledTransitions:
            self.executeContent(t)
    
    
    def enterStates(self, enabledTransitions):
        statesToEnter = OrderedSet()
        statesForDefaultEntry = OrderedSet()
        for t in enabledTransitions:
            if t.target:
                tstates = self.getTargetStates(t.target)
                if t.type == "internal" and isCompoundState(t.source) and all(map(lambda s: isDescendant(s,t.source), tstates)):
                    ancestor = t.source
                else:
                    ancestor = self.findLCA([t.source] + tstates)
                for s in tstates:
                    self.addStatesToEnter(s,statesToEnter,statesForDefaultEntry)
                for s in tstates:
                    for anc in getProperAncestors(s,ancestor):
                        statesToEnter.add(anc)
                        if isParallelState(anc):
                            for child in getChildStates(anc):
                                if not any(map(lambda s: isDescendant(s,child), statesToEnter)):
                                    self.addStatesToEnter(child, statesToEnter,statesForDefaultEntry)

        statesToEnter.sort(key=enterOrder)
        for s in statesToEnter:
            self.statesToInvoke.add(s)
            self.configuration.add(s)
            if self.doc.binding == "late" and s.isFirstEntry:
                s.initDatamodel()
                s.isFirstEntry = False

            for content in s.onentry:
                self.executeContent(content)
            if s in statesForDefaultEntry:
                self.executeContent(s.initial)
            if isFinalState(s):
                parent = s.parent
                grandparent = parent.parent
                self.internalQueue.put(Event(["done", "state", parent.id], s.donedata()))
                if isParallelState(grandparent):
                    if all(map(self.isInFinalState, getChildStates(grandparent))):
                        self.internalQueue.put(Event(["done", "state", grandparent.id]))
        for s in self.configuration:
            if isFinalState(s) and isScxmlState(s.parent):
                self.running = False
    
    
    def addStatesToEnter(self, state,statesToEnter,statesForDefaultEntry):
        if isHistoryState(state):
            if state.id in self.historyValue:
                for s in self.historyValue[state.id]:
                    self.addStatesToEnter(s, statesToEnter, statesForDefaultEntry)
                    for anc in getProperAncestors(s,state):
                        statesToEnter.add(anc)
            else:
                for t in state.transition:
                    for s in self.getTargetStates(t.target):
                        self.addStatesToEnter(s, statesToEnter, statesForDefaultEntry)
        else:
            statesToEnter.add(state)
            if isCompoundState(state):
                statesForDefaultEntry.add(state)
                for s in self.getTargetStates(state.initial):
                    self.addStatesToEnter(s, statesToEnter, statesForDefaultEntry)
            elif isParallelState(state):
                for s in getChildStates(state):
                    self.addStatesToEnter(s,statesToEnter,statesForDefaultEntry)
    
    def isInFinalState(self, s):
        if isCompoundState(s):
            return any(map(lambda s: isFinalState(s) and s in self.configuration, getChildStates(s)))
        elif isParallelState(s):
            return all(map(self.isInFinalState, getChildStates(s)))
        else:
            return False
    
    def findLCA(self, stateList):
        for anc in filter(isCompoundState, getProperAncestors(stateList[0], None)):
#        for anc in getProperAncestors(stateList[0], None):
            if all(map(lambda s: isDescendant(s, anc), stateList[1:])):
                return anc
Example #19
File: ec.py  Project: lanweichang/oio-sds
class ECWriter(object):
    """
    Writes an EC chunk
    """
    def __init__(self, chunk, conn):
        self._chunk = chunk
        self._conn = conn
        self.failed = False
        self.bytes_transferred = 0
        self.checksum = hashlib.md5()

    @property
    def chunk(self):
        return self._chunk

    @property
    def conn(self):
        return self._conn

    @classmethod
    def connect(cls, chunk, sysmeta):
        raw_url = chunk["url"]
        parsed = urlparse(raw_url)
        chunk_path = parsed.path.split('/')[-1]
        h = {}
        h["transfer-encoding"] = "chunked"
        h[chunk_headers["content_id"]] = sysmeta['id']
        h[chunk_headers["content_path"]] = sysmeta['content_path']
        h[chunk_headers["content_chunkmethod"]] = sysmeta['chunk_method']
        h[chunk_headers["container_id"]] = sysmeta['container_id']
        h[chunk_headers["chunk_pos"]] = chunk["pos"]
        h[chunk_headers["chunk_id"]] = chunk_path
        h[chunk_headers["content_policy"]] = sysmeta['policy']
        h[chunk_headers["content_version"]] = sysmeta['version']

        # in the trailer
        # metachunk_size & metachunk_hash
        h["Trailer"] = (chunk_headers["metachunk_size"],
                        chunk_headers["metachunk_hash"])
        with ConnectionTimeout(io.CONNECTION_TIMEOUT):
            conn = io.http_connect(parsed.netloc, 'PUT', parsed.path, h)
            conn.chunk = chunk
        return cls(chunk, conn)

    def start(self, pool):
        # we use eventlet Queue to pass data to the send coroutine
        self.queue = Queue(io.PUT_QUEUE_DEPTH)
        # spawn the send coroutine
        pool.spawn(self._send)

    def _send(self):
        # this is the send coroutine loop
        while True:
            # fetch input data from the queue
            d = self.queue.get()
            # use HTTP transfer encoding chunked
            # to write data to RAWX
            if not self.failed:
                # format the chunk
                to_send = "%x\r\n%s\r\n" % (len(d), d)
                try:
                    with ChunkWriteTimeout(io.CHUNK_TIMEOUT):
                        self.conn.send(to_send)
                        self.bytes_transferred += len(d)
                except (Exception, ChunkWriteTimeout) as e:
                    self.failed = True
                    msg = str(e)
                    logger.warn("Failed to write to %s (%s)", self.chunk, msg)
                    self.chunk['error'] = msg

            self.queue.task_done()

    def wait(self):
        # wait until all data in the queue
        # has been processed by the send coroutine
        if self.queue.unfinished_tasks:
            self.queue.join()

    def send(self, data):
        # do not send empty data because
        # this will end the chunked body
        if not data:
            return
        # put the data to send into the queue
        # it will be processed by the send coroutine
        self.queue.put(data)

    def finish(self, metachunk_size, metachunk_hash):
        parts = [
            '0\r\n',
            '%s: %s\r\n' % (chunk_headers['metachunk_size'], metachunk_size),
            '%s: %s\r\n' % (chunk_headers['metachunk_hash'], metachunk_hash),
            '\r\n'
        ]
        to_send = "".join(parts)
        self.conn.send(to_send)

    def getresponse(self):
        # read the HTTP response from the connection
        with Timeout(io.CHUNK_TIMEOUT):
            self.resp = self.conn.getresponse()
            return self.resp