Пример #1
0
    def setUp(self):
        """Create a fake wiki crawl job and start the RPC server it relies on.

        Sets the following attributes:

        * ``self.dir`` -- a fresh temporary directory from ``tempfile.mkdtemp``.
        * ``self.job`` -- a ``Job`` named 'fake wiki crawler' with a single
          zh.wikipedia.org start URL and the user configuration parsed from
          the module-level ``user_conf`` text.
        * ``self.rpc_server`` -- a ``ColaRPCServer`` bound to localhost on the
          port taken from ``self.job.context.job.port``.
        * ``self.loader`` -- a ``JobLoader`` for the job, on which ``init_mq``
          is called with a node list that contains only the local node.

        The RPC server's ``serve_forever`` runs in a daemon thread.
        """
        url_patterns = UrlPatterns(
            Url(r'^http://zh.wikipedia.org/wiki/[^(:|/)]+$', 'wiki_item',
                FakeWikiParser))
        fake_user_conf = Config(StringIO(user_conf))

        self.dir = tempfile.mkdtemp()

        self.job = Job(
            'fake wiki crawler',
            url_patterns,
            MechanizeOpener, [
                'http://zh.wikipedia.org/wiki/%E6%97%A0%E6%95%8C%E8%88%B0%E9%98%9F',
            ],
            user_conf=fake_user_conf)

        # Read the port once so the node address and the server binding
        # cannot disagree.
        port = self.job.context.job.port
        local_node = 'localhost:%s' % port
        nodes = [local_node]

        self.rpc_server = ColaRPCServer(('localhost', port))
        self.loader = JobLoader(self.job)
        self.loader.init_mq(self.rpc_server, nodes, local_node, self.dir)

        thd = threading.Thread(target=self.rpc_server.serve_forever)
        # Set the ``daemon`` attribute directly; ``Thread.setDaemon()`` is
        # deprecated in favour of it.
        thd.daemon = True
        thd.start()
Пример #2
0
    def __init__(self, user_conf=None, **user_defines):
        """Collect the configuration sources and expose their keys as attributes.

        :param user_conf: ``None``, a ``str`` (handed to ``Config``), or any
            other object, which is stored unchanged as ``self.user_conf``.
        :param user_defines: extra keyword settings, wrapped in a
            ``PropertyObject`` and stored as ``self.user_defines``.

        ``main_conf``, ``user_conf`` and ``user_defines`` are merged, in that
        order, into one ``PropertyObject`` through ``update``.  Every merged
        key that does not start with an underscore is then copied onto the
        instance as an attribute.
        """
        self.main_conf = main_conf

        if user_conf is None:
            self.user_conf = PropertyObject(dict())
        elif isinstance(user_conf, str):
            self.user_conf = Config(user_conf)
        else:
            self.user_conf = user_conf
        self.user_defines = PropertyObject(user_defines)

        merged = PropertyObject({})
        for layer in (self.main_conf, self.user_conf, self.user_defines):
            merged.update(layer)

        for key in merged:
            # Underscore-prefixed keys are not exposed as attributes.
            if key.startswith('_'):
                continue
            setattr(self, key, getattr(merged, key))
Пример #3
0
    def setUp(self):
        """Prepare a fake wiki crawl job and the node addresses for the tests.

        Sets ``self.dir`` (a new temporary directory), ``self.job`` (a ``Job``
        named 'fake wiki crawler' with one zh.wikipedia.org start URL),
        ``self.local_node`` ('localhost:<port>' using the job's configured
        port) and ``self.nodes`` (a list holding only the local node).
        """
        wiki_item_url = Url(
            r'^http://zh.wikipedia.org/wiki/[^(:|/)]+$',
            'wiki_item',
            FakeWikiParser)
        patterns = UrlPatterns(wiki_item_url)
        conf = Config(StringIO(user_conf))

        self.dir = tempfile.mkdtemp()

        start_urls = [
            'http://zh.wikipedia.org/wiki/%E6%97%A0%E6%95%8C%E8%88%B0%E9%98%9F',
        ]
        self.job = Job('fake wiki crawler', patterns, MechanizeOpener,
                       start_urls, user_conf=conf)

        self.local_node = 'localhost:%s' % self.job.context.job.port
        self.nodes = [self.local_node]
Пример #4
0
from cola.core.config import Config
from cola.core.utils import get_ip, import_job_desc, Clock
from cola.core.logs import get_logger
from cola.core.mq import MessageQueue
from cola.core.dedup import FileBloomFilterDeduper, MapDeduper
from cola.core.rpc import ThreadedColaRPCServer, client_call
from cola.core.zip import ZipHandler
from cola.functions.budget import BudgetApplyServer
from cola.functions.speed import SpeedControlServer
from cola.functions.counter import CounterServer
from cola.job import Job, FINISHED, IDLE
from cola.cluster.master import Master
from cola.cluster.worker import Worker

# Configuration files live in the ``conf`` directory beside this module.
conf_dir = os.path.join(
    os.path.dirname(os.path.abspath(__file__)),
    'conf',
)
# Parsed at import time from ``main.yaml`` in that directory.
main_conf = Config(os.path.join(conf_dir, 'main.yaml'))

# NOTE(review): presumably the number of consecutive idle checks tolerated
# before acting on an idle job -- confirm against the code that reads it.
MAX_IDLE_TIMES = 50


class ContextManager(multiprocessing.managers.SyncManager):
    """``SyncManager`` subclass used as the registry for this module's shared types.

    It adds no behaviour of its own; a dedicated subclass keeps the
    ``register`` calls made on it separate from ``SyncManager`` itself.
    """


# Register each shared type on ContextManager under its typeid, in the same
# order as before.
for _typeid, _factory in (
        ('FileBloomFilterDeduper', FileBloomFilterDeduper),
        ('MapDeduper', MapDeduper),
        ('mq', MessageQueue),
        ('budget_server', BudgetApplyServer),
        ('speed_server', SpeedControlServer),
        ('counter_server', CounterServer),
):
    ContextManager.register(_typeid, _factory)
# Do not leave the loop variables behind in the module namespace.
del _typeid, _factory
Пример #5
0
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

Created on 2013-6-27

@author: Chine
'''

import os

from cola.core.config import Config

# Directory containing this module; the YAML settings are looked up here.
base = os.path.dirname(os.path.abspath(__file__))

# Use test.yaml when it exists, otherwise fall back to weibosearch.yaml.
user_conf = os.path.join(base, 'test.yaml')
user_conf = (user_conf if os.path.exists(user_conf)
             else os.path.join(base, 'weibosearch.yaml'))
user_config = Config(user_conf)

# MongoDB connection settings and database name from the ``job`` section.
mongo_host, mongo_port = (user_config.job.mongo.host,
                          user_config.job.mongo.port)
db_name = user_config.job.db

instances = user_config.job.instances
Пример #6
0
 def setUp(self):
     """Store a ``Config`` parsed from an in-memory YAML snippet naming the job."""
     conf_stream = StringIO('name: cola-unittest')
     self.simulate_user_conf = Config(conf_stream)
Пример #7
0
# -*- coding: utf-8 -*-
'''
Copyright (c) 2013 Qin Xuye <*****@*****.**>

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

Created on 2013-5-25

@author: Chine
'''

import os

from cola.core.config import Config

# The ``conf`` directory that sits beside this module's parent package
# directory (two ``dirname`` steps up from this file).
conf_base_path = os.path.join(
    os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'conf')


def conf_path(name):
    """Return the path of the file *name* inside ``conf_base_path``.

    Defined with ``def`` rather than as a lambda bound to a name, so the
    function has a real ``__name__`` in tracebacks and can carry a docstring.
    """
    return os.path.join(conf_base_path, name)

# Main configuration, parsed at import time from ``main.yaml`` in the conf
# directory.
main_conf = Config(
    conf_path('main.yaml'),
)
Пример #8
0
Created on 2013-6-9

@author: Chine
'''

import os

from cola.core.config import Config
from pymongo import MongoClient
import random

# Directory containing this module; the YAML files are looked up here.
base = os.path.dirname(os.path.abspath(__file__))

# Use test.yaml when it exists, otherwise fall back to weibo.yaml.
user_conf = os.path.join(base, 'test.yaml')
user_conf = (user_conf if os.path.exists(user_conf)
             else os.path.join(base, 'weibo.yaml'))
user_config = Config(user_conf)

startsfile = os.path.join(base, 'uid.yaml')
startlist = Config(startsfile)

# Start uids from uid.yaml as strings, shuffled in place.
starts = [str(start.uid) for start in startlist.starts]
random.shuffle(starts)

# MongoDB connection settings and database name from the ``job`` section.
mongo_host, mongo_port = (user_config.job.mongo.host,
                          user_config.job.mongo.port)
db_name = user_config.job.db

# NOTE: the database is queried while this module is being imported.
client = MongoClient(mongo_host, mongo_port)
db = client[db_name]
# The ``uid`` field of every document in the weibo_user collection.
dbuid = [u['uid'] for u in db.weibo_user.find()]