示例#1
0
 def __init__(self, buffer, config_path=None):
     """Keep the input buffer and load settings from ``yandex.yml``.

     :param buffer: object stored unchanged on ``self.buffer``.
     :param config_path: directory in which ``yandex.yml`` is looked up.
         NOTE(review): the default ``None`` is handed straight to
         ``os.path.join``, which raises ``TypeError`` -- callers
         effectively have to pass a real directory. Confirm the intended
         fallback before changing the default.
     """
     self.buffer = buffer

     # Settings file lives directly inside the supplied directory.
     yandex_settings_file = os.path.join(config_path, 'yandex.yml')
     self.config = Config.setup_main_config(yandex_settings_file)

     # Starts empty / unset; nothing in this method fills them in.
     self.result = []
     self._cleaner = None

     self._logger = logging.getLogger('crawler')
示例#2
0
import requests
import os, sys
import lxml.html as html
import time
import logging.config
import consul
# Add the parent of this file's directory to the import search path before
# importing the project-local ``helper`` package (presumably it lives there).
sys.path.append(os.path.join(os.path.dirname(__file__), "../"))
from helper.config import Config

# Directory containing this file (symlinks resolved by realpath) and the
# ``config`` directory located one level above it.
CURRENT_DIR = os.path.dirname(os.path.realpath(__file__))
config_path = os.path.join(CURRENT_DIR, '..', 'config')

# Import-time side effects: load main.yml through Config, then configure the
# logging module from logging.conf.
main_config = Config.setup_main_config(os.path.join(config_path, 'main.yml'))
logging.config.fileConfig(os.path.join(config_path, 'logging.conf'))

class ProxyManager(object):

    def __init__(self):
        self.headers = {
            'User-Agent': 'Lynx/2.8.9dev.8 libwww-FM/2.14 SSL-MM/1.4.1 GNUTLS/3.4.9',
            'Accept': 'text/javascript, application/javascript, application/ecmascript, application/x-ecmascript, */*; q=0.01',
            'Accept-Encoding': 'gzip, deflate',
            'Accept-Language': 'ru,en-US;q=0.7,en;q=0.3',
            'X-Requested-With': 'XMLHttpRequest',
            'Connection': 'Keep-Alive',
            'Host': 'yandex.ru'
        }

        self.url = 'https://yandex.ru/search/?text=qwerty&lr=213'
        self._logger = logging.getLogger(__name__)
        self.download_url = 'http://api.foxtools.ru/v2/Proxy.txt' \
示例#3
0
import os
import sys
import pika
import json
import redis
from collections import namedtuple
# Add the parent of this file's directory to the import search path before
# the project-local imports below (presumably ``helper`` and ``proccess``
# live there).
sys.path.append(os.path.join(os.path.dirname(__file__), "../"))
from mongoengine import connect
from helper.config import Config
from proccess.proccessing import Process
import logging.config
# Directory containing this file (symlinks resolved by realpath) and the
# ``config`` directory located one level above it.
CURRENT_DIR = os.path.dirname(os.path.realpath(__file__))
config_path = os.path.join(CURRENT_DIR, '..', 'config')

# Import-time side effects: load main.yml, configure the logging module from
# logging.conf, then load the RabbitMQ settings from rabbit.yml.
main_config = Config.setup_main_config(os.path.join(config_path, 'main.yml'))
logging.config.fileConfig(os.path.join(config_path, 'logging.conf'))
params = Config.setup_main_config(os.path.join(config_path, 'rabbit.yml'))


class RabbitTask:
    def __init__(self):
        """Connect to RabbitMQ and set up the channel for the social queue.

        Reads the broker host and queue name from the module-level
        ``params`` object (``params.receiver.host`` and
        ``params.receiver.queue.social``), opens a blocking pika connection,
        declares the queue with ``durable=True`` and calls ``basic_qos``
        with ``prefetch_count=1`` on the channel.
        """
        broker = pika.ConnectionParameters(params.receiver.host)
        self.connection = pika.BlockingConnection(broker)
        self._logger = logging.getLogger(__name__)
        self.queue = params.receiver.queue.social

        # Keep a short local alias; the attribute is bound before any
        # further channel calls are made.
        self._channel = channel = self.connection.channel()
        channel.queue_declare(queue=self.queue, durable=True)
        channel.basic_qos(prefetch_count=1)