示例#1
0
 def schedule_getter(self, cycle=GETTER_CYCLE):
     """Fetch proxies on a fixed schedule.

     Runs the getter forever, sleeping ``cycle`` seconds after every run.
     """
     proxy_getter = Getter()
     while True:
         print('开始抓取代理')
         proxy_getter.run()
         time.sleep(cycle)
示例#2
0
文件: scheduler.py 项目: ttly20/mypp
 def schedule_getter(self, cycle=GETTER_CYCLE):
     """Drive the asynchronous proxy getter in an endless cycle.

     Each round looks up the current event loop, runs ``Getter.run()`` on it
     until completion and then sleeps ``cycle`` seconds.
     """
     proxy_getter = Getter()
     while True:
         event_loop = asyncio.get_event_loop()
         pending_run = proxy_getter.run()
         event_loop.run_until_complete(pending_run)
         time.sleep(cycle)
示例#3
0
 def run_getter(self, cycle=GETTER_CYCLE):
     """Fetch cookies on a fixed schedule.

     Runs the getter forever, sleeping ``cycle`` seconds after every run.
     """
     cookie_getter = Getter()
     while True:
         print('开始抓取cookies')
         cookie_getter.run()
         time.sleep(cycle)
示例#4
0
class WebStash:
    """Cache-backed store of scraped web pages.

    Lookups go to a ``Cacher`` first; on a miss the page is fetched through
    a ``Getter`` and the result is stored in the cache.
    """

    def __init__(self, getterType='urlopen', waitTimeBeforeScraping=0):
        """Create the cache, configuration and getter used by this stash.

        :param getterType: getter backend name, forwarded to ``Getter``.
        :param waitTimeBeforeScraping: forwarded unchanged to ``Getter``.
        """
        self.cacher = Cacher()
        self.config = Config()
        self.getter = Getter(getterType,
                             waitTimeBeforeScraping=waitTimeBeforeScraping)

    def get_web_data(self, url):
        """Return the cached entry for ``url``, scraping it on a cache miss.

        On a miss the HTML and a screenshot are fetched, wrapped in a
        ``WebData`` and stored in the cache. The value is then read back from
        the cache rather than returned directly.
        """
        try:
            return self.cacher[url]
        except KeyError:
            self.config.debugPrint('Getting webData...')
            filename = self.cacher.getFilename(url)
            html = self.getter.get_html(url)
            screenshotLocation = self.getter.get_screenshot(
                url, filename + '.png')
            webData = WebData(filename,
                              url,
                              html,
                              screenshotLocation=screenshotLocation)
            self.cacher[url] = webData
            return self.cacher[url]

    def delete(self, url):
        """Remove the cached entry for ``url``.

        :raises KeyError: propagated from the cache when ``url`` is absent
            (for a dict-like cache).
        """
        del self.cacher[url]

    def clean(self):
        """Delegate to ``Cacher.clean()``."""
        self.cacher.clean()
示例#5
0
 def schdule_getter(self, cycle=GETTER_CYCLE):
     """Fetch proxies on a fixed schedule.

     Loops forever: announce the crawl, run the getter, sleep ``cycle``
     seconds.
     """
     crawler = Getter()
     while True:
         print('start crawl proxy...')
         crawler.run()
         time.sleep(cycle)
示例#6
0
 def schedule_getter(self, cycle=settings.GETTER_CYCLE):
     '''
     Crawl on a fixed schedule.

     Runs the getter forever, sleeping ``cycle`` seconds after every run.
     '''
     crawler = Getter()
     while True:
         crawler.run()
         time.sleep(cycle)
示例#7
0
 def scheduler_getter(self, cycle=GETTER_CYCLE):
     # Schedules the getter module.
     '''Fetch proxies on a fixed schedule, sleeping ``cycle`` seconds between runs.'''
     proxy_source = Getter()
     while True:
         print('开始抓取代理')
         proxy_source.run()
         time.sleep(cycle)
示例#8
0
 def schedule_getter(self, cycle=GETTER_CYCLE):
     """
     Fetch proxies on a fixed schedule.

     Keeps running the getter while ``GETTER_ENABLED`` is truthy (checked
     before every round), sleeping ``cycle`` seconds after each run.
     """
     fetcher = Getter()
     while GETTER_ENABLED:
         print('抓取器开始运行')
         fetcher.run()
         time.sleep(cycle)
示例#9
0
 def scheduler_getter(self):
     """
     Run a freshly built ``Getter`` over and over, sleeping in between.

     The pause is ``self.getter_cycle`` seconds. The original note describes
     this as collecting cookies every 5 days, which presumably reflects the
     configured cycle (confirm against the settings).
     :return: never returns; the loop has no exit.
     """
     while True:
         cookie_getter = Getter()
         cookie_getter.run()
         time.sleep(self.getter_cycle)
示例#10
0
    def schedule_getter(self, cycle=GETTER_CYCLE):
        """Fetch proxies on a fixed schedule, printing a running round number.

        Loops forever and sleeps ``cycle`` seconds after every run.
        """
        proxy_getter = Getter()
        round_no = 0
        while True:
            round_no = round_no + 1
            print('获取器开始运行', round_no)
            proxy_getter.run()
            time.sleep(cycle)
示例#11
0
文件: run.py 项目: sky94520/Proxy
    def schedule_getter(self, cycle=GETTER_CYCLE):
        """
        Fetch proxies on a fixed schedule.

        Loops forever: announce the crawl, run the getter, sleep ``cycle``
        seconds.
        """
        crawler = Getter()
        while True:
            print('Start crawling proxy')
            crawler.run()
            time.sleep(cycle)
示例#12
0
    def run(self, q):
        """Publish the entry stored in ``q`` under this getter's name.

        Handles the publishing of data; should be run at an acceptable update
        interval.

        :param q: container indexed by ``self.name``.
        """
        # TODO: format data for publisher
        # Look up this getter's entry and hand it to the base class.
        Getter.run(self, q[self.name])
示例#13
0
 def init(self, ros, hostname):
     """Remember the hostname, create the lidar publisher and run the base init.

     :param ros: object exposing ``Publisher``.
     :param hostname: used as the first component of the topic name.
     """
     self.hostname = hostname
     # create publisher
     publisher_factory = ros.Publisher
     topic = '/' + hostname + '/lidar/' + resource_name(self.name)
     self.pub = publisher_factory(topic, Int16MultiArray, queue_size=1)
     # pass to parent to enable probes
     #self.mp.write_probe(self.codes,1)
     Getter.init(self)
示例#14
0
 def schedule_getter(self, cycle=GETTER_CYCLE):
     """
     Fetch proxies on a fixed schedule.

     Each round prints and logs a start message, runs the getter and then
     sleeps ``cycle`` seconds.
     """
     proxy_getter = Getter()
     while True:
         print('Start to grab the proxy')
         logger.log('INFOR', 'Start to Grab the proxy...')
         proxy_getter.run()
         time.sleep(cycle)
示例#15
0
 def schedule_getter(self, cycle=GETTER_CYCLE):
     """
     Fetch proxies on a fixed schedule.

     Runs the getter module in an endless loop; after each run it waits for
     an interval that defaults to the GETTER_CYCLE setting.
     """
     proxy_getter = Getter()
     while True:
         print('开始抓取代理')
         proxy_getter.run()
         time.sleep(cycle)
示例#16
0
 def getter_scheduler(self, cycle=GETTER_CYCLE):
     """
     Fetch proxies on a fixed schedule.

     Runs the getter forever, resting ``cycle`` seconds after every run.

     :param cycle: pause between runs; defaults to ``GETTER_CYCLE``. The
         argument (not the module constant) drives both the printed rest
         time and the actual sleep, so a caller-supplied value is honoured.
     """
     print('获取器开始执行!')
     getter = Getter()
     while True:
         getter.run()
         print('休息', cycle, '秒')
         time.sleep(cycle)
示例#17
0
 def run_getter(self):
     """
     Run the getter in an endless loop, logging each iteration number.

     Sleeps a fixed 20 seconds after every run.
     """
     worker = Getter()
     loop = 0
     while True:
         logger.debug(f'getter loop {loop} start...')
         worker.run()
         loop = loop + 1
         time.sleep(20)
示例#18
0
    def login_getter(self):
        '''
        Build a ``Getter`` from the stored credentials and log it in.

        Per the original note, this should have no problems whenever
        logging into the poster works.

        :return: whatever ``Getter.login()`` returns.
        '''
        self.getter = Getter(self.username, self.password)
        return self.getter.login()
示例#19
0
 def schedule_getter(self, cycle=GETTER_CYCLE):
     """
     Fetch proxies on a fixed schedule.

     :param cycle: seconds to sleep after each getter run.
     :return: never returns; the loop has no exit.
     """
     proxy_getter = Getter()
     while True:
         print("开始抓取代理")
         proxy_getter.run()
         time.sleep(cycle)
示例#20
0
 def process_getter(self):
     '''Getter worker loop (run as a child process, per the original note).

     Keeps running the getter, sleeping ``GETTER_CYCLE`` seconds after each
     run, until the store reports at least ``MAXPOOL`` proxies.
     '''
     worker = Getter()
     store = db()
     while True:
         has_room = store.count_proxie() < MAXPOOL
         if not has_room:
             print("代理池已经达到最大容量")
             break
         worker.run()
         time.sleep(GETTER_CYCLE)
示例#21
0
 def schedule_getter(self, cycle=GETTER_CYCLE):
     '''
     Fetch proxies on a fixed schedule.

     :param cycle: seconds to sleep after each getter run.
     :return: never returns; the loop has no exit.
     '''
     fetcher = Getter()
     while True:
         print('开始抓取代理')
         fetcher.run()
         time.sleep(cycle)
示例#22
0
    def run(self, q, current_time):
        """Publish the queued value belonging to this getter's first label.

        Handles the publishing of data; should be run at an acceptable update
        interval.

        :param q: container indexed by label.
        :param current_time: accepted for interface compatibility; not used
            by this method.
        """
        labels = self.name
        if not isinstance(labels, tuple):
            # A one-element tuple needs the trailing comma: "(labels)" is just
            # the bare value, so a string name would be walked character by
            # character in the lookup below.
            labels = (labels,)

        # get data from queue, one entry per label
        data = [q[label] for label in labels]

        # publish data
        Getter.run(self, data[0])
示例#23
0
def DBwriter():
    """Store the first row returned by ``Getter()`` in ``mydatabase.db``.

    Per the original note the result looks like
    ``[(start_day, req_count, req_per_sec, req_per_min, req_per_hour)]``.
    The ``results`` table is created if it does not exist yet, the row is
    inserted with bound parameters and committed, and the whole table is
    printed as a check. The connection is closed on every path, including
    when a statement raises.
    """
    result = Getter()
    row = result[0]

    # connect() creates the database file when it is missing, so only the
    # table needs an existence guard.
    conn = sqlite3.connect("mydatabase.db")
    try:
        cursor = conn.cursor()
        cursor.execute('''CREATE TABLE IF NOT EXISTS results
                                  (analize_date text, requests_count real, request_per_second real,
                                   request_per_minute real, request_per_hour real)
                               ''')

        # Insert data to DB table safely (parameter binding, no string building)
        cursor.execute(
            'INSERT INTO results VALUES (?, ?, ?, ?, ?)',
            (row[0], row[1], row[2], row[3], row[4]))

        # Save changes
        conn.commit()

        # Checking (output) results
        cursor.execute("SELECT * FROM results")
        print(cursor.fetchall())
    finally:
        conn.close()
示例#24
0
 def init(self, ros, hostname):
     """Remember the hostname, create the odometry publisher and run the base init.

     The topic is ``/<hostname>/odom/<resource name>``. When ``self.name`` is
     a tuple, the resource name is built from the common prefix of its
     entries; otherwise it is built from ``self.name`` itself.

     :param ros: object exposing ``Publisher``.
     :param hostname: used as the first component of the topic name.
     """
     self.hostname = hostname
     # create publisher
     name_is_tuple = type(self.name) == tuple
     publisher_factory = ros.Publisher
     topic_root = '/' + hostname + '/odom/'
     if name_is_tuple:
         topic_leaf = resource_name(os.path.commonprefix(self.name))
     else:
         topic_leaf = resource_name(self.name)
     self.pub = publisher_factory(topic_root + topic_leaf,
                                  Odometry,
                                  queue_size=1)
     # pass to parent to enable probes
     Getter.init(self)
示例#25
0
def test_getterTypes():
    """Check the type of HTML each getter backend returns for a live page.

    ``urlopen`` and ``requests`` are expected to return ``bytes``;
    ``chromedriver`` is expected to return ``str``.
    """
    testurl = 'https://news.ycombinator.com/news'
    expectations = (
        ('urlopen', bytes),
        ('chromedriver', str),
        ('requests', bytes),
    )
    for getter_type, html_type in expectations:
        html = Getter(getter_type).get_html(testurl)
        assert isinstance(html, html_type)
示例#26
0
def test_getter_wait_before_scraping():
    """Check the scrape delay and the error raised for an unknown getter type.

    Three fetches with a 1-unit ``waitTimeBeforeScraping`` must take longer
    than three times that wait, and constructing a ``Getter`` with an
    unsupported type must raise ``GetterImplementationError`` with the
    expected message.
    """
    import datetime
    waitTimeBeforeScraping = 1
    testSleep = Getter('urlopen',
                       waitTimeBeforeScraping=waitTimeBeforeScraping)
    startTime = datetime.datetime.now()
    for _ in range(3):
        testSleep.get_html('https://news.ycombinator.com/news')
    elapsed = datetime.datetime.now() - startTime

    # total_seconds() keeps the fractional part; timedelta.seconds is a
    # truncated integer, which would demand a full extra second of runtime.
    assert elapsed.total_seconds() > 3 * waitTimeBeforeScraping

    try:
        Getter('this is not a getter type')
    except GetterImplementationError as e:
        assert str(
            e) == 'this is not a getter type is not a supported getter type'
    else:
        # Without this branch the check passes silently when nothing is raised.
        raise AssertionError(
            'Getter accepted an unsupported getter type without raising')
示例#27
0
    def run(self, q, current_time):
        """Decode a hex-encoded point list from ``q`` and publish it as a PointCloud2.

        Handles the publishing of data; should be run at an acceptable update
        interval. The text after the last ``v=`` in the queued message is
        split on ``:`` and read as base-16 integers alternating x, y, x, y,
        ... A trailing value without a partner is ignored. Nothing is
        published when that text is empty.

        :param q: container holding the raw message under ``self.name``.
        :param current_time: stored as the message header stamp.
        :raises ValueError: if a value is not valid base-16 text.
        """
        # get data from queue
        raw = q[self.name]

        # get the data from the message
        payload = raw.split('v=')[-1]
        if payload == '':
            return

        # Pair consecutive tokens as (x, y); zip drops an unpaired last token.
        tokens = payload.split(':')
        points = [(int(x_hex, 16), int(y_hex, 16))
                  for x_hex, y_hex in zip(tokens[0::2], tokens[1::2])]
        point_count = len(points)

        # One row per point, so the byte buffer is x0, y0, x1, y1, ... and
        # matches the field offsets (x at 0, y at 4) and point_step below.
        # '<f4' pins little-endian float32 to agree with is_bigendian = False.
        xy = np.array(points, dtype='<f4').reshape(-1, 2)

        # create pointcloud2 message
        dataout = PointCloud2()
        # add timestamp (supplied by the caller)
        dataout.header.stamp = current_time
        # add frame
        dataout.header.frame_id = self.hostname
        # format message
        dataout.height = 1
        dataout.width = point_count
        dataout.fields = [
            PointField('x', 0, PointField.FLOAT32, 1),
            PointField('y', 4, PointField.FLOAT32, 1),
        ]
        dataout.is_bigendian = False
        dataout.point_step = 8
        dataout.row_step = dataout.point_step * point_count
        dataout.is_dense = True
        dataout.data = xy.tobytes()

        # publish data
        Getter.run(self, dataout)
class GetterThread(Thread):
    """Thread that keeps refreshing a ``Getter`` and broadcasts its data.

    Every pass calls ``Getter.update()``, decodes ``getter.data`` as UTF-8
    JSON and emits it as a ``'newnumber'`` event on the ``/test`` namespace.
    """

    # Class-level placeholder; __init__ installs the per-thread Getter.
    getter = None

    def __init__(self):
        super(GetterThread, self).__init__()
        self.getter = Getter()

    def update(self):
        """
        Poll the getter and emit each decoded payload via socketio.

        Keeps looping until ``thread_stop_event`` is set.
        """
        while not thread_stop_event.isSet():
            self.getter.update()
            payload = self.getter.data.decode('utf-8')
            data = json.loads(payload)
            print(data)
            socketio.emit('newnumber', data, namespace='/test')

    def run(self):
        """Thread entry point; delegates to ``update()``."""
        self.update()
示例#29
0
 def schedule_getter(self):
     """Run the getter while ``GETTER_ENABLED`` is truthy, retrying once on failure.

     After each round (including a retried one) the loop sleeps for
     ``GETTER_CYLE`` seconds. An error raised by the retry itself
     propagates to the caller.
     """
     getter = Getter()
     while GETTER_ENABLED:
         try:
             getter.run()
         except Exception:
             # Retry once. Catching Exception rather than using a bare
             # except lets KeyboardInterrupt/SystemExit stop the scheduler.
             getter.run()
         # NOTE(review): "GETTER_CYLE" looks like a misspelling of
         # GETTER_CYCLE -- confirm against the settings module before renaming.
         time.sleep(GETTER_CYLE)
示例#30
0
    def run(self, q, current_time):
        """Build an Odometry message from queued values and publish it.

        Handles the publishing of data; should be run at an acceptable update
        interval. The values read for the labels in ``self.name`` are used by
        position: 0 and 1 as the pose x/y, 2 as the third Euler angle for the
        orientation quaternion, 3 as the x component of the first twist
        vector and 4 as the z component of the second.

        :param q: container indexed by label.
        :param current_time: stored as the message header stamp.
        :raises IndexError: if fewer than five labels are configured.
        """
        odom = Odometry()
        odom.header.stamp = current_time
        odom.header.frame_id = self.hostname

        labels = self.name
        if not isinstance(labels, tuple):
            # A one-element tuple needs the trailing comma: "(labels)" is just
            # the bare value, so a string name would be walked character by
            # character in the lookup below.
            labels = (labels,)

        # get data from queue, one entry per label
        data = [q[label] for label in labels]

        heading = tf.transformations.quaternion_from_euler(0., 0., data[2])
        odom.pose.pose = Pose(
            Point(data[0], data[1], 0.),
            Quaternion(heading[0], heading[1], heading[2], heading[3]))
        odom.twist.twist = Twist(Vector3(data[3], 0, 0),
                                 Vector3(0, 0, data[4]))

        # publish data
        Getter.run(self, odom)