Exemplo n.º 1
0
 def __init__(self, url, path, overwrite=False, spider=None):
     """Download task for a single remote file.

     Args:
         url: Source URL to fetch.
         path: Destination file path; data is staged in ``path + '.t'``
             while the download is in progress.
         overwrite: Whether an existing file at ``path`` may be replaced.
         spider: Optional crawler/session to reuse (e.g. to share cookies
             and headers); a fresh ``Crawler`` is created when omitted.
     """
     # FIX: the original signature used ``spider=Crawler()`` — a mutable
     # default evaluated once at definition time and shared by every
     # instance that omitted the argument. Use a None sentinel instead.
     self.url = url
     self.path = path
     self.tmp_path = self.path + '.t'
     # Display name is the final path component.
     self.name = os.path.split(self.path)[-1]
     self.overwrite = overwrite
     self.spider = spider if spider is not None else Crawler()
     self._status = INITIALIZED
     # Byte counters: expected total vs. bytes received so far.
     self.total = 0
     self.size = 0
Exemplo n.º 2
0
 def __init__(self,
              num_thread,
              segment_size,
              overwrite=False,
              spider=None):
     """Manager that downloads files through a pool of worker threads.

     Args:
         num_thread: Number of worker threads in the pool.
         segment_size: Size in bytes of each download segment.
         overwrite: Whether existing destination files may be replaced.
         spider: Optional crawler/session to reuse; a fresh ``Crawler``
             is created when omitted.
     """
     # FIX: the original signature used ``spider=Crawler()`` — a mutable
     # default evaluated once at definition time and shared by every
     # caller that omitted the argument. Use a None sentinel instead.
     self.files = []
     self.pool = ThreadPool(num_thread)
     self.overwrite = overwrite
     self.spider = spider if spider is not None else Crawler()
     self.segment_size = segment_size
Exemplo n.º 3
0
 def __init__(self,
              url,
              path,
              segment_size=10 * 1024 * 1024,
              overwrite=False,
              spider=None):
     """Segmented download task for a single remote file.

     Args:
         url: Source URL to fetch.
         path: Destination file path.
         segment_size: Size in bytes of each segment (default 10 MiB).
         overwrite: Whether an existing file at ``path`` may be replaced.
         spider: Optional crawler/session to reuse; a fresh ``Crawler``
             is created when omitted.
     """
     # FIX: the original signature used ``spider=Crawler()`` — a mutable
     # default evaluated once at definition time and shared by every
     # instance that omitted the argument. Use a None sentinel instead.
     self.url = url
     self.path = path
     # Display name is the final path component.
     self.name = os.path.split(self.path)[-1]
     self.overwrite = overwrite
     self.spider = spider if spider is not None else Crawler()
     self.segment_size = segment_size
     self._status = INITIALIZED
     # Whether the server supports ranged/segmented requests;
     # determined by the probes below.
     self.segmentable = False
     self.total = 0
     self.segments = []
     # Probe the resource headers, then pre-compute the segment plan.
     self._get_head()
     self._segmentation()
Exemplo n.º 4
0
import re
import os
import sys
import time

from urllib.parse import urlencode
from bs4 import BeautifulSoup

from utils.crawler import Crawler
from utils.config import Config
from utils.thread import ThreadPool
from utils.common import Task, repair_filename, touch_dir, size_format
from utils.playlist import Dpl
from utils.downloader import FileManager

# Module-wide crawler session; all requests in this module go through it.
spider = Crawler()
# Courseware type codes — presumably as used by the site's API; TODO confirm.
VIDEO, PDF, RICH_TEXT = 1, 3, 4
# Map from type code to the category name used for output.
COURSEWARE = {
    VIDEO: 'Video',
    PDF: 'PDF',
    RICH_TEXT: 'Rich_text'
}

# Present a desktop-browser User-Agent on every request made via `spider`.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.167 Safari/537.36',
}
spider.headers.update(headers)
# Project configuration object (see utils.config.Config).
CONFIG = Config()


def login(username, password):
Exemplo n.º 5
0
# -*- coding: utf-8 -*-
"""网易公开课 (NetEase Open Course) site module."""

import time

from bs4 import BeautifulSoup
from Crypto.Cipher import AES

from moocs.utils import *
from utils.crawler import Crawler

# Identifier of this site module.
name = "open_163"
# This site is scraped without login cookies.
need_cookies = False
# Module-wide crawler session; all HTTP requests go through it.
CANDY = Crawler()
# Mutable module-level state — presumably populated by the functions
# of this module at runtime; TODO confirm against the rest of the file.
CONFIG = {}
FILES = {}
VIDEOS = []
exports = {}
# Public API of this module.
__all__ = ["name", "need_cookies", "start", "exports"]


def get_summary(url):
    """从课程主页面获取信息"""

    res = CANDY.get(url).text
    soup = BeautifulSoup(res, 'html.parser')
    links = []
    if re.match(r'https?://open.163.com/special/', url):
        # 从课程主页解析各课程链接
        names = soup.find_all('div', class_='g-container')[1]
        organization = names.find('a').string.strip()
Exemplo n.º 6
0
 def __init__(self):
     """Create the Redis client and crawler this object works with."""
     self.redis = RedisClient()
     self.crawler = Crawler()