Exemplo n.º 1
0
 def __init__(self, url, path, overwrite=False, spider=None):
     """Download task for a single remote file.

     Args:
         url: Source URL to fetch.
         path: Destination file path; data is staged in ``path + '.t'``
             while the download is in progress.
         overwrite: Whether an existing file at ``path`` may be replaced.
         spider: Optional crawler/session to reuse (e.g. to share cookies
             and headers); a fresh ``Crawler`` is created when omitted.
     """
     # FIX: the original signature used ``spider=Crawler()`` — a mutable
     # default evaluated once at definition time and shared by every
     # instance that omitted the argument. Use a None sentinel instead.
     self.url = url
     self.path = path
     self.tmp_path = self.path + '.t'
     # Display name is the final path component.
     self.name = os.path.split(self.path)[-1]
     self.overwrite = overwrite
     self.spider = spider if spider is not None else Crawler()
     self._status = INITIALIZED
     # Byte counters: expected total vs. bytes received so far.
     self.total = 0
     self.size = 0
Exemplo n.º 2
0
 def __init__(self,
              num_thread,
              segment_size,
              overwrite=False,
              spider=None):
     """Manager that downloads files through a pool of worker threads.

     Args:
         num_thread: Number of worker threads in the pool.
         segment_size: Size in bytes of each download segment.
         overwrite: Whether existing destination files may be replaced.
         spider: Optional crawler/session to reuse; a fresh ``Crawler``
             is created when omitted.
     """
     # FIX: the original signature used ``spider=Crawler()`` — a mutable
     # default evaluated once at definition time and shared by every
     # caller that omitted the argument. Use a None sentinel instead.
     self.files = []
     self.pool = ThreadPool(num_thread)
     self.overwrite = overwrite
     self.spider = spider if spider is not None else Crawler()
     self.segment_size = segment_size
Exemplo n.º 3
0
 def __init__(self,
              url,
              path,
              segment_size=10 * 1024 * 1024,
              overwrite=False,
              spider=None):
     """Segmented download task for a single remote file.

     Args:
         url: Source URL to fetch.
         path: Destination file path.
         segment_size: Size in bytes of each segment (default 10 MiB).
         overwrite: Whether an existing file at ``path`` may be replaced.
         spider: Optional crawler/session to reuse; a fresh ``Crawler``
             is created when omitted.
     """
     # FIX: the original signature used ``spider=Crawler()`` — a mutable
     # default evaluated once at definition time and shared by every
     # instance that omitted the argument. Use a None sentinel instead.
     self.url = url
     self.path = path
     # Display name is the final path component.
     self.name = os.path.split(self.path)[-1]
     self.overwrite = overwrite
     self.spider = spider if spider is not None else Crawler()
     self.segment_size = segment_size
     self._status = INITIALIZED
     # Whether the server supports ranged/segmented requests;
     # determined by the probes below.
     self.segmentable = False
     self.total = 0
     self.segments = []
     # Probe the resource headers, then pre-compute the segment plan.
     self._get_head()
     self._segmentation()
Exemplo n.º 4
0
import re
import os
import sys
import time

from urllib.parse import urlencode
from bs4 import BeautifulSoup

from utils.crawler import Crawler
from utils.config import Config
from utils.thread import ThreadPool
from utils.common import Task, repair_filename, touch_dir, size_format
from utils.playlist import Dpl
from utils.downloader import FileManager

# Module-wide crawler session; all requests in this module go through it.
spider = Crawler()
# Courseware type codes — presumably as used by the site's API; TODO confirm.
VIDEO, PDF, RICH_TEXT = 1, 3, 4
# Map from type code to the category name used for output.
COURSEWARE = {
    VIDEO: 'Video',
    PDF: 'PDF',
    RICH_TEXT: 'Rich_text'
}

# Present a desktop-browser User-Agent on every request made via `spider`.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.167 Safari/537.36',
}
spider.headers.update(headers)
# Project configuration object (see utils.config.Config).
CONFIG = Config()


def login(username, password):
Exemplo n.º 5
0
# -*- coding: utf-8 -*-
"""网易公开课 (NetEase Open Course) site module."""

import time

from bs4 import BeautifulSoup
from Crypto.Cipher import AES

from moocs.utils import *
from utils.crawler import Crawler

# Identifier of this site module.
name = "open_163"
# This site is scraped without login cookies.
need_cookies = False
# Module-wide crawler session; all HTTP requests go through it.
CANDY = Crawler()
# Mutable module-level state — presumably populated by the functions
# of this module at runtime; TODO confirm against the rest of the file.
CONFIG = {}
FILES = {}
VIDEOS = []
exports = {}
# Public API of this module.
__all__ = ["name", "need_cookies", "start", "exports"]


def get_summary(url):
    """从课程主页面获取信息"""

    res = CANDY.get(url).text
    soup = BeautifulSoup(res, 'html.parser')
    links = []
    if re.match(r'https?://open.163.com/special/', url):
        # 从课程主页解析各课程链接
        names = soup.find_all('div', class_='g-container')[1]
        organization = names.find('a').string.strip()
Exemplo n.º 6
0
 def __init__(self):
     """Create the Redis client and crawler this object works with."""
     self.redis = RedisClient()
     self.crawler = Crawler()