Example #1
    def parse(self, filename):
        """
    Parses Netscape HTTP Cookie files. The format of a cookie file is described here:
    http://www.cookiecentral.com/faq/#3.5

    Content sample:
    # Netscape HTTP Cookie File
    # http://www.netscape.com/newsref/std/cookie_spec.html
    # This is a generated file!  Do not edit.
    #HttpOnly_login.corp.google.com FALSE/TRUE 1443186342 name value

    :param filename: The path where to find the cookie file.
    :return: None
    """
        with open(filename, "r") as file:
            for line in file:

                line = line.strip()

                # Skip empty line.
                if not line:
                    continue

                # Skip comments.
                if line.startswith("# "):
                    continue

                cookie = Cookie(line)
                self.cookies.append(cookie)
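
Neither the surrounding class nor Cookie is shown above, so here is a self-contained sketch of the same skip logic run against a sample file. The seven tab-separated Netscape columns are an assumption based on the linked format description:

SAMPLE = (
    "# Netscape HTTP Cookie File\n"
    "# This is a generated file!  Do not edit.\n"
    ".example.com\tTRUE\t/\tFALSE\t1999999999\tname\tvalue\n"
)

with open("cookies.txt", "w") as f:
    f.write(SAMPLE)

with open("cookies.txt") as f:
    for line in f:
        line = line.strip()
        if not line or line.startswith("# "):
            continue
        domain, flag, path, secure, expires, name, value = line.split("\t")
        print(domain, name, value)  # .example.com name value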
Example #2
 def post(self):
   mobile = self.request.get('mobile')
   browser = self.request.get('browser')
   logging.info("Mobile: %s, User Agent: %s, Browser: %s" % (mobile, self.request.user_agent, browser))
   self.response.headers['Content-Type'] = 'text/html'
   if bool(RE_MOBILE_NUMBER.search(mobile)) and len(mobile) == 10 and mobile != '9876543210':
     path = os.path.join(os.path.dirname(__file__), "../registration_done.html")
     learner = Learner.retrieve(Learner, mobile)
     if not learner:
       learner = Learner()
       learner.MobileNumber = db.PhoneNumber(mobile)
       learner.UserAgent = self.request.user_agent
       learner.Channel = 'WEB' if (browser == 'true') else 'APP'
       learner.MotherTongue = 1 # Hindi
       learner.Status = simplejson.dumps({'id': str(mobile)})
       learner.put()
        self.response.headers['Set-Cookie'] = Cookie.get_maza_cookie_str(learner.Status)
        if browser == 'false':
          self.redirect('/' + mobile)
      else:
        # Learner already exists; take them straight to lesson 1.
        self.redirect('/' + mobile)
   else:
     path = os.path.join(os.path.dirname(__file__), "../registration_fail.html")
   self.response.out.write(template.render(path, {'mobile' : mobile}))
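
RE_MOBILE_NUMBER is defined elsewhere in this project. Given the len(mobile) == 10 check and the '9876543210' placeholder guard, a plausible definition for Indian mobile numbers might look like this (an assumption, not the project's actual pattern):

import re

# Assumed pattern: 10 digits, starting with 6-9 as Indian mobiles do.
RE_MOBILE_NUMBER = re.compile(r'^[6-9]\d{9}$')

assert RE_MOBILE_NUMBER.search('9123456780')
assert not RE_MOBILE_NUMBER.search('12345')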
Example #3
def enter():

    global stage, ground, hp, cookie, jelly, potion, jtrap, djtrap, strap
    global scoreLabel
    global jelly_sound, item_sound, collide_sound

    stage = Stage()    # background stage
    ground = Ground()  # ground/floor
    hp = HP()          # health points
    cookie = Cookie()  # player character

    game_world.add_object(stage, game_world.layer_bg)
    game_world.add_object(ground, game_world.layer_bg)
    game_world.add_object(hp, game_world.layer_bg)
    game_world.add_object(cookie, game_world.layer_player)

    # Score label
    label = score.Label("Score: ", 50, get_canvas_height() - 50, 45, 0)
    label.color = (255, 255, 255)
    score.labels.append(label)
    scoreLabel = label

    # Sounds
    jelly_sound = load_wav('jelly.wav')
    jelly_sound.set_volume(32)
    item_sound = load_wav('item.wav')
    item_sound.set_volume(32)
    collide_sound = load_wav('collide.wav')
    collide_sound.set_volume(50)
Example #4
    def login(self):
        "Login process. Returns cookie in HTTP header raw text"

        # Checks
        if not self.username or not self.password:
            print("No credentials to use for login!")
            return False

        if not self.userfile:
            print("No user file DB was set!")
            return False

        # Loop through the user file; each line is "username:password".
        with open(self.userfile, "r") as fd:
            for user in fd:
                username, password = user.rstrip("\n").split(":", 1)

                # The user is in the list and the corresponding password matches.
                if username == self.username and password == self.password:
                    return Cookie().create_login()

        return False
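
The user file DB iterated by login() holds one "username:password" pair per line. Below is a standalone sketch of the same lookup; since Cookie().create_login() is project code, a plain boolean stands in for the cookie here:

with open("users.txt", "w") as f:
    f.write("alice:s3cret\nbob:hunter2\n")

def check(username, password, userfile="users.txt"):
    # Mirrors the loop in login() above.
    with open(userfile) as fd:
        for line in fd:
            u, p = line.rstrip("\n").split(":", 1)
            if u == username and p == password:
                return True
    return False

print(check("alice", "s3cret"))  # True
print(check("bob", "wrong"))     # False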
Example #5
 def __init__(self, response, encoding='utf-8', is_json=False):
     self.is_json = is_json
     self.encoding = encoding
     # Normalize header names to lowercase for case-insensitive lookups.
     self._headers = {a.lower(): b for (a, b) in response.info().items()}
     self.cookies = Cookie.parse_cookies(self._headers.get('set-cookie', ''))
     self.raw_body = response.read()
     self.code = response.code
     response.close()
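
A hedged usage sketch. The class name Response and the behaviour of Cookie.parse_cookies are assumptions; only the __init__ above is visible, and it matches what urllib's urlopen() returns:

from urllib.request import urlopen

class Cookie:  # hypothetical stand-in for the helper used above
    @staticmethod
    def parse_cookies(header):
        # "a=1; b=2" -> {"a": "1", "b": "2"}; crude, but enough for a sketch.
        return dict(p.strip().split("=", 1)
                    for p in header.split(";") if "=" in p)

class Response:  # assumed name for the class owning the __init__ above
    def __init__(self, response, encoding='utf-8', is_json=False):
        self.is_json = is_json
        self.encoding = encoding
        self._headers = {a.lower(): b for (a, b) in response.info().items()}
        self.cookies = Cookie.parse_cookies(self._headers.get('set-cookie', ''))
        self.raw_body = response.read()
        self.code = response.code
        response.close()

resp = Response(urlopen("https://example.com"))
print(resp.code, resp.cookies)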
Example #6
    def logout(self):
        """
        Logout process.
        Returns a login cookie with a negative expiry value, so it gets
        deleted on the client side.
        """

        logoutcookie = Cookie().create_custom("logged", "no", -10)
        return logoutcookie
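
Cookie().create_custom() is project code that is not shown here. A stdlib sketch of the same delete-by-negative-expiry idea using http.cookies.SimpleCookie (the helper name and attributes are assumptions):

from http.cookies import SimpleCookie

def create_custom(name, value, max_age):
    # A Max-Age <= 0 tells the browser to discard the cookie immediately.
    c = SimpleCookie()
    c[name] = value
    c[name]["path"] = "/"
    c[name]["max-age"] = max_age
    return c.output(header="Set-Cookie:")

print(create_custom("logged", "no", -10))
# e.g. Set-Cookie: logged=no; Path=/; Max-Age=-10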
Example #7
def enter():
    global Board, scoreLabel, cookie, scoretemp
    Board = ScoreBoard()
    cookie = Cookie()

    # Score label
    label = score.Label("Score: ", 60, 300, 80, 0)
    label.color = (0, 0, 0)
    score.labels.append(label)
    scoreLabel = label
    # Use a name that does not shadow the built-in str().
    score_text = "Score: {:0.0f}".format(scoretemp)
    scoreLabel.text = score_text
Example #8
def enter():
    global cookie, stage, pet, game_timer, gameinfo, hp_time
    game_timer = get_time()
    hp_time = get_time()
    cookie = Cookie()
    gameinfo = GameInfo()
    stage = Stage()
    pet = Pet()
    game_world.objects = [[], [], [], []]  # four empty render layers
    game_world.add_object(stage, 0)
    game_world.add_object(cookie, 1)
    game_world.add_object(pet, 2)
    game_world.add_object(gameinfo, 3)
Example #9
  def get(self):
    self.response.headers['Content-Type'] = 'text/cache-manifest'
    self.response.headers['Cache-Control'] = 'max-age=10'
    
    # Retrieve tracking cookie to find stats for this user
    cookie, mobile, learner = Cookie.parse_maza(self.request.cookies)
    if learner:
      learner.Status = cookie
      learner.put()

    filename = os.path.join(os.path.dirname(__file__), "../html/lesson1.mf")
    with open(filename, "r") as f:
      text = f.read()
    self.response.out.write(text.replace('#', '#'))
Example #10
 def get(self):
   cookie, mobile, learner = Cookie.parse_maza(self.request.cookies)
   if learner:
     self.redirect('/' + mobile)
     return
   
   user_agent = self.request.user_agent
   if self.is_desktop(user_agent):
     logging.info("Desktop User Agent: %s", user_agent)
   else:
     logging.info("Mobile User Agent: %s", user_agent)
   self.response.headers['Content-Type'] = 'text/html'
   path = os.path.join(os.path.dirname(__file__), "../registration_form.html")
   self.response.out.write(template.render(path, {}))
Example #11
  def get(self, mobile_number):
    learner = Learner.retrieve(Learner, mobile_number)
    if not learner:
      self.redirect('/') # redirect to registration.
      return
      
    jsonData = {
      'learner' : { 'MobileNumber' : learner.MobileNumber},
      'lesson': Lesson1Data.get_data()
    }
    jsonDataStr = simplejson.dumps(jsonData)
    self.response.headers['Content-Type'] = 'text/html'
    self.response.headers['Cache-Control'] = 'max-age=3600'
    self.response.headers['Set-Cookie'] = Cookie.get_maza_cookie_str(learner.Status)

    filename = os.path.join(os.path.dirname(__file__), "../html/lesson1.html")
    with open(filename, "r") as f:
      text = f.read()
    self.response.out.write(text.replace('{{JSONDATA}}', jsonDataStr))
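
Cookie.get_maza_cookie_str and Cookie.parse_maza are project helpers not shown in these snippets. A stdlib sketch of the build-a-Set-Cookie-string half (the cookie name, path, and lifetime below are assumptions):

from http.cookies import SimpleCookie

def get_tracking_cookie_str(status_json):
    # Serialize the learner status into a long-lived tracking cookie and
    # return the raw header value, roughly what the handler above sets.
    c = SimpleCookie()
    c["maza"] = status_json
    c["maza"]["path"] = "/"
    c["maza"]["max-age"] = 365 * 24 * 3600  # one year; assumed lifetime
    return c["maza"].OutputString()

print(get_tracking_cookie_str('{"id": "9123456780"}'))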
Example #12
    def load(self, filename):
        # Read the ASCII map: one row per line, one tile per character.
        with open(filename, "r") as fp:
            for i in fp:
                item = i.rstrip()
                self.map.append(list(item))
        self.row = len(self.map)
        self.col = len(self.map[0])
        self.width = self.col * self.GS
        self.height = self.row * self.GS

        # 'B' = block, 'C' = cookie, 'P' = power cookie.
        for i in range(self.row):
            for j in range(self.col):
                if self.map[i][j] == 'B':
                    self.blocks.add(Block((j * self.GS, i * self.GS)))
                elif self.map[i][j] == 'C':
                    self.cookies.add(
                        Cookie((j * self.GS + 10, i * self.GS + 10),
                               "./img/cookie.png"))
                elif self.map[i][j] == 'P':
                    self.powercookies.add(
                        PowerCookie((j * self.GS + 5, i * self.GS + 5),
                                    "./img/powercookie.png"))
Example #13
from cookie import Cookie
import configparser
if __name__ == '__main__':
    config = configparser.ConfigParser()
    config.read('login.ini')
    username = config['DEFAULT']['username']
    password = config['DEFAULT']['password']
    browser = Cookie()
    browser.login(username, password)
    cookies = browser.get_cookies()
    with open('cookie.txt', 'w') as file:
        file.write(cookies)
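
A sketch of producing the login.ini this script reads; the section and keys match the lookups above, the values are placeholders:

import configparser

config = configparser.ConfigParser()
config['DEFAULT'] = {'username': 'alice', 'password': 's3cret'}  # placeholders
with open('login.ini', 'w') as f:
    config.write(f)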
Example #14
class WeSpider(Spider):
    """
    The WeSpider class uses weixin.sogou.com to search for official
    accounts, and fetches information about the first ten articles of
    each account.
    """
    article_infos = {}
    cookie_pool = Cookie()
    name = 'wespider'

    def start_requests(self):
        """
        Ideally these attributes would be passed in through __init__, but I
        tried that and failed, so scrapy settings are used as a workaround.
        """
        start_point = {
            config.type_acc: [
                "http://weixin.sogou.com/weixin?type=1&ie=utf8&_sug_=n&_sug_type_=&query=",
                "http://weixin.sogou.com/weixin?query="
            ],
            config.type_all: ["http://weixin.sogou.com/weixin?type=2&query="],
            config.type_day: [
                "http://weixin.sogou.com/weixin?type=2&sourceid=inttime_day&tsn=1&query="
            ],
            config.type_week: [
                "http://weixin.sogou.com/weixin?type=2&sourceid=inttime_week&tsn=2&query="
            ],
            config.type_mon: [
                "http://weixin.sogou.com/weixin?type=2&sourceid=inttime_month&tsn=3&query="
            ],
            config.type_year: [
                "http://weixin.sogou.com/weixin?type=2&sourceid=inttime_year&tsn=4&query="
            ]
        }
        account_list = self.settings.get("ACCOUNT_LIST", [])
        search_type = self.settings.get("SEARCH_TYPE", config.type_acc)
        random_urls = start_point[search_type]
        self.start_urls = map(
            lambda x: random_urls[int(random() * len(random_urls))] + x,
            account_list)
        for i, url in enumerate(self.start_urls):
            cookie = self.cookie_pool.fetch_one()
            if search_type == config.type_acc:
                yield Request(url,
                              cookies=cookie,
                              callback=self.parse,
                              meta={
                                  'cookiejar': i,
                                  'current_cookie': cookie
                              })
            else:
                yield Request(url,
                              cookies=cookie,
                              callback=self.parse_keyword,
                              meta={
                                  'cookiejar': i,
                                  'current_cookie': cookie
                              })

    def parse(self, response):
        """
        Parse the result from the main search page and crawl into each result.
        """
        current_cookie = response.meta['current_cookie']
        logger = logging.getLogger(response.url[-10:])
        logger.debug(str("Current cookie: " + str(current_cookie)))
        if "/antispider/" in response.url:
            cookie = self.cookie_pool.get_banned(current_cookie)
            if cookie:
                logger.debug(
                    str("Got banned. Using new cookie: " + str(cookie)))
                yield Request(response.request.meta['redirect_urls'][0],
                              cookies=cookie,
                              callback=self.parse,
                              meta={
                                  'cookiejar': response.meta['cookiejar'],
                                  'current_cookie': cookie
                              })
            else:
                yield self.error(
                    "Seems our IP was banned. Caught by WeChat Antispider: {}".
                    format(response.url))
        else:
            if self.no_results(response):
                if config.always_return_in_format:
                    yield self.error_in_format("No article found")
                else:
                    yield self.error(u"No article found")
            else:
                self.cookie_pool.set_return_header(
                    response.headers.getlist('Set-Cookie'), current_cookie)
                yield Request(response.xpath(
                    '//div[@class="results mt7"]/div[contains(@class, "wx-rb")]/@href'
                ).extract_first(),
                              callback=self.parse_account)

    def parse_keyword(self, response):
        current_cookie = response.meta['current_cookie']
        logger = logging.getLogger(response.url[-10:])
        logger.debug(str("Current cookie: " + str(current_cookie)))
        if "/antispider/" in response.url:
            cookie = self.cookie_pool.get_banned(current_cookie)
            if cookie:
                logger.debug(
                    str("Got banned. Using new cookie: " + str(cookie)))
                yield Request(response.request.meta['redirect_urls'][0],
                              cookies=cookie,
                              callback=self.parse,
                              meta={
                                  'cookiejar': response.meta['cookiejar'],
                                  'current_cookie': cookie
                              })
            else:
                yield self.error(
                    "Seems our IP was banned. Caught by WeChat Antispider: {}".
                    format(response.url))
        else:
            self.cookie_pool.set_return_header(
                response.headers.getlist('Set-Cookie'), current_cookie)
            articles = response.xpath(
                '//div[@class="results"]/div[contains(@class, "wx-rb")]')
            if self.no_results(response) or not len(articles):
                if config.always_return_in_format:
                    yield self.error_in_format("No article found")
                else:
                    yield self.error("No article found")
            else:
                for i in range(0, len(articles)):
                    url = response.urljoin(
                        articles.xpath('//div/h4/a/@href')[i].extract())
                    cover = hp().unescape(hp().unescape(
                        articles.xpath('//div/a/img/@src')
                        [i].extract())).replace('\\/', '/')
                    date = datetime.fromtimestamp(
                        int(
                            articles.xpath('//div/div/span/script/text()')
                            [i].extract()[22:-2])).strftime(config.date_format)
                    digest = articles.xpath(
                        '//div[@class="txt-box"]/p')[i].extract()
                    self.article_infos[url] = {
                        'cover': cover,
                        'date': date,
                        'digest': digest
                    }
                    yield Request(url, callback=self.parse_article)

    def parse_account(self, response):
        """
        Parse the account page and crawl into each article.

        Note: this account page is not rendered as HTML from the start; it
        is rendered dynamically from JavaScript and a JSON string, so we
        use the json module to parse that string.
        """
        m = re.search(r'var msgList = \'(.*)\'', response.body)
        if not m:
            yield self.error("Invalid response {}".format(response.url))
        else:
            articles = json.loads(m.group(1).replace('&quot;', '"'))['list']
            for article in articles:
                appinfo = article['app_msg_ext_info']
                allinfo = [appinfo
                           ] + (appinfo[u'multi_app_msg_item_list'] if
                                u'multi_app_msg_item_list' in appinfo else [])
                cominfo = article['comm_msg_info']
                for info in allinfo:
                    # Unescape the HTML tags twice
                    url = "http://mp.weixin.qq.com/s?" + hp().unescape(
                        hp().unescape(info['content_url'][4:]))
                    self.article_infos[url] = {
                        'cover':
                        hp().unescape(hp().unescape(info['cover'])).replace(
                            '\\/', '/'),
                        'date':
                        datetime.fromtimestamp(int(
                            cominfo['datetime'])).strftime(config.date_format),
                        'digest':
                        info['digest']
                    }
                    yield Request(url, callback=self.parse_article)

    def parse_article(self, response):
        """
        Finally we've got into the article page. Since response.url is
        generated dynamically, we need to get the permanent URL of the article.
        """
        title = response.xpath(
            '//div[@id="page-content"]/div/h2/text()').extract_first(
                default=config.not_found_hint).strip()
        user = response.xpath('//*[@id="post-user"]/text()').extract_first(
            default=config.not_found_hint).strip()
        m = re.search('var msg_link = .*"([^"]*)";', response.body)
        if not m:
            yield self.error("Something wrong with article {}".format(title))
        else:
            params = ['__biz', 'sn', 'mid', 'idx']
            url = hp().unescape(m.group(1))
            html = str.join(
                "\n",
                response.xpath('//*[@id="js_content"]').extract()).strip()
            info = self.article_infos[response.url]
            yield {
                u'title': unicode(title),
                u'account': unicode(user),
                u'url': unicode(url),
                u'date': unicode(info['date']),
                u'cover': unicode(info['cover']),
                u'digest': unicode(info['digest']),
                u'content': unicode(html)
            }

    def error(self, msg):
        return {
            u"error": unicode(msg),
            u"date": unicode(datetime.now().strftime(config.date_format))
        }

    def no_results(self, response):
        if len(response.xpath("//div[@id='smart_hint_container']")):
            smart_hint = response.xpath(
                "//div[@id='smart_hint_container']/text()").extract_first()
            # The hint text is Chinese for: '(quotes excluded) search results:'
            return smart_hint == u'\uff08\u4e0d\u542b\u5f15\u53f7\uff09\u7684\u641c\u7d22\u7ed3\u679c\uff1a'
        return len(response.xpath("//div[@class='no-sosuo']")) > 0

    def error_in_format(self, msg):
        date = str(datetime.now().strftime(config.date_format))
        yesterday = str(
            (datetime.now() - timedelta(days=1)).strftime(config.date_format))
        return {
            u'title': unicode("{} at {}".format(msg, date)),
            u'account': unicode(""),
            u'url': unicode("http://localhost/{}".format(date)),
            u'date': unicode(date),
            u'cover': unicode(""),
            u'digest': unicode(""),
            u'content': unicode("{} at {}".format(msg, date))
        }
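
A hedged sketch of launching the spider with the ACCOUNT_LIST and SEARCH_TYPE settings that start_requests() reads (the account name is a placeholder, and the project's config module is assumed importable):

from scrapy.crawler import CrawlerProcess

import config  # the project's own config module

process = CrawlerProcess(settings={
    "ACCOUNT_LIST": ["some_official_account"],  # placeholder account name
    "SEARCH_TYPE": config.type_acc,             # search by account name
})
process.crawl(WeSpider)
process.start()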
Example #15
 def make_cookie(self):
     cookie = Cookie()
     cookie.size = 6
     cookie.hasChocolateChips = True
     if self.delegate:
         self.delegate.on_cookie_baked(cookie)
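
A minimal sketch of the delegate hookup this snippet implies; everything except make_cookie() and on_cookie_baked() is an assumed stand-in:

class Cookie:
    size = 0
    hasChocolateChips = False

class Baker:
    def __init__(self, delegate=None):
        self.delegate = delegate

    def make_cookie(self):
        cookie = Cookie()
        cookie.size = 6
        cookie.hasChocolateChips = True
        if self.delegate:
            self.delegate.on_cookie_baked(cookie)

class Printer:
    def on_cookie_baked(self, cookie):
        print("baked a size-%d cookie" % cookie.size)

Baker(delegate=Printer()).make_cookie()  # baked a size-6 cookie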
Example #16
 def create_cookie(self):
     new_cookie = Cookie()
     self.cookie_batch.add(new_cookie)
Example #17
from queue import PriorityQueue
from queue import Queue
from threading import Lock
from threading import Thread
from urllib.parse import urljoin
from requests import Timeout
import requests
from lxml import etree

from cookie import Cookie
from gallery import *
from picture import Picture

# Initialize the picture library
# Gallery()
Cookie()

# Create queue instances for storing tasks


class Spider:
    HEADERS = {
        'User-Agent':
        'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
    }
    # The picture site
    HOST = 'https://anime-pictures.net'
    # Request timeout
    TIMEOUT = 30
    # Number of concurrent crawl requests
    REQUEST_THREAD_NUMBER = 5
    # Crawl interval