예제 #1
0
def send(sock, message):
	"""POST a chat message for the room described by ``sock``.

	The payload is a ``5:::``-prefixed JSON object named ``message`` whose
	single argument is ``message``. The request is sent to the room's
	xhr-polling URL with the chatserv session cookie and a 10 second
	timeout. Nothing is returned.
	"""
	#TODO: the response is worthless
	endpoint = (
		'http://' + sock.server + ':' + str(sock.port)
		+ '/socket.io/1/xhr-polling/' + sock.session
		+ '/?name=' + chatserv.user
		+ '&key=' + sock.key
		+ '&roomId=' + str(sock.id)
		+ '&client=Chatserv&version=' + str(chatserv.version)
	)
	payload = '5:::' + json.dumps({'name': 'message', 'args': [message]})
	request_headers = {'Content-Type': 'text/plain', 'Cookie': chatserv.session}
	HTTP.post(endpoint, payload, request_headers, timeout=10)
예제 #2
0
def parse_paging(index, loop):
    """
    Parse the paginated wallpaper listing synchronously, page by page.

    Each page is fetched from ``paging_url + str(index)``. Every listed
    wallpaper whose detail URL is not yet in ``db_collection`` is handed to
    ``parse_wallpaper_detail`` on a thread pool. As soon as a page contains
    an already-stored wallpaper, paging stops after that page.

    A 403 response aborts paging; any other non-200 response is retried
    after ``RETRY_INTERVAL`` seconds. Paging and retries are iterative, so
    long crawls cannot exhaust the recursion limit.

    :param index: page number to start from
    :param loop: whether to page at all; nothing happens when it is falsy
    :return: None
    """
    title_suffix = "Wallpaper"
    while loop:
        response = HTTP.get(paging_url + str(index))
        if response.status_code == 403:
            # A 403 means the IP has been banned, so stop requesting pages.
            print("请求第", index, "页出现403,分页终止!")
            return
        if response.status_code != 200:
            print("请求第", index, "页失败,", RETRY_INTERVAL, "秒后重试")
            time.sleep(RETRY_INTERVAL)
            continue

        # Request succeeded: parse the page content.
        print("开始解析第", index, "页数据")
        # Leaving the ``with`` block waits for all submitted detail jobs.
        with ThreadPoolExecutor(MAX_WORKS) as executor:
            selector = LXML.get_selector(response.content)
            for tag in selector.xpath("//a[@class='caption hidden-md hidden-sm hidden-xs']"):
                url = base_url + tag.get("href")
                # Remove the literal trailing word only; str.rstrip() would
                # strip any trailing run of those characters instead.
                title = (tag.get("title") or "").strip()
                if title.endswith(title_suffix):
                    title = title[:-len(title_suffix)].strip()
                if db_collection.find({"detail_url": url}).count() <= 0:
                    executor.submit(parse_wallpaper_detail, url, title)
                else:
                    # Already stored: finish this page, then stop paging.
                    loop = False
        if not loop:
            print("该页面已解析,解析终止")
        index += 1
예제 #3
0
def login(name=None, passw=None):
	"""Log in to community.wikia.com and store the resulting session cookie.

	Two POSTs are made to ``api.php``: the first with the credentials, the
	second repeating them together with the ``login.token`` value from the
	first reply and the cookie taken from the first reply's ``Set-Cookie``
	header. On success the module globals ``user``, ``password`` and
	``session`` are updated.

	name  -- account name; defaults to the global ``user``
	passw -- account password; defaults to the global ``password``

	Any exception from the second step is re-raised after printing the
	decoded first reply.
	"""
	global session, user, password
	if name is None: name = user
	if passw is None: passw = password
	print('Logging in...')
	response = HTTP.post('http://community.wikia.com/api.php', {'action': 'login', 'lgname': name, 'lgpassword': passw, 'format': 'json'})
	cookie = response.getheader('Set-Cookie')
	# Keep only the first "name=value;" pair. partition() keeps the whole
	# header when it contains no ';' (slicing on find() + 1 would produce an
	# empty session string in that case).
	first_pair, separator, _ = cookie.partition(';')
	newsession = first_pair + separator

	result = json.loads(response.read().decode('utf-8'))
	try:
		HTTP.post('http://community.wikia.com/api.php', {'action': 'login', 'lgname': name, 'lgpassword': passw, 'lgtoken': result['login']['token']}, {'Cookie': newsession})
	except Exception:
		# Show what the server answered to the first step before propagating.
		print(result)
		raise
	user = name #this should minimize race conditions if we ever have to log in again while connected
	password = passw
	session = newsession
	print('Session:', session)
예제 #4
0
def spider(wiki):
	"""Request the Chat controller of ``<wiki>.wikia.com`` and return the parsed JSON reply.

	The request carries the chatserv client name, version and session cookie.
	"""
	endpoint = 'http://' + wiki + '.wikia.com/wikia.php'
	query = {
		'controller': 'Chat',
		'format': 'json',
		'client': 'Chatserv',
		'version': chatserv.version
	}
	reply = HTTP.get(endpoint, query, {'Cookie': chatserv.session})
	body = reply.read().decode('utf-8')
	return json.loads(body)
예제 #5
0
def session(room, key = None, server = None, port = None):
	"""Request a socket.io session for a chat room and return its identifier.

	room   -- room id; must be greater than zero
	key    -- chat key sent as the ``key`` query parameter
	server -- host name of the chat server
	port   -- port of the chat server (converted with ``str``)

	Returns the part of the response body that precedes the first ':'.

	Raises Exception when ``room`` is not positive, when ``key`` compares
	equal to False, or when the body starts with ``new Error('`` (the text
	inside that wrapper becomes the exception message).
	"""
	# str() is required: room is numeric, and concatenating it directly would
	# raise TypeError instead of the intended error message.
	if room <= 0: raise Exception('Invalid room ' + str(room))
	if key == False: raise Exception('\'key\' is false')

	result = HTTP.get(
		'http://' + server + ':' + str(port) + '/socket.io/1/',
		{
			'name': chatserv.user,
			'key': key,
			'roomId': room,
			'client': 'Chatserv',
			'version': chatserv.version
		},
		{'Cookie': chatserv.session}
	).read().decode('utf-8')
	# The server reports failures as "new Error('<text>')"; unwrap the text.
	if result[:11] == 'new Error(\'': raise Exception(result[11:-2])
	else: return result[:result.find(':')]
예제 #6
0
def _clean_category_tag(text):
    """Normalise a raw tag label: drop a trailing comma and a trailing
    "wallpapers" word, turn hyphens into spaces, and trim whitespace."""
    suffix = "wallpapers"
    label = (text or "").strip().rstrip(",")
    # Remove the literal suffix only; str.rstrip(suffix) would strip any
    # trailing run of those characters and damage other words.
    if label.endswith(suffix):
        label = label[:-len(suffix)]
    return label.replace("-", " ").strip()


def parse_wallpaper_detail(url, title):
    """
    Parse a wallpaper detail page and store the result through ``db_update``.

    A 403 response aborts the request; any other non-200 response is retried
    after ``RETRY_INTERVAL`` seconds. Retries are iterative, so repeated
    failures cannot exhaust the recursion limit.

    :param url: address of the detail page
    :param title: wallpaper title
    :return: None
    """
    while True:
        # Request the detail page.
        response = HTTP.get(url)
        if response.status_code == 200:
            break
        if response.status_code == 403:
            print("ip被封禁了!请求终止!")
            return
        print("请求", title, "失败,", RETRY_INTERVAL, "秒后重试")
        time.sleep(RETRY_INTERVAL)

    model = HDQWallsModel.HDQWallsModel(time.time())
    model.update_timestamp = time.time()
    model.detail_url = url
    model.title = title

    selector = LXML.get_selector(response.content)
    model.author = LXML.get_first_attr_text(selector, "//a[@href and @target and @class]/i", "佚名").lstrip()
    model.author_link = LXML.get_first_attr(selector, "//a[@href and @target and @class]/i/..", "href", "")
    model.original_resolution = LXML.get_first_attr_text(selector, "//blockquote/footer/a[not(@style)]").lstrip()

    # Collect the category tags (English only); labels that clean up to an
    # empty string are skipped. The Chinese translation step
    # (convert_category_tag) is currently disabled.
    categories = []
    for tag in selector.xpath("//div/ul/li[@id='tags']/../a/li/span"):
        category = _clean_category_tag(tag.text)
        if category:
            categories.append(category)
    model.category_list = categories

    # Describe the original file behind the download link.
    original_file = OriginalFileInfoModel.OriginalFileInfoModel()
    original_file.download_url = base_url + LXML.get_first_attr(
        selector,
        "//div[@class='wallpaper_container']/div/a[@rel='nofollow']",
        "href")
    original_file.file_name = os.path.basename(original_file.download_url)
    # The format is whatever follows the LAST dot; empty when there is none.
    _, dot, extension = original_file.file_name.rpartition(".")
    original_file.file_format = extension if dot else ""
    model.original_file_info.update(original_file.__dict__)

    print("解析完成:", title, url)
    db_update(model)
    print("数据库写入完成")
예제 #7
0
def download_file(item_dict):
    """
    Download the original file of one wallpaper record and store its metadata.

    The file is written to ``photos_cache_path`` under the MD5 hex digest of
    its download URL. Width, height and byte size are then written into
    ``item_dict["original_file_info"]`` and the record is upserted into
    ``db_collection``, keyed by ``detail_url``.

    A 401 response stops the download. Any other failure (non-200 status or
    an exception) is retried after ``RETRY_INTERVAL`` seconds. Retries are
    iterative, and ``KeyboardInterrupt``/``SystemExit`` are not swallowed.

    :param item_dict: wallpaper record containing ``original_file_info`` and
                      ``detail_url``
    :return: None
    """
    while True:
        try:
            original_file_info = item_dict["original_file_info"]
            download_url = original_file_info["download_url"]
            response = HTTP.get(download_url, use_proxy=True)
            if response.status_code == 200:
                file_format = original_file_info["file_format"]
                file_name = hashlib.md5(download_url.encode("gbk")).hexdigest()
                file_path = photos_cache_path + "/" + file_name + "." + file_format
                with open(file_path, "wb") as f:
                    f.write(response.content)
                # Read the image dimensions; the raised pixel limit lets very
                # large wallpapers be opened.
                Image.MAX_IMAGE_PIXELS = 1000000000
                with Image.open(file_path) as image:
                    original_file_info["width"] = image.width
                    original_file_info["height"] = image.height
                original_file_info["file_size"] = os.path.getsize(file_path)
                # Store the metadata on the record and write it to the database.
                item_dict["original_file_info"] = original_file_info
                db_collection.update({"detail_url": item_dict["detail_url"]},
                                     item_dict,
                                     upsert=True)
                print("图片下载完成", download_url)
                return
            if response.status_code == 401:
                # NOTE(review): the page parsers treat 403 as the ban signal;
                # confirm 401 is what the download host actually returns.
                print("ip已被封禁,下载停止")
                return
            print("下载失败", RETRY_INTERVAL, "秒后重试")
        except Exception:
            print("文件下载失败")
        # Pause before every retry so persistent failures do not spin.
        time.sleep(RETRY_INTERVAL)
예제 #8
0
def cajax(method, post):
	"""Call a ChatAjax method on community.wikia.com and return the reply text.

	method -- ChatAjax method name, appended to the query string as-is
	post   -- POST body passed through to ``HTTP.post``

	Returns the response body decoded as UTF-8.
	"""
	# The query parameter is "action" (the original spelled it "acion").
	return HTTP.post(
		'http://community.wikia.com/index.php?action=ajax&rs=ChatAjax&method=' + method + '&client=Chatserv&version=' + str(chatserv.version),
		post,
		{'Cookie': chatserv.session}
	).read().decode('utf-8')
예제 #9
0
def isloggedin():
	"""Return True when the stored session cookie belongs to a logged-in account.

	Queries ``meta=userinfo`` on community.wikia.com with the global
	``session`` cookie. The reply counts as logged in only when
	``query.userinfo`` has a non-zero ``id`` and no ``anon`` marker. Testing
	the truthiness of the whole reply would report True for anonymous
	sessions too, because the reply is a non-empty object either way.
	"""
	response = HTTP.get('http://community.wikia.com/api.php', {'action': 'query', 'meta': 'userinfo', 'format': 'json'}, {'Cookie': session})
	reply = json.loads(response.read().decode('utf-8'))
	query = reply.get('query') if isinstance(reply, dict) else None
	userinfo = query.get('userinfo') if isinstance(query, dict) else None
	if not isinstance(userinfo, dict):
		return False
	return 'anon' not in userinfo and userinfo.get('id', 0) != 0
예제 #10
0
def connect(sock):
	"""Poll the room's xhr-polling endpoint and dispatch the received messages.

	Each iteration GETs the polling URL (30 second timeout). A 200 body is
	either a single message or several messages framed as
	``\\ufffd<length>\\ufffd<payload>``. Messages are handled by their first
	character:

	'4' -- JSON event: queued on ``chatserv.stack`` for ``chatserv.io.receive``;
	       a ``disableReconnect`` event clears ``sock.connected`` and returns
	'8' -- noop
	'0' -- disconnect: clears ``sock.connected`` and returns
	'1' -- connect: sets ``sock.connected`` and sends ``initquery`` (once)
	'7' -- error: clears ``sock.connected`` and raises Exception

	A 404 status is polled again. The loop ends, clearing ``sock.connected``,
	once ``sock._Chat__killed`` is set.

	Raises Exception on a frame length mismatch, an error message, an
	unknown message type, or any HTTP status other than 200 and 404.
	"""
	while True:
		response = HTTP.get(
			# str() keeps this working for numeric ports.
			'http://' + sock.server + ':' + str(sock.port) + '/socket.io/1/xhr-polling/' + sock.session + '/',
			{
				'name': chatserv.user,
				'key': sock.key,
				'roomId': sock.id,
				'client': 'Chatserv',
				'version': chatserv.version
			},
			{'Cookie': chatserv.session},
			timeout=30
		)
		if sock._Chat__killed.isSet(): break
		if response.status == 200:
			print('---------------------------------------')
			data = response.read().decode('utf-8')
			#noop is the single most common event (by far) and by definition cannot be sent with another message.
			#skipping out on the string functions and loop overhead most of the time should help supress context
			#switching overhead and GIL overhead as well
			if data == '8::': continue

			#wrap a single unframed message so both shapes go through the same loop
			if data[0] != '\ufffd': data = '\ufffd' + str(len(data)) + '\ufffd' + data
			data = data.split('\ufffd')
			i = 1 #data[0] is an empty string
			while i < len(data): #sorry, but range() is stupid. Ain't no one got time for that
				if int(data[i]) != len(data[i + 1]): raise Exception('Message length mismatch') #TODO: ProtocolError
				message = data[i + 1]
				print(message)

				#we don't need this anymore, and 8:: causes a continue, so it's easier to increment here
				i += 2

				#no switch, so these are in frequency order
				if message[0] == '4': #json
					event = json.loads(message[4:])
					chatserv.stack.put(chatserv.StackContext(chatserv.io.receive, sock, event))
					if event['event'] == 'disableReconnect':
						sock.connected.clear()
						return
					elif event['event'] == 'forceReconnect': pass #re auth and such
				elif message[0] == '8': #noop - just in case
					continue
				elif message[0] == '0': #disconnect
					sock.connected.clear()
					return
				elif message[0] == '1': #connect
					if sock.connected.is_set(): continue #why it sometimes spams 1:: is beyond me
					sock.connected.set()
					sock.sendCommand('initquery')
				elif message[0] == '7': #error
					sock.connected.clear()
					raise Exception(message[4:])
				else:
					sock.connected.clear()
					raise Exception('Received unimplemented data type ' + message[0])

		elif response.status == 404: continue #this is what Torus does, I still don't know if it's good or bad
		#status is an integer here (it is compared with 200 above), so it must be converted before concatenation
		else: raise Exception('Bad HTTP status ' + str(response.status))
	sock.connected.clear()