def create_socket():
    """Request the id and auth token, then open the message socket.

    ``this.id`` and ``this.token`` are first set to 1234.  Two requests are
    then issued through ``Fetch`` ("data/" and "auth/"); each response handler
    overwrites the corresponding attribute and decrements ``this.callbacks``
    (which starts at 2).  When the counter reaches zero a ``MessageSocket`` is
    created from the token and id and stored on ``this.socket``.

    Presumably ``Fetch.get(...).then(...)`` is a promise-style API whose
    callbacks receive a single argument -- confirm against ``Fetch``.
    """
    # Get info about our authentication
    owner = this
    owner.id = 1234
    owner.token = 1234
    owner.callbacks = 2  # number of responses still outstanding

    def one_response_done():
        owner.callbacks -= 1
        if owner.callbacks != 0:
            return
        # NOTE(review): the handler calls ``handle_message`` on its *own*
        # argument and passes the global ``this``.  In the original the
        # parameter was named ``parent_this`` and shadowed the outer name;
        # confirm that ``owner.handle_message(message)`` was not intended.
        owner.socket = MessageSocket(
            lambda message: message.handle_message(this),
            owner.token,
            owner.id,
        )

    def on_data(data):
        owner.id = data['id']
        one_response_done()

    def on_auth(data):
        owner.token = data['token']
        one_response_done()

    def to_json(response):
        return response.json()

    # Same request order as before: "data/" first, then "auth/".
    for endpoint, handler in (("data/", on_data), ("auth/", on_auth)):
        Fetch.get(endpoint).then(to_json).then(handler)
class FetchTestCase(unittest.TestCase):
    """Smoke test that prints whatever ``Fetch.get`` returns for three targets.

    Nothing is asserted; the test only fails if ``Fetch`` raises.
    """

    def setUp(self):
        # A fresh, logged-in fetcher for every test method.
        self.fetch = Fetch(username='******', pw='liumengchao')

    def test_get(self):
        # Presumably an existing page, a missing page and a malformed URL --
        # confirm against what Fetch is expected to return for each.
        targets = (
            'http://www.douban.com/update/',
            'http://www.douban.com/fdsafdsafdsa/',
            'dajfdsoajfeowaof',
        )
        for target in targets:
            resp = self.fetch.get(target)
            print(resp)
def load(self, loadUrl):
    """Print debugging details for ``loadUrl`` and its first form, then log in.

    Prints the parsed URL (scheme, port, full parse result and the URL
    rebuilt by ``urlsplit``), fetches the page through ``Fetch``, prints the
    first ``<form>`` element together with whichever of its ``action``,
    ``method`` and ``onsubmit`` attributes are present, and finally POSTs a
    username/password pair to the hard-coded login URL and prints the
    response.

    Args:
        loadUrl: URL of the page to fetch and inspect.
    """
    o = urlparse(loadUrl)
    print(o.scheme)
    print(o.port)
    print(o)
    o = urlsplit(loadUrl)
    print(o.geturl())

    reader = Fetch()
    htmlFile = reader.get(loadUrl)
    soup = BeautifulSoup(htmlFile)
    form = soup.find('form')
    print(form)
    # ``soup.find`` returns None when the page has no form at all.
    if form is not None:
        # ``'action' in form`` tests the tag's children, not its attributes,
        # and ``form['action']`` raises KeyError when the attribute is
        # missing, so attributes are looked up with ``get`` instead.
        for attribute in ('action', 'method', 'onsubmit'):
            value = form.get(attribute)
            if value is not None:
                print(value)

    data = {"username": "******", "password": "******"}
    response = reader.post('https://210.154.183.61:4443/mantis/login.php', data)
    print(response)
def load(self, loadUrl):
    """Print debugging details for ``loadUrl`` and the first form on the page.

    Prints the parsed URL (scheme, port, full parse result and the URL
    rebuilt by ``urlsplit``), fetches the page through ``Fetch``, then prints
    the first ``<form>`` element and whichever of its ``action``, ``method``
    and ``onsubmit`` attributes are present.

    Args:
        loadUrl: URL of the page to fetch and inspect.
    """
    o = urlparse(loadUrl)
    print(o.scheme)
    print(o.port)
    print(o)
    o = urlsplit(loadUrl)
    print(o.geturl())

    reader = Fetch()
    htmlFile = reader.get(loadUrl)
    soup = BeautifulSoup(htmlFile)
    form = soup.find('form')
    print(form)
    # ``soup.find`` returns None when the page has no form at all.
    if form is None:
        return

    # ``'action' in form`` tests the tag's children, not its attributes, and
    # ``form['action']`` raises KeyError when the attribute is missing, so
    # attributes are looked up with ``get`` instead.
    for attribute in ('action', 'method', 'onsubmit'):
        value = form.get(attribute)
        if value is not None:
            print(value)
def load(self, loadUrl):
    """Print debugging details for ``loadUrl`` and the first form on the page.

    Prints the parsed URL (scheme, port, full parse result and the URL
    rebuilt by ``urlsplit``), fetches the page through ``Fetch``, then prints
    the first ``<form>`` element and whichever of its ``action``, ``method``
    and ``onsubmit`` attributes are present.

    Args:
        loadUrl: URL of the page to fetch and inspect.
    """
    o = urlparse(loadUrl)
    print(o.scheme)
    print(o.port)
    print(o)
    o = urlsplit(loadUrl)
    print(o.geturl())

    reader = Fetch()
    htmlFile = reader.get(loadUrl)
    soup = BeautifulSoup(htmlFile)
    form = soup.find("form")
    print(form)
    # ``soup.find`` returns None when the page has no form at all.
    if form is None:
        return

    # ``"action" in form`` tests the tag's children, not its attributes, and
    # ``form["action"]`` raises KeyError when the attribute is missing, so
    # attributes are looked up with ``get`` instead.
    for attribute in ("action", "method", "onsubmit"):
        value = form.get(attribute)
        if value is not None:
            print(value)
def load(self, loadUrl):
    """Print debugging details for ``loadUrl`` and its first form, then log in.

    Prints the parsed URL (scheme, port, full parse result and the URL
    rebuilt by ``urlsplit``), fetches the page through ``Fetch``, prints the
    first ``<form>`` element together with whichever of its ``action``,
    ``method`` and ``onsubmit`` attributes are present, and finally POSTs a
    username/password pair to the hard-coded login URL and prints the
    response.

    Args:
        loadUrl: URL of the page to fetch and inspect.
    """
    o = urlparse(loadUrl)
    print(o.scheme)
    print(o.port)
    print(o)
    o = urlsplit(loadUrl)
    print(o.geturl())

    reader = Fetch()
    htmlFile = reader.get(loadUrl)
    soup = BeautifulSoup(htmlFile)
    form = soup.find("form")
    print(form)
    # ``soup.find`` returns None when the page has no form at all.
    if form is not None:
        # ``"action" in form`` tests the tag's children, not its attributes,
        # and ``form["action"]`` raises KeyError when the attribute is
        # missing, so attributes are looked up with ``get`` instead.
        for attribute in ("action", "method", "onsubmit"):
            value = form.get(attribute)
            if value is not None:
                print(value)

    data = {"username": "******", "password": "******"}
    response = reader.post("https://210.154.183.61:4443/mantis/login.php", data)
    print(response)
class BooksTask:
    """Worker that downloads Douban book collections for a batch of users.

    One cycle (see ``run``) asks the local task server for user ids, fetches
    every user's collection from the Douban API into ``books.txt`` (one JSON
    object per line) and uploads the file's contents back to the server.
    Progress (status, pending and finished ids) is loaded from
    ``books_task_config.cfg`` on construction and written back when the
    object is destroyed.
    """

    def __init__(self):
        self.__reset()
        self.__read_info()
        self._fetch = Fetch(username='******', pw='liumengchao')
        self._tasks_url = 'http://localhost:8080/id/books/'
        self._url = 'https://api.douban.com/v2/book/user/%s/collections?count=%d&start=%d'
        self._upload_url = 'http://localhost:8080/upload/'
        # The keyword is ``filemode``; the former ``filemod`` spelling was
        # silently ignored on Python 2 and raises ValueError on Python 3.
        logging.basicConfig(filename='user_books_error.log', filemode='a+', level=logging.ERROR)

    def __del__(self):
        # Persist progress so an interrupted batch can be resumed.
        self.__save_info()

    def __reset(self):
        """Return to the idle state with no pending or finished tasks."""
        self._status = 'free'
        self._free_tasks = set()
        self._done_tasks = set()

    def __read_info(self):
        """Restore status and task sets from the config file, if it exists."""
        if os.path.exists('books_task_config.cfg'):
            with open('books_task_config.cfg', 'r') as f:
                cfg = json.loads(f.read())
                self._status = cfg.get('status')
                self._free_tasks = set(cfg.get('free_tasks'))
                self._done_tasks = set(cfg.get('done_tasks'))

    def __save_info(self):
        """Write status and task sets to the config file as JSON."""
        with open('books_task_config.cfg', 'w') as f:
            cfg = {}
            cfg['status'] = self._status
            # Sets are not JSON-serialisable, so store them as lists.
            cfg['free_tasks'] = list(self._free_tasks)
            cfg['done_tasks'] = list(self._done_tasks)
            f.write(json.dumps(cfg))

    def __get_tasks(self):
        """Fetch a new batch of tasks from the task server when idle."""
        if self._status == 'free':
            print('getting task....')
            resp = requests.get(self._tasks_url)
            js = resp.json()
            for t in js.get('tasks'):
                self._free_tasks.add(t)
            # Only reached when the batch was read successfully.
            self._status = 'running'

    def __do_tasks(self):
        """Fetch the books of every pending task and append them to books.txt."""
        with open('books.txt', 'a') as f:
            for t in self._free_tasks:
                if t not in self._done_tasks:
                    print('fetch %s....' % t)
                    books = self.__get_books(t)
                    obj = {'_id': t, 'books': books}
                    f.write(json.dumps(obj) + '\n')
                    # Marked done only after the line was written, so a
                    # failed fetch is retried on the next cycle.
                    self._done_tasks.add(t)

    def __get_books(self, user):
        """Return the collection entries of ``user``, paging 100 at a time.

        At most 20 pages are requested; paging stops early once the reported
        ``total`` has been covered.
        """
        books = []
        count = 100
        for i in range(20):
            url = self._url % (user, count, i * count)
            content = self._fetch.get(url, sleeptime=6)
            js = json.loads(content.decode('utf-8', 'ignore'))
            books.extend(js.get('collections'))
            if (i + 1) * count >= js.get('total'):
                break
        return books

    def __upload_tasks(self):
        """Upload everything in books.txt, retrying until the server accepts it.

        On a response whose ``code`` is 200 the state is reset and
        ``books.txt`` is removed.
        """
        tasks = {'type': 'books', 'data': []}
        with open('books.txt', 'r') as f:
            for line in f:
                obj = json.loads(line.rstrip('\n'))
                tasks['data'].append(obj)
        # The payload does not change between retries; build it once.
        data = json.dumps(tasks)
        headers = {'Content-type': 'application/json; charset=utf8'}
        while True:
            print('uploading task ....')
            resp = requests.put(self._upload_url, data=data, headers=headers)
            js = resp.json()
            if js.get('code') == 200:
                self.__reset()
                os.remove('books.txt')
                break

    def run(self):
        """Run get/do/upload cycles until interrupted with Ctrl-C.

        Any other exception is logged and the cycle starts over.
        """
        while True:
            try:
                self.__get_tasks()
                self.__do_tasks()
                self.__upload_tasks()
            except KeyboardInterrupt:
                break
            except Exception as e:
                logging.error(repr(e))
class FollowedTask:
    """Worker that collects the contacts ("followed") of Douban users.

    One cycle (see ``run``) asks the local task server for user ids, fetches
    each user's contacts page, parses it with ``user_followed_parser`` and
    appends the result to ``followed.txt`` (one JSON object per line), then
    uploads the file's contents back to the server.  Progress (status,
    pending and finished ids) is loaded from ``user_followed_config.cfg`` on
    construction and written back when the object is destroyed.
    """

    def __init__(self):
        self.__reset()
        self.__read_info()
        self._fetch = Fetch(username='******', pw='liumengchao')
        self._tasks_url = 'http://localhost:8080/id/followed/'
        self._url = 'http://www.douban.com/people/%s/contacts'
        self._upload_url = 'http://localhost:8080/upload/'
        # The keyword is ``filemode``; the former ``filemod`` spelling was
        # silently ignored on Python 2 and raises ValueError on Python 3.
        logging.basicConfig(filename='followed_error.log', filemode='a+', level=logging.ERROR)

    def __del__(self):
        # Persist progress so an interrupted batch can be resumed.
        self.__save_info()

    def __reset(self):
        """Return to the idle state with no pending or finished tasks."""
        self._status = 'free'
        self._free_tasks = set()
        self._done_tasks = set()

    def __read_info(self):
        """Restore status and task sets from the config file, if it exists."""
        if os.path.exists('user_followed_config.cfg'):
            with open('user_followed_config.cfg', 'r') as f:
                cfg = json.loads(f.read())
                self._status = cfg.get('status')
                self._free_tasks = set(cfg.get('free_tasks'))
                self._done_tasks = set(cfg.get('done_tasks'))

    def __save_info(self):
        """Write status and task sets to the config file as JSON."""
        with open('user_followed_config.cfg', 'w') as f:
            cfg = {}
            cfg['status'] = self._status
            # Sets are not JSON-serialisable, so store them as lists.
            cfg['free_tasks'] = list(self._free_tasks)
            cfg['done_tasks'] = list(self._done_tasks)
            f.write(json.dumps(cfg))

    def __get_followed(self, user):
        """Fetch the contacts page of ``user`` and return the parsed result."""
        page = self._fetch.get(self._url % user, sleeptime=2.1)
        followed = user_followed_parser(page)
        return followed

    def __get_tasks(self):
        """Fetch a new batch of tasks from the task server when idle."""
        if self._status == 'free':
            print('get tasks.....')
            resp = requests.get(self._tasks_url)
            js = resp.json()
            # Keep ``_free_tasks`` a set, as everywhere else in the class.
            # A response without 'tasks' now raises here, before the status
            # changes, so the next cycle asks the server again instead of
            # failing forever on a None task list.
            self._free_tasks = set(js.get('tasks'))
            self._status = 'running'

    def __do_tasks(self):
        """Fetch the contacts of every pending task into followed.txt."""
        with open('followed.txt', 'a') as f:
            for t in self._free_tasks:
                if t not in self._done_tasks:
                    print('fetching %s' % t)
                    obj = {'_id': t}
                    obj['followed'] = self.__get_followed(t)
                    f.write(json.dumps(obj) + '\n')
                    # Marked done only after the line was written, so a
                    # failed fetch is retried on the next cycle.
                    self._done_tasks.add(t)

    def __upload_tasks(self):
        """Upload everything in followed.txt, retrying until it is accepted.

        On a response whose ``code`` is 200 ``followed.txt`` is removed and
        the state is reset.
        """
        tasks = {'type': 'followed', 'data': []}
        with open('followed.txt', 'r') as f:
            for line in f:
                obj = json.loads(line.rstrip('\n'))
                tasks['data'].append(obj)
        data = json.dumps(tasks)
        headers = {'Content-type': 'application/json; charset=utf8'}
        while True:
            print('uploading ')
            resp = requests.put(self._upload_url, data=data, headers=headers)
            js = resp.json()
            if js.get('code') == 200:
                os.remove('followed.txt')
                self.__reset()
                break

    def run(self):
        """Run get/do/upload cycles until interrupted with Ctrl-C.

        Any other exception is logged and the cycle starts over.
        """
        while True:
            try:
                self.__get_tasks()
                self.__do_tasks()
                self.__upload_tasks()
            except KeyboardInterrupt:
                break
            except Exception as e:
                logging.error(repr(e))