Exemplo n.º 1
0
    def send_tags(self, method, info, tags, sk):
        """Post user tags for an artist, album or track to last.fm.

        method -- one of album.addtags, artist.addtags or track.addtags
        info -- mapping read for 'Artist' and, depending on method,
                'Album' or 'Track'
        tags -- comma delimited list of no more than 10 tags
        sk -- value sent as the "sk" request parameter
        """
        # Fields sent with every tagging method; api_sig is added last.
        post_values = {
            "method": method,
            "tags": tags,
            "api_key": self.api_key,
            "sk": sk,
            "artist": info['Artist'],
        }

        # album.addtags additionally sends the album, track.addtags the track.
        if method == "album.addtags":
            post_values['album'] = info['Album']
        elif method == "track.addtags":
            post_values['track'] = info['Track']

        # create_api_sig receives every field collected so far.
        signature = self.create_api_sig(post_values)
        post_values['api_sig'] = signature

        encoded_body = urllib.urlencode(post_values)
        conn = HttpRequest(self.url, encoded_body)
        response = conn.connect()
Exemplo n.º 2
0
    def send_tags(self, method, info, tags, sk):
        """Submit tags to last.fm through one of the addtags methods.

        method -- album.addtags, artist.addtags or track.addtags
        info -- mapping providing 'Artist' plus 'Album' (album.addtags)
                or 'Track' (track.addtags)
        tags -- comma delimited list of no more than 10 tags
        sk -- value sent as the "sk" request parameter
        """
        artist_name = info['Artist']

        # Common fields: method, tags, api_key, sk and artist.
        post_values = {
            "method": method,
            "tags": tags,
            "api_key": self.api_key,
            "sk": sk,
            "artist": artist_name,
        }

        # Method-specific field: album for album.addtags, track for
        # track.addtags; artist.addtags needs nothing extra.
        if method == "album.addtags":
            post_values['album'] = info['Album']
        elif method == "track.addtags":
            post_values['track'] = info['Track']

        # api_sig is derived from the fields gathered above and sent with them.
        api_signature = self.create_api_sig(post_values)
        post_values['api_sig'] = api_signature

        request_body = urllib.urlencode(post_values)
        conn = HttpRequest(self.url, request_body)
        response = conn.connect()
Exemplo n.º 3
0
 def _send_post(self, post_values):
     """Send post_values to self.submission_url.

     Returns True on success, after appending self.del_ids to
     self.deletion_ids. Otherwise the returned messages are reported
     through self.parent.write_info and False is returned.
     """
     request = HttpRequest(url=self.submission_url, data=post_values, timeout=10)
     success, msg = request.connect()

     # Failure path first: report every message line and give up.
     if not success:
         error_text = (_("There was an error sending data to last.fm:") +
                       "\n" + "\n".join(msg))
         self.parent.write_info(error_text)
         return False

     self.deletion_ids.extend(self.del_ids)
     return True
Exemplo n.º 4
0
def main():
    """Run the face-detection loop until ESC is pressed or the camera fails.

    Each captured frame is searched for faces. While at least one face is
    visible the Hue thread is switched to ``hue.MeetingStart()`` and a
    ``StatusReq`` carrying the room name, an ISO timestamp and the face count
    is queued on the request worker; otherwise the Hue thread is switched to
    ``hue.MeetingEnd()``. The camera, the preview window and the request
    worker are always released, even if the loop raises.
    """
    req = HttpRequest()
    req.start()
    try:
        hthread = hue.HueThread(ip="192.168.10.2")
        hthread.start()
        while True:
            # get image
            ret, img = cap.read()
            if not ret:
                # No frame was delivered; img is unusable, so stop instead of
                # letting cvtColor fail on it.
                print('failed to read a frame from the capture device')
                break
            gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

            # face detect
            faces = face_cascade.detectMultiScale(gray,
                                                  scaleFactor=1.3,
                                                  minNeighbors=5)
            human_num = len(faces)

            # if face detect
            if human_num > 0:
                hthread.changeState(hue.MeetingStart())
                # Named `timestamp` so the `time` module name is not shadowed.
                timestamp = datetime.now().isoformat()
                print(timestamp, ' human_num : ', human_num)
                req.add(
                    StatusReq(room=room_Name,
                              timestamp=timestamp,
                              occupied=human_num))
            else:
                hthread.changeState(hue.MeetingEnd())

            # draw rect
            for x, y, w, h in faces:
                cv2.rectangle(img, (x, y), (x + w, y + h), (255, 0, 0), 2)

            # show img
            cv2.imshow('video image', img)

            # wait key
            key = cv2.waitKey(10)

            # quit when the ESC key is pressed
            if key == 27:
                break
    finally:
        # Release the camera and windows, then stop the request worker,
        # whether the loop ended normally or through an exception.
        cap.release()
        cv2.destroyAllWindows()

        req.stop()
        req.join()
Exemplo n.º 5
0
 def handshake(self):
     """Perform the handshake request and store the session details.

     Refreshes self.timestamp and self.authentication_code, appends the
     encoded query string to self.url and connects with timeout=10. On
     success, items 1 and 3 of the response become self.session_id and
     self.submission_url. Returns (response[0], message), where message
     comes from the request's handshake_response().
     """
     self.timestamp = self.create_timestamp()
     self.authentication_code = self.create_authentication_code()
     # NOTE(review): the query string is appended in place, so self.url
     # grows on every call -- confirm handshake() runs only once per object.
     self.url += r"/?" + self.encode_url()

     request = HttpRequest(url=self.url, timeout=10)
     connected, reply = request.connect()

     if connected:
         self.session_id = reply[1]
         self.submission_url = reply[3]

     # The first reply item is returned as-is together with its message.
     message = request.handshake_response(reply[0])
     return reply[0], message
Exemplo n.º 6
0
 def get_popular_tags(self, method, info_dict):
     """Fetch the top tags for an artist or a track.

     method is either artist.gettoptags or track.gettoptags.
     info_dict supplies 'Artist' and, for track.gettoptags, 'Track'.
     Returns the result of parse_xml_doc(xml_doc, "name") for the response.
     """
     #Params
     #track (Optional) : The track name in question
     #artist (Required) : The artist name in question
     #api_key (Required) : A Last.fm API key.
     # Named `params` rather than `dict` so the builtin is not shadowed.
     params = {
         "method": method,
         "artist": info_dict['Artist'],
         "api_key": self.api_key,
     }
     if method == "track.gettoptags":
         params['track'] = info_dict['Track']
     encoded_values = urllib.urlencode(params)
     url = self.url + "?" + encoded_values
     conn = HttpRequest(url)
     xml_doc = conn.connect(xml=True)
     return self.parse_xml_doc(xml_doc, "name")
Exemplo n.º 7
0
 def get_user_top_tags(self, username, limit=15):
     """Look up a user's top tags through user.gettoptags.

     username -- sent as the "user" parameter
     limit -- sent as the "limit" parameter (default 15)
     Returns whatever parse_xml_doc(xml_doc, "name") yields for the response.
     """
     # Query fields: method, user, limit and api_key.
     query = {
         "method": "user.gettoptags",
         "user": username,
         "limit": limit,
         "api_key": self.api_key,
     }
     encoded_values = urllib.urlencode(query)
     request_url = self.url + "?" + encoded_values
     xml_doc = HttpRequest(request_url).connect(xml=True)
     return self.parse_xml_doc(xml_doc, "name")
Exemplo n.º 8
0
 def get_user_top_tags(self, username, limit=15):
     """Request the top tags of a user (method user.gettoptags).

     username -- value of the "user" query parameter
     limit -- value of the "limit" query parameter (default 15)
     The XML response is handed to parse_xml_doc(xml_doc, "name") and that
     result is returned.
     """
     # Query fields: method, user, limit and api_key.
     query_fields = {
         "method": "user.gettoptags",
         "user": username,
         "limit": limit,
         "api_key": self.api_key,
     }
     encoded_values = urllib.urlencode(query_fields)
     full_url = self.url + "?" + encoded_values
     connection = HttpRequest(full_url)
     response_xml = connection.connect(xml=True)
     return self.parse_xml_doc(response_xml, "name")
Exemplo n.º 9
0
 def set_user_image(self):
     """Show the last.fm user's image in the "user_thumb" widget.

     Queries user.getinfo for self.username and extracts the "image" value;
     nothing happens when it is None. When no file named after the url's
     basename exists under self.HOME_DIR, the image is fetched through
     HttpRequest.retrieve(); otherwise the existing file is loaded as a
     pixbuf (size arguments 100, 40) and set on the widget.
     """
     webservice = webservices.LastfmWebService()
     url = "http://ws.audioscrobbler.com/2.0/?method=user.getinfo&user=%s&api_key=%s"
     info_url = url % (self.username, webservice.api_key)
     request = HttpRequest(url=info_url, timeout=10)
     image_url = webservice.parse_xml(request.connect(xml=True), "image")
     if image_url is None:
         return

     # Local file name: HOME_DIR followed by the basename of the image url.
     local_path = self.HOME_DIR + os.path.basename(image_url)
     if os.path.exists(local_path):
         image = gtk.gdk.pixbuf_new_from_file_at_size(local_path, 100, 40)
         self.tree.get_widget("user_thumb").set_from_pixbuf(image)
     else:
         request = HttpRequest(image_url)
         request.retrieve(image_url, local_path,
                          self.tree.get_widget("user_thumb"))
Exemplo n.º 10
0
 def _send_post(self, post_values):
     """Send post_values to self.submission_url, re-handshaking on BADSESSION.

     Returns True when the request succeeds (self.del_ids is then appended
     to self.deletion_ids). When the first message is "BADSESSION",
     self.bad_session_count is incremented and handshake() is called; the
     post is retried if the handshake answers "OK" and the counter is still
     at most 3. Every other outcome is reported through
     self.parent.write_info and False is returned.
     """
     request = HttpRequest(url=self.submission_url, data=post_values, timeout=10)
     success, msg = request.connect()
     if success:
         self.deletion_ids.extend(self.del_ids)
         return True

     if msg[0] == "BADSESSION":
         self.bad_session_count += 1
         # `_` is called on the message strings below, so the unused second
         # value is not unpacked into `_`.
         server_response, _message = self.handshake()
         handshake_ok = server_response == "OK"
         if handshake_ok and self.bad_session_count <= 3:
             return self._send_post(post_values)
         self.parent.write_info(_("Error during handshake."))

     error_text = (_("There was an error sending data to last.fm:") +
                   "\n" + "\n".join(msg))
     self.parent.write_info(error_text)
     return False
Exemplo n.º 11
0
 def get_popular_tags(self, method, info_dict):
     """Fetch the top tags for an artist or a track.

     method is either artist.gettoptags or track.gettoptags.
     info_dict supplies 'Artist' and, for track.gettoptags, 'Track'.
     Returns the result of parse_xml_doc(xml_doc, "name") for the response.
     """
     #Params
     #track (Optional) : The track name in question
     #artist (Required) : The artist name in question
     #api_key (Required) : A Last.fm API key.
     # Named `query_params` rather than `dict` so the builtin stays usable.
     query_params = {
         "method": method,
         "artist": info_dict['Artist'],
         "api_key": self.api_key
     }
     if method == "track.gettoptags":
         query_params['track'] = info_dict['Track']
     encoded_values = urllib.urlencode(query_params)
     url = self.url + "?" + encoded_values
     conn = HttpRequest(url)
     xml_doc = conn.connect(xml=True)
     return self.parse_xml_doc(xml_doc, "name")
Exemplo n.º 12
0
    def __init__(self, client, address):
        """ Handles the request and sends a response to the client.

        Args:
            client (socket.socket): The client of the request.
            address (tuple(str, int)): The client address and port, for logging purposes.
        """
        self.__client = client
        self.__address = address
        self.__response = HttpResponse()
        self.__request = None
        self.__close_connection = True

        # Parse the HTTP request, then run the after-parsing step.
        keep_handling = True
        try:
            self.__request = HttpRequest(client)
            self.__after_parsing()
        except StopHandlingRequestException:
            # Raised to stop the regular handling of the request.
            keep_handling = False
        except HttpRequestParseErrorException:
            # The request could not be parsed: answer with a 400 status.
            keep_handling = False
            self.__response.status = 400

        if keep_handling:
            request_uri = self.__request.request_uri
            request_method = self.__request.method
            if request_method not in ("GET", "POST", "HEAD", "PUT", "DELETE",
                                      "TRACE", "OPTIONS", "CONNECT", "PATCH"):
                # Unknown HTTP method: answer with a 400 status.
                self.__response.status = 400
            elif (request_uri == self.__API_URI
                  or request_uri.startswith(self.__API_URI + "/")):
                # The API URI itself, or anything below it, is an API request.
                self.__handle_api_request()
            else:
                self.__handle_app_request()

        # Handling is always finished, whichever path was taken above.
        self.__end_handling()
Exemplo n.º 13
0
 def _send_post(self, post_values):
     """Post post_values to self.submission_url, retrying after BADSESSION.

     On success self.del_ids is appended to self.deletion_ids and True is
     returned. If the first message is "BADSESSION", self.bad_session_count
     is incremented and handshake() is called; when it answers "OK" and the
     counter is at most 3 the post is sent again. Otherwise the error is
     written through self.parent.write_info and False is returned.
     """
     request = HttpRequest(url=self.submission_url, data=post_values, timeout=10)
     success, msg = request.connect()
     if success:
         self.deletion_ids.extend(self.del_ids)
         return True

     session_expired = msg[0] == "BADSESSION"
     if session_expired:
         self.bad_session_count += 1
         # `_` is called on the message strings below, so the unused second
         # value is not unpacked into `_`.
         server_response, _message = self.handshake()
         if server_response == "OK" and self.bad_session_count <= 3:
             return self._send_post(post_values)
         self.parent.write_info(_("Error during handshake."))

     header = _("There was an error sending data to last.fm:")
     self.parent.write_info(header + "\n" + "\n".join(msg))
     return False
Exemplo n.º 14
0
 def handshake(self):
     """Perform the handshake, retrying while the status is not "OK".

     Each call increments self.handshake_attempts, rebuilds self.url from
     self.base_url and connects with timeout=10. On a successful connect,
     items 1 and 3 of the response become self.session_id and
     self.submission_url. A status other than "OK" triggers another
     handshake() while the attempt counter is at most 3, after which
     ("FAILED", <message>) is returned; an "OK" status resets the counter.
     Otherwise returns (response[0], message from handshake_response()).
     """
     self.handshake_attempts += 1
     self.timestamp = self.create_timestamp()
     self.authentication_code = self.create_authentication_code()
     self.url = self.base_url + r"/?" + self.encode_url()

     request = HttpRequest(url=self.url, timeout=10)
     success, response = request.connect()
     # Single-argument parenthesised print: same output as the statement form.
     print(request.handshake_response(response[0]))

     if success:
         self.session_id = response[1]
         status = response[0]
         self.submission_url = response[3]
         if status == "OK":
             self.handshake_attempts = 0
         elif self.handshake_attempts <= 3:
             print("Handshake error attempt %d of 3" % self.handshake_attempts)
             return self.handshake()
         else:
             return "FAILED", "Failed to make a handshake with Last.fm"

     msg = request.handshake_response(response[0])
     return response[0], msg
Exemplo n.º 15
0
 def set_user_image(self):
     """Display the last.fm user's image on the "user_thumb" widget.

     The "image" value is parsed from a user.getinfo request for
     self.username; the method returns early when it is None. If a file
     named after the url's basename already exists under self.HOME_DIR it
     is loaded as a pixbuf (size arguments 100, 40) and set on the widget;
     otherwise HttpRequest.retrieve() is asked to fetch it.
     """
     webservice = webservices.LastfmWebService()
     url = "http://ws.audioscrobbler.com/2.0/?method=user.getinfo&user=%s&api_key=%s"
     user_info_url = url % (self.username, webservice.api_key)
     request = HttpRequest(url=user_info_url, timeout=10)
     msg = request.connect(xml=True)
     image_url = webservice.parse_xml(msg, "image")
     if image_url is None:
         return

     # Cached copy lives at HOME_DIR + basename of the image url.
     cached_file = self.HOME_DIR + os.path.basename(image_url)
     if os.path.exists(cached_file):
         image = gtk.gdk.pixbuf_new_from_file_at_size(cached_file, 100, 40)
         self.tree.get_widget("user_thumb").set_from_pixbuf(image)
         return

     request = HttpRequest(image_url)
     request.retrieve(image_url, cached_file,
                      self.tree.get_widget("user_thumb"))
Exemplo n.º 16
0
 def crawl(self, response):
     """Print the response, then yield one HttpRequest per listing page.

     Every yielded request targets one of the fixed list.mgtv.com urls
     below and uses self.crawl1 as its callback.
     """
     print(response)
     listing_pages = (
         'https://list.mgtv.com/3/176--------a1-c2-1--a1-.html?channelId=3',
         'https://list.mgtv.com/3/175--------a1-c2-1--a1-.html?channelId=3',
         'https://list.mgtv.com/3/177--------a1-c2-1--a1-.html?channelId=3',
         'https://list.mgtv.com/3/178--------a1-c2-1--a1-.html?channelId=3',
         'https://list.mgtv.com/3/43--------a1-c2-1--a1-.html?channelId=3',
         'https://list.mgtv.com/3/44--------a1-c2-1--a1-.html?channelId=3',
         'https://list.mgtv.com/2/a1-10--------c2-1---.html?channelId=2',
         'https://list.mgtv.com/2/a1-12--------c2-1---.html?channelId=2',
         'https://list.mgtv.com/2/a1-11--------c2-1---.html?channelId=2',
         'https://list.mgtv.com/2/a1-193--------c2-1---.html?channelId=2',
         'https://list.mgtv.com/50/a1-52--------c2-1---.html?channelId=50',
         'https://list.mgtv.com/50/a1-53--------c2-1---.html?channelId=50',
         'https://list.mgtv.com/1/a1-1--------c2-1---.html?channelId=1',
         'https://list.mgtv.com/1/a1-2--------c2-1---.html?channelId=1',
     )
     # Requests are produced lazily, in the order listed above.
     for page_url in listing_pages:
         yield HttpRequest(url=page_url, callback=self.crawl1)
Exemplo n.º 17
0
 def handshake(self):
     """Handshake with the server, retrying on a non-"OK" status.

     Increments self.handshake_attempts, refreshes the timestamp and
     authentication code, rebuilds self.url from self.base_url and connects
     with timeout=10. After a successful connect the session id and
     submission url are read from items 1 and 3 of the response. While the
     status is not "OK" and the attempt counter is at most 3 the handshake
     is repeated; beyond that ("FAILED", <message>) is returned. An "OK"
     status resets the counter. The normal return value is
     (response[0], message from handshake_response()).
     """
     self.handshake_attempts += 1
     self.timestamp = self.create_timestamp()
     self.authentication_code = self.create_authentication_code()
     self.url = self.base_url + r"/?" + self.encode_url()

     req = HttpRequest(url=self.url, timeout=10)
     connected, reply = req.connect()
     # Single-argument parenthesised print: same output as the statement form.
     print(req.handshake_response(reply[0]))

     if connected:
         self.session_id = reply[1]
         status = reply[0]
         self.submission_url = reply[3]
         if status == "OK":
             self.handshake_attempts = 0
         elif self.handshake_attempts <= 3:
             print("Handshake error attempt %d of 3" % self.handshake_attempts)
             return self.handshake()
         else:
             return "FAILED", "Failed to make a handshake with Last.fm"

     message = req.handshake_response(reply[0])
     return reply[0], message
Exemplo n.º 18
0
def get_post_content_and_time(post_url, post_type, post_name, time_last_time):
    """Fetch one hi-pda forum post and store it if it is newer than the last crawl.

    Args:
        post_url: Post URL relative to ``http://www.hi-pda.com/forum/``.
        post_type: Value stored in the ``post_type`` column.
        post_name: Post title, stored in the ``post_title`` column.
        time_last_time: Timestamp of the previous crawl, compared against the
            post's ``datetime.timestamp()`` value.

    Returns:
        The ``Post`` built for the inserted row, or ``None`` when the post
        time cannot be determined or is not later than ``time_last_time``.
    """
    post_full_url = 'http://www.hi-pda.com/forum/' + post_url
    post_headers = {
        'Referer': 'http://www.hi-pda.com/forum/',
        'Host': 'www.hi-pda.com'
    }

    logging.info('Get post[%s] by url[%s].', post_name, post_url)

    # Request the post page by its url.
    post_request = HttpRequest(post_full_url, None, post_headers)
    post_request.send_request()
    post_resp_content = post_request.get_resp_content()
    try:
        post_resp_content = post_resp_content.decode('gbk')
    except UnicodeDecodeError as e:
        logging.error('Decode post response content failed.')
        logging.exception(e)
        # The str patterns below cannot search bytes, so fall back to a
        # lenient decode instead of carrying the raw bytes forward.
        post_resp_content = post_resp_content.decode('gbk', errors='replace')

    # Parse the post summary out of the description meta tag.
    post_content = None
    result_content = re.search(
        r'''<meta name="description" content="(.*)" />''', post_resp_content)
    if result_content is None:
        logging.warning('Request failed.')
        logging.warning('Get post conetent failed.')
    else:
        post_content = result_content.group(1)

    # Prefer the "last edited" time embedded in the summary, if present.
    post_update_time = None
    if post_content is not None:
        result_update_time = re.search(r'''于 (.*) 编辑''', post_content)
        if result_update_time is not None:
            post_update_time = result_update_time.group(1)

    # Otherwise fall back to the "posted at" time in the page body.
    post_create_time = post_update_time
    if post_create_time is None:
        result_create_time = re.search(
            r'''<em id=".+">发表于 (.+)</em>''', post_resp_content)
        if result_create_time is None:
            # Without a time the post cannot be compared with the last crawl.
            logging.warning('Get post time failed.')
            return None
        post_create_time = result_create_time.group(1)

    try:
        post_create_time_datetime = datetime.strptime(post_create_time,
                                                      '%Y-%m-%d %H:%M')
    except ValueError:
        logging.warning('Get post time failed.')
        return None
    post_create_time_stamp = post_create_time_datetime.timestamp()

    # Only posts later than the previous crawl are stored.
    if post_create_time_stamp <= time_last_time:
        logging.info('Post time[%s] is not after last time.',
                     post_create_time_datetime)
        return None

    # %-style arguments keep these calls safe when post_content is None.
    logging.info('post_type:%s', post_type)
    logging.info('post_name:%s', post_name)
    logging.info('post_url:%s', post_full_url)
    logging.info('post_create_time:%s', post_create_time)
    logging.info('post_content:%s', post_content)

    post_id = next_id()
    post = Post(id=post_id,
                post_type=post_type,
                post_title=post_name,
                post_owner='hipda',
                post_content=post_content,
                post_link=post_full_url,
                post_time=post_create_time)

    conn = mysql.connector.connect(user=db_user,
                                   password=db_passwd,
                                   database=db_name)
    try:
        cursor = conn.cursor()
        try:
            cursor.execute(
                'insert into posts (id, post_type, post_title, post_owner, post_content, post_link, post_time, created_at ) values (%s, %s, %s, %s, %s, %s, %s, %s)',
                [
                    post_id, post_type, post_name, 'hipda', post_content,
                    post_full_url, post_create_time_stamp,
                    post_create_time_stamp
                ])
            conn.commit()
        finally:
            cursor.close()
    finally:
        # Close the connection even when the insert fails.
        conn.close()
    time.sleep(1)

    return post
Exemplo n.º 19
0
 def format_crawl(cls):
     """Yield an HttpRequest for every start url of a fresh cls instance.

     The instance is created as cls(cls.start_urls); each request uses the
     instance's crawl method as its callback.
     """
     spider = cls(cls.start_urls)
     for start_url in spider.start_urls:
         yield HttpRequest(url=start_url, callback=spider.crawl)
Exemplo n.º 20
0
class PreRequest:
    """Routes test data to the HTTP action selected by the "mode" config value."""

    # Headers sent by both verify-game steps.
    # NOTE(review): the Authorization token is hard-coded in source; consider
    # moving it to configuration.
    _VERIFY_HEADERS = {
        "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
        "Authorization":
        "MAuth-870fc3d727723a7410d2c0fa15154072cdb9300e9a54f89e09b9a27d32f852a44fd07348ae46208298303f281bcdc9a9079fa0b79310115038b4071b44edbe42-MAuth",
        "X-APP-ID": "20"
    }

    def __init__(self):
        """Create the HttpRequest instance used by every action."""
        self.httpRequest = HttpRequest()

    def request(self, data):
        """Dispatch data according to the configured mode.

        "upload" and "normal" call the methods of the same name;
        "verifygame" calls verify_1 or verify_2 depending on the configured
        step. Any other mode or step does nothing.
        """
        mode = config.get_config("mode", "mode")
        if mode == "upload":
            self.upload(data)
        elif mode == "normal":
            self.normal(data)
        elif mode == "verifygame":
            step = config.get_config("verifygame", "step")
            if step == "1":
                self.verify_1(data)
            elif step == "2":
                self.verify_2(data)

    def normal(self, jsondata):
        """Send every request described in jsondata and log request/response.

        Each value of jsondata must provide "url", "method" and "data";
        "POST" is sent with post(), anything else with get(). The last
        response is kept on self.reps.
        """
        Pylog.info("".join(jsondata.keys()) + ": 开始测试")
        for spec in jsondata.values():
            url = "http://" + config.get_config("normal", "url") + spec["url"]
            method = spec["method"]
            data = spec["data"]
            if method == "POST":
                self.reps = self.httpRequest.post(url=url, data=data)
            else:
                self.reps = self.httpRequest.get(url=url, data=data)
            Pylog.debug("Request Body:" + str(self.reps.request.body))
            Pylog.debug("Response:" + self.reps.text)

    def upload(self, filename=None):
        """Upload the picture at filename and append its id to a CSV file.

        The CSV path is built from the "upload" config values; each row
        holds the picture name relative to the configured src directory and
        the "picid" from the JSON response. Errors are logged through
        Pylog.error rather than raised.
        """
        if filename is None:
            # The default value cannot be uploaded; report it like any other
            # upload failure instead of raising on the string concatenation.
            Pylog.error("upload requires a filename")
            return
        Pylog.info(filename + ": 开始上载")
        try:
            src = config.get_config("upload", "src")
            savefile = config.get_config("upload", "savefile")
            savename = re.findall('E:/OtherFile/(.*)', src, re.S)[0]
            idfile = savefile + savename + '.csv'
            url = "http://img.will888.cn/photo/upload"
            # Close the picture as soon as the upload call has returned.
            with open(filename, 'rb') as picture:
                self.reps = self.httpRequest.upload(url=url,
                                                    files={'pic': picture})
            Pylog.debug("Response:" + self.reps.text)
            pic = json.loads(self.reps.content)
            picId = pic["picid"]
            # src is a path, not a pattern: escape it before using it in a regex.
            picname = re.findall(re.escape(src) + '/(.*)', filename, re.S)
            row = [picname[0], picId]
            # Write the CSV row.
            with open(idfile, 'a', encoding='utf8', newline='') as csvfile:
                csv.writer(csvfile).writerow(row)
        except Exception as e:
            Pylog.error(e)

    def verify_1(self, data):
        """Start the game identified by data and record its launch url.

        Posts {"id": data} to the game start endpoint and appends
        [data, first action url found in the response] to
        <urlfile>/<game>.csv.
        """
        game = config.get_config("verifygame", "game")
        urlfile = config.get_config("verifygame", "urlfile")
        Pylog.info(data)
        datas = {"id": data}
        # A fresh copy per call, so the shared constant is never mutated.
        self.httpRequest.headers = dict(self._VERIFY_HEADERS)
        url = "http://" + config.get_config("normal",
                                            "url") + "/v1/config/kd/game/start"
        self.reps = self.httpRequest.post(url=url, data=datas)
        Pylog.debug("Request Body:" + str(self.reps.request.body))
        Pylog.debug("Response:" + self.reps.text)

        # Extract the form action url from the response with a regex.
        gameurl = re.findall("action='(.*)'>        </form>", self.reps.text,
                             re.S)
        row = [data, gameurl[0]]
        with open(urlfile + '/' + game + '.csv',
                  'a',
                  encoding='utf8',
                  newline='') as csvfile:
            csv.writer(csvfile).writerow(row)

    def verify_2(self, data):
        """Request the game url stored under the first key of data.

        data maps a game id to its url; the response is kept on self.reps.
        """
        game_id = list(data)[0]
        Pylog.info(game_id)
        game_url = data[game_id]
        # A fresh copy per call, so the shared constant is never mutated.
        self.httpRequest.headers = dict(self._VERIFY_HEADERS)
        self.reps = self.httpRequest.get(url=game_url)
        Pylog.debug("Request Body:" + str(self.reps.request.body))
Exemplo n.º 21
0
 def __init__(self):
     """Create the HttpRequest instance stored on self.httpRequest."""
     self.httpRequest = HttpRequest()
Exemplo n.º 22
0
	def getRequest(self):
		"""Build a urllib2.Request for self.url with self.params encoded into the query string."""
		target = self.url
		query = HttpRequest.urlencode(self.params)
		# url and query are joined by a single "?".
		return urllib2.Request("?".join((target, query)))
Exemplo n.º 23
0
        else:
            get_post_time_last_time = datetime.strptime(
                get_post_time_last_time, '%Y-%m-%d %H:%M:%S')
            get_post_time_last_time_stamp = get_post_time_last_time.timestamp()

        logging.info('************last time: %s************' %
                     get_post_time_last_time)

        #using cookieJar & HTTPCookieProcessor to automatically handle cookies
        cj = http.cookiejar.CookieJar()
        opener = urllib.request.build_opener(
            urllib.request.HTTPCookieProcessor(cj))
        urllib.request.install_opener(opener)

        pda_url = 'http://www.hi-pda.com/'
        pda_request = HttpRequest(pda_url)
        pda_request.send_request()
        pda_resp_content = pda_request.get_resp_content()

        formhash_url = 'http://www.hi-pda.com/forum/logging.php?action=login&referer=http%3A//www.hi-pda.com/forum/'
        formhash_request = HttpRequest(formhash_url, None,
                                       {'Host': 'www.hi-pda.com'})
        formhash_request.send_request()
        formhash_resp_content = formhash_request.get_resp_content()
        try:
            formhash_resp_content = formhash_resp_content.decode('gbk')
        except UnicodeDecodeError as e:
            logging.error('Decode formhash response content failed.')
            logging.exception(e)

        # print( formhash_resp_content )
Exemplo n.º 24
0
def get_post_content_and_time(post_url, post_type, post_name, time_last_time):
    """Fetch one hi-pda forum post and store it if it is newer than the last crawl.

    Args:
        post_url: Post URL relative to ``http://www.hi-pda.com/forum/``.
        post_type: Value stored in the ``post_type`` column.
        post_name: Post title, stored in the ``post_title`` column.
        time_last_time: Timestamp of the previous crawl, compared against the
            post's ``datetime.timestamp()`` value.

    Returns:
        The ``Post`` built for the inserted row, or ``None`` when the request
        times out, the post time cannot be determined, or the post is not
        later than ``time_last_time``.
    """
    post_full_url = 'http://www.hi-pda.com/forum/' + post_url
    post_headers = {
        'Referer': 'http://www.hi-pda.com/forum/',
        'Host': 'www.hi-pda.com'
    }

    logging.info('Get post[%s] by url[%s].', post_name, post_url)

    post_request = HttpRequest(post_full_url, None, post_headers)
    try:
        post_request.send_request()
    except TimeoutError:
        # Nothing was received, so there is no content to parse.
        logging.warning(" Request url[%s] failed. ", post_full_url)
        return None
    post_resp_content = post_request.get_resp_content()
    try:
        post_resp_content = post_resp_content.decode('gbk')
    except UnicodeDecodeError as e:
        logging.error('Decode post response content failed.')
        logging.exception(e)
        # The str patterns below cannot search bytes, so fall back to a
        # lenient decode instead of carrying the raw bytes forward.
        post_resp_content = post_resp_content.decode('gbk', errors='replace')

    # The summary is read from a tag shaped like:
    # <meta name="description" content=" Hi!PDA ... - Board" />
    post_content = None
    result_content = re.search(
        r'''<meta name="description" content="(.*)" />''', post_resp_content)
    if result_content is None:
        logging.warning('Request failed.')
        logging.warning('Get post conetent failed.')
    else:
        post_content = result_content.group(1)

    # Prefer the "last edited" time embedded in the summary, if present.
    post_update_time = None
    if post_content is not None:
        result_update_time = re.search(r'''于 (.*) 编辑''', post_content)
        if result_update_time is not None:
            post_update_time = result_update_time.group(1)

    # Otherwise fall back to the "posted at" time in the page body.
    post_create_time = post_update_time
    if post_create_time is None:
        result_create_time = re.search(
            r'''<em id=".+">发表于 (.+)</em>''', post_resp_content)
        if result_create_time is None:
            # Without a time the post cannot be compared with the last crawl.
            logging.warning('Get post time failed.')
            return None
        post_create_time = result_create_time.group(1)

    try:
        post_create_time_datetime = datetime.strptime(post_create_time,
                                                      '%Y-%m-%d %H:%M')
    except ValueError:
        logging.warning('Get post time failed.')
        return None
    post_create_time_stamp = post_create_time_datetime.timestamp()

    # Only posts later than the previous crawl are stored.
    if post_create_time_stamp <= time_last_time:
        logging.info('Post time[%s] is not after last time.',
                     post_create_time_datetime)
        return None

    # %-style arguments keep these calls safe when post_content is None.
    logging.info('post_type:%s', post_type)
    logging.info('post_name:%s', post_name)
    logging.info('post_url:%s', post_full_url)
    logging.info('post_create_time:%s', post_create_time)
    logging.info('post_content:%s', post_content)

    post_id = next_id()
    post = Post(id=post_id,
                post_type=post_type,
                post_title=post_name,
                post_owner='hipda',
                post_content=post_content,
                post_link=post_full_url,
                post_time=post_create_time)

    conn = mysql.connector.connect(user=db_user,
                                   password=db_passwd,
                                   database=db_name)
    try:
        cursor = conn.cursor()
        try:
            cursor.execute(
                'insert into posts (id, post_type, post_title, post_owner, post_content, post_link, post_time, created_at ) values (%s, %s, %s, %s, %s, %s, %s, %s)',
                [
                    post_id, post_type, post_name, 'hipda', post_content,
                    post_full_url, post_create_time_stamp,
                    post_create_time_stamp
                ])
            conn.commit()
        finally:
            cursor.close()
    finally:
        # Close the connection even when the insert fails.
        conn.close()
    time.sleep(1)

    return post
Exemplo n.º 25
0
	def getRequest(self):
		"""Build a urllib2.Request for self.url with the url-encoded self.params as its second argument."""
		target = self.url
		payload = HttpRequest.urlencode(self.params)
		return urllib2.Request(target, payload)
Exemplo n.º 26
0
        if get_post_time_last_time == '':
            get_post_time_last_time = 0
            get_post_time_last_time_stamp = 0
        else:
            get_post_time_last_time = datetime.strptime( get_post_time_last_time, '%Y-%m-%d %H:%M:%S' )
            get_post_time_last_time_stamp = get_post_time_last_time.timestamp()

        logging.info( '************last time: %s************' % get_post_time_last_time )

        #using cookieJar & HTTPCookieProcessor to automatically handle cookies
        cj = http.cookiejar.CookieJar()
        opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cj))
        urllib.request.install_opener(opener)

        pda_url = 'http://www.hi-pda.com/'
        pda_request = HttpRequest( pda_url )
        pda_request.send_request()
        pda_resp_content = pda_request.get_resp_content()

        formhash_url = 'http://www.hi-pda.com/forum/logging.php?action=login&referer=http%3A//www.hi-pda.com/forum/'
        formhash_request = HttpRequest( formhash_url, None, { 'Host' : 'www.hi-pda.com' } )
        formhash_request.send_request()
        formhash_resp_content = formhash_request.get_resp_content()
        try:
            formhash_resp_content = formhash_resp_content.decode('gbk')
        except UnicodeDecodeError as e:
            logging.error( 'Decode formhash response content failed.' )
            logging.exception( e )

        # print( formhash_resp_content )
        # <input type="hidden" name="formhash" value="2f68efff" />
Exemplo n.º 27
0
def get_post_content_and_time(post_url, post_type, post_name, time_last_time):
    """Fetch one hi-pda forum post and store it if it is newer than the last crawl.

    Args:
        post_url: Post URL relative to ``http://www.hi-pda.com/forum/``.
        post_type: Value stored in the ``post_type`` column.
        post_name: Post title, stored in the ``post_title`` column.
        time_last_time: Timestamp of the previous crawl, compared against the
            post's ``datetime.timestamp()`` value.

    Returns:
        The ``Post`` built for the inserted row, or ``None`` when the post
        time cannot be determined or is not later than ``time_last_time``.
    """
    post_full_url = 'http://www.hi-pda.com/forum/' + post_url
    post_headers = {
        'Referer': 'http://www.hi-pda.com/forum/',
        'Host': 'www.hi-pda.com'
    }

    logging.info('Get post[%s] by url[%s].', post_name, post_url)

    # Request the post page by its url.
    post_request = HttpRequest(post_full_url, None, post_headers)
    post_request.send_request()
    post_resp_content = post_request.get_resp_content()
    try:
        post_resp_content = post_resp_content.decode('gbk')
    except UnicodeDecodeError as e:
        logging.error('Decode post response content failed.')
        logging.exception(e)
        # The str patterns below cannot search bytes, so fall back to a
        # lenient decode instead of carrying the raw bytes forward.
        post_resp_content = post_resp_content.decode('gbk', errors='replace')

    # Parse the post summary out of the description meta tag.
    post_content = None
    result_content = re.search(
        r'''<meta name="description" content="(.*)" />''', post_resp_content)
    if result_content is None:
        logging.warning('Request failed.')
        logging.warning('Get post conetent failed.')
    else:
        post_content = result_content.group(1)

    # Prefer the "last edited" time embedded in the summary, if present.
    post_update_time = None
    if post_content is not None:
        result_update_time = re.search(r'''于 (.*) 编辑''', post_content)
        if result_update_time is not None:
            post_update_time = result_update_time.group(1)

    # Otherwise fall back to the "posted at" time in the page body.
    post_create_time = post_update_time
    if post_create_time is None:
        result_create_time = re.search(
            r'''<em id=".+">发表于 (.+)</em>''', post_resp_content)
        if result_create_time is None:
            # Without a time the post cannot be compared with the last crawl.
            logging.warning('Get post time failed.')
            return None
        post_create_time = result_create_time.group(1)

    try:
        post_create_time_datetime = datetime.strptime(post_create_time,
                                                      '%Y-%m-%d %H:%M')
    except ValueError:
        logging.warning('Get post time failed.')
        return None
    post_create_time_stamp = post_create_time_datetime.timestamp()

    # Only posts later than the previous crawl are stored.
    if post_create_time_stamp <= time_last_time:
        logging.info('Post time[%s] is not after last time.',
                     post_create_time_datetime)
        return None

    # %-style arguments keep these calls safe when post_content is None.
    logging.info('post_type:%s', post_type)
    logging.info('post_name:%s', post_name)
    logging.info('post_url:%s', post_full_url)
    logging.info('post_create_time:%s', post_create_time)
    logging.info('post_content:%s', post_content)

    post_id = next_id()
    post = Post(id=post_id,
                post_type=post_type,
                post_title=post_name,
                post_owner='hipda',
                post_content=post_content,
                post_link=post_full_url,
                post_time=post_create_time)

    conn = mysql.connector.connect(user=db_user,
                                   password=db_passwd,
                                   database=db_name)
    try:
        cursor = conn.cursor()
        try:
            cursor.execute(
                'insert into posts (id, post_type, post_title, post_owner, post_content, post_link, post_time, created_at ) values (%s, %s, %s, %s, %s, %s, %s, %s)',
                [
                    post_id, post_type, post_name, 'hipda', post_content,
                    post_full_url, post_create_time_stamp,
                    post_create_time_stamp
                ])
            conn.commit()
        finally:
            cursor.close()
    finally:
        # Close the connection even when the insert fails.
        conn.close()
    time.sleep(1)

    return post
Exemplo n.º 28
0
 def getRequest(self):
     """Build a urllib2.Request for self.url with the url-encoded self.params as its second argument."""
     target = self.url
     encoded_params = HttpRequest.urlencode(self.params)
     return urllib2.Request(target, encoded_params)