Example #1
 def GET(self):
     # Admin dispatcher: the operation is selected via the "action"
     # query parameter and the outcome is rendered by the admin view.
     result = {}
     action = web.input(action=None)['action']

     if action == 'quota':
         results = urlfetch.fetch('https://api.stackexchange.com/2.0/info?site=stackoverflow&key=%s' % api_key,
                                  headers={'User-Agent': 'StackPrinter'}, deadline=10)
         result['result'] = simplejson.loads(results.content)
     elif action == 'quotaauth':
         results = urlfetch.fetch('https://api.stackexchange.com/2.0/info?site=stackoverflow&key=%s&access_token=%s' % (api_key, TokenManager.get_auth_token()),
                                  headers={'User-Agent': 'StackPrinter'}, deadline=10)
         result['result'] = simplejson.loads(results.content)
     elif action == 'authkey':
         result['result'] = TokenManager.get_auth_token()
     elif action == 'memcachestats':
         result = memcache.get_stats()
     elif action == 'memcacheflush':
         result['result'] = memcache.flush_all()
     elif action == 'normalize':
         deferred.defer(worker.deferred_normalize_printed_question)
         result['result'] = True
     elif action == 'delete':
         service = web.input(service=None)['service']
         question_id = web.input(question_id=None)['question_id']
         result['printed_question_deletion'] = dbquestion.delete_printed_question(question_id, service)
         result['question_deletion'] = dbquestion.delete_question(question_id, service)
         result['answers_deletion'] = dbquestion.delete_answers(question_id, service)
     return render.admin(result)
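For comparison, here is a minimal standalone sketch of the same quota check. It assumes the third-party requests library (the snippet above uses App Engine's urlfetch instead); quota_remaining is a field of the Stack Exchange 2.x response wrapper, and the function name is illustrative.

import requests

def remaining_quota(api_key):
    # Hit the same /info route as the 'quota' action above and report how
    # much of the daily request quota is left.
    response = requests.get('https://api.stackexchange.com/2.0/info',
                            params={'site': 'stackoverflow', 'key': api_key},
                            headers={'User-Agent': 'StackPrinter'},
                            timeout=10).json()
    return response.get('quota_remaining')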
Example #2
def get_sites():
    """
    Get a list of Stack Exchange sites using Stackauth service
    """
    results = __gae_fetch(
        'https://api.stackexchange.com/%s/sites?pagesize=999&key=%s' %
        (__api_version, api_key))
    response = simplejson.loads(results.content)
    return response
Example #3
def invalidate_auth_token(auth_token):
    """
    Invalidate the given auth_token
    """

    results = __gae_fetch(
        'https://api.stackexchange.com/%s/access-tokens/%s/invalidate' %
        (__api_version, auth_token))
    response = simplejson.loads(results.content)
    return response
Example #4
def check_link_weight(link):
    base_link = get_base_link(link)
    if base_link == 'http://stackoverflow.com':
        question_id = ContentDiscoverer(link).get_id()
        if question_id:
            results = urlfetch.fetch('https://api.stackexchange.com/2.0/questions/%s?order=desc&sort=activity&site=stackoverflow&filter=!-T4d7xQ6' % question_id,
                                     headers={'User-Agent': 'StackPrinter'}, deadline=10)
            response = simplejson.loads(results.content)
            question = response['items'][0]
            return question['score'] >= 3
        else:
            return False
    return True
Example #5
def handle_response(results, url=None):
    """
    Load results as JSON
    """
    # When a request is throttled, the API simply closes the door without any response
    try:
        response = simplejson.loads(results.content)
    except simplejson.JSONDecodeError:
        raise ApiRequestError(url, CODE_API_ERROR_THROTTLING, API_ERROR_THROTTLING)
    if "error" in response:
        error = response["error"]
        code = error["code"]
        message = error["message"]
        raise ApiRequestError(url, code, message)
    return response
Example #6
    def fetch(self, path, results_key, **url_params):
        """
        Fetches all the results for a given path where path is the API URL path.
        results_key is the key of the results list. If url_params is given, its
        key/value pairs are used to build the API query string.
        """
        base_url = "%s/%s/%s" % (self._name, self._version, path)
        params = {
            "key": self._api_key,
            "pagesize": self._page_size,
            "page": self._start_page
            }

        params.update(url_params)

        while True:
            query = urllib.urlencode(params)
            url = "%s?%s" % (base_url, query)
            data = self._get_response_data(url)
            response = json.loads(data)
            count = 0

            if "error" in response:
                error = response["error"]
                code = error["Code"]
                message = error["Message"]

                raise APIError(url, code, message)

            if results_key:
                results = response[results_key]
            else:
                results = response

            if len(results) < 1:
                break

            for result in results:
                yield result

            if len(results) < params["pagesize"]:
                break

            params["page"] += 1
Example #7
def handle_response(results, url=None):
    """
    Load results as JSON
    """
    #When request is throttled, API simply closes the door without any response

    try:
        response = simplejson.loads(results.content)
    except simplejson.JSONDecodeError:
        raise ApiRequestError(url, CODE_API_ERROR_THROTTLING,
                              API_ERROR_THROTTLING)
    if "backoff" in response:
        logging.info('Backoff warning found! Value: %s Url: %s' %
                     (response["backoff"], url))
        memcache.set('backoff', response["backoff"], response["backoff"])

    if "error_id" in response:
        error = response["error_name"]
        code = response["error_id"]
        message = response["error_message"]
        raise ApiRequestError(url, code, message)
    return response
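The cached value can then be honored before the next call goes out. A minimal sketch, assuming the same App Engine memcache client and the 'backoff' key used above; a real handler would typically defer the work rather than block.

import time

from google.appengine.api import memcache

def wait_for_backoff():
    # If handle_response cached a backoff period, sleep it off before
    # issuing the next Stack Exchange API request.
    backoff = memcache.get('backoff')
    if backoff:
        time.sleep(backoff)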
Example #8
    def get_tags_of_user(self, username):
        """
        Retrieves user's public tags and their tag counts from Delicious.com.
        The tags represent a user's full public tagging vocabulary.

        DeliciousAPI uses the official JSON feed of the user. We could use
        RSS here, but the JSON feed has proven to be faster in practice.

        @param username: The Delicious.com username.
        @type username: str

        @return: Dictionary mapping tags to their tag counts.

        """
        tags = {}
        path = "/v2/json/tags/%s" % username
        data = self._query(path, host="feeds.delicious.com")
        if data:
            try:
                tags = simplejson.loads(data)
            except TypeError:
                pass
        return tags
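A hypothetical usage sketch: it assumes this method lives on the DeliciousAPI class of the deliciousapi module and that the class can be constructed without arguments, neither of which is shown in the excerpt; the username is a placeholder.

import deliciousapi

api = deliciousapi.DeliciousAPI()
tag_counts = api.get_tags_of_user("example_user")
# tag_counts maps each public tag to its count, e.g. {"python": 12, "web": 3}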
Example #9
    def get_urls(self, tag=None, popular=True, max_urls=100, sleep_seconds=1):
        """
        Returns the list of recent URLs (of web documents) tagged with a given tag.

        This is very similar to parsing Delicious' RSS/JSON feeds directly,
        but this function will return up to 2,000 links compared to a maximum
        of 100 links when using the official feeds (with query parameter
        count=100).

        The return list of links will be sorted by recency in descending order,
        i.e. newest items first.

        Note that even when setting max_urls, get_urls() cannot guarantee that
        it can retrieve *at least* this many URLs. It is really just an upper
        bound.

        @param tag: Retrieve links which have been tagged with the given tag.
            If tag is not set (default), links will be retrieved from the
            Delicious.com front page (aka "delicious hotlist").
        @type tag: unicode/str

        @param popular: If true (default), retrieve only popular links (i.e.
            /popular/<tag>). Otherwise, the most recent links tagged with
            the given tag will be retrieved (i.e. /tag/<tag>).

            As of January 2009, it seems that Delicious.com modified the list
            of popular tags to contain only up to a maximum of 15 URLs.
            This also means that setting max_urls to values larger than 15
            will not change the results of get_urls().
            So if you are interested in more URLs, set the "popular" parameter
            to false.

            Note that if you set popular to False, the returned list of URLs
            might contain duplicate items. This is due to the way Delicious.com
            creates its /tag/<tag> Web pages. So if you need a certain
            number of unique URLs, you have to take care of that in your
            own code.
        @type popular: bool

        @param max_urls: Retrieve at most max_urls links. The default is 100,
            which is the maximum number of links that can be retrieved by
            parsing the official JSON feeds. The maximum value of max_urls
            in practice is 2000 (currently). If it is set higher, Delicious
            will return the same links over and over again, giving lots of
            duplicate items.
        @type max_urls: int

        @param sleep_seconds: Optional, default: 1.
            Wait the specified number of seconds between subsequent queries in
            case that there are multiple pages of bookmarks for the given url.
            Must be greater than or equal to 1 to comply with Delicious.com's
            Terms of Use.
            See also parameter 'max_urls'.
        @type sleep_seconds: int

        @return: The list of recent URLs (of web documents) tagged with a given tag.

        """
        assert sleep_seconds >= 1
        urls = []
        path = None
        if tag is None or 0 < max_urls <= 100:
            # use official JSON feeds
            max_json_count = 100
            if tag:
                # tag-specific JSON feed
                if popular:
                    path = "/v2/json/popular/%s?count=%d" % (tag, max_json_count)
                else:
                    path = "/v2/json/tag/%s?count=%d" % (tag, max_json_count)
            else:
                # Delicious.com hotlist
                path = "/v2/json/?count=%d" % (max_json_count)
            data = self._query(path, host="feeds.delicious.com")
            if data:
                posts = []
                try:
                    posts = simplejson.loads(data)
                except TypeError:
                    pass

                for post in posts:
                    # url
                    try:
                        url = post['u']
                        if url:
                            urls.append(url)
                    except KeyError:
                        pass
        else:
            # maximum number of urls/posts Delicious.com will display
            # per page on its website
            max_html_count = 100
            # maximum number of pages that Delicious.com will display;
            # currently, the maximum number of pages is 20. Delicious.com
            # allows to go beyond page 20 via pagination, but page N (for
            # N > 20) will always display the same content as page 20.
            max_html_pages = 20

            if popular:
                path = "/popular/%s?setcount=%d" % (tag, max_html_count)
            else:
                path = "/tag/%s?setcount=%d" % (tag, max_html_count)

            page_index = 1
            urls = []
            while path and page_index <= max_html_pages:
                data = self._query(path)
                path = None
                if data:
                    # extract urls from current page
                    soup = BeautifulSoup(data)
                    links = soup.findAll("a", attrs={"class": re.compile("^taggedlink\s*")})
                    for link in links:
                        try:
                            url = link['href']
                            if url:
                                urls.append(url)
                        except KeyError:
                            pass

                    # check if there are more pages of urls
                    paginations = soup.findAll("div", id="pagination")
                    if paginations:
                        # find next path
                        nexts = paginations[0].findAll("a", attrs={ "class": "pn next" })
                        if nexts and (max_urls == 0 or len(urls) < max_urls) and len(urls) > 0:
                            # e.g. /url/2bb293d594a93e77d45c2caaf120e1b1?show=all&page=2
                            path = nexts[0]['href']
                            path += "&setcount=%d" % max_html_count
                            page_index += 1
                            # wait between queries to Delicious.com to be
                            # compliant with its Terms of Use
                            time.sleep(sleep_seconds)
        if max_urls > 0:
            return urls[:max_urls]
        else:
            return urls
Example #10
 def get_questions(self, api_site_parameter, page, **kwargs):
     return simplejson.loads(QUESTIONS)
Example #11
 def get_answers(self, question_id, api_site_parameter, **kwargs):
     if question_id != 0:
         return simplejson.loads(ANSWERS)
     else:
         return simplejson.loads(EMPTY_ANSWERS)
Example #12
 def get_answer(self, answer_id, api_site_parameter, **kwargs):
     if answer_id != 0:
         return simplejson.loads(ANSWER)
     else:
         return simplejson.loads(EMPTY_ANSWERS)
Example #13
 def get_questions_by_tags(self, tags, api_site_parameter, page, **kwargs):
     if tags == 'python':
         return simplejson.loads(QUESTIONS)
     if tags == 'atagthedoesnotexist':
         return simplejson.loads(EMPTY_QUESTIONS)
Example #14
 def get_tags(self, user_id, api_site_parameter, page, **kwargs):
     return simplejson.loads(TAGS)
Example #15
 def get_question(self, question_id, api_site_parameter, **kwargs):
     if question_id != 0:
         return simplejson.loads(QUESTION)
     else:
         return simplejson.loads(EMPTY_QUESTIONS)    
Example #16
    def get_network(self, username):
        """
        Returns the user's list of followees and followers.

        Followees are users in his Delicious "network", i.e. those users whose
        bookmark streams he's subscribed to. Followers are his Delicious.com
        "fans", i.e. those users who have subscribed to the given user's
        bookmark stream.

        Example:

                A -------->   --------> C
                D --------> B --------> E
                F -------->   --------> F

            followers               followees
            of B                    of B

        Arrows from user A to user B denote that A has subscribed to B's
        bookmark stream, i.e. A is "following" or "tracking" B.

        Note that user F is both a followee and a follower of B, i.e. F tracks
        B and vice versa. In Delicious.com terms, F is called a "mutual fan"
        of B.

        Comparing this network concept to information retrieval, one could say
        that followers are incoming links and followees outgoing links of B.

        @param username: Delicious.com username for which network information is
            retrieved.
        @type username: unicode/str

        @return: Tuple of two lists ([<followees>], [<followers>]), where each list
            contains tuples of (username, tracking_since_timestamp).
            If a network is set as private, i.e. hidden from public view,
            (None, None) is returned.
            If a network is public but empty, ([], []) is returned.

        """
        assert username
        followees = followers = None

        # followees (network members)
        path = "/v2/json/networkmembers/%s" % username
        data = None
        try:
            data = self._query(path, host="feeds.delicious.com")
        except DeliciousForbiddenError:
            pass
        if data:
            followees = []

            users = []
            try:
                users = simplejson.loads(data)
            except TypeError:
                pass

            for user in users:
                # reset per user so a missing key doesn't reuse the previous value
                uname = tracking_since = None
                # followee's username
                try:
                    uname = user['user']
                except KeyError:
                    pass
                # try to convert uname to Unicode
                if uname:
                    try:
                        # we assume UTF-8 encoding
                        uname = uname.decode('utf-8')
                    except UnicodeDecodeError:
                        pass
                # time when the given user started tracking this user
                try:
                    tracking_since = datetime.datetime.strptime(user['dt'], "%Y-%m-%dT%H:%M:%SZ")
                except KeyError:
                    pass
                if uname:
                    followees.append( (uname, tracking_since) )

        # followers (network fans)
        path = "/v2/json/networkfans/%s" % username
        data = None
        try:
            data = self._query(path, host="feeds.delicious.com")
        except DeliciousForbiddenError:
            pass
        if data:
            followers = []

            users = []
            try:
                users = simplejson.loads(data)
            except TypeError:
                pass

            for user in users:
                # reset per user so a missing key doesn't reuse the previous value
                uname = tracking_since = None
                # fan's username
                try:
                    uname = user['user']
                except KeyError:
                    pass
                # try to convert uname to Unicode
                if uname:
                    try:
                        # we assume UTF-8 encoding
                        uname = uname.decode('utf-8')
                    except UnicodeDecodeError:
                        pass
                # time when fan started tracking the given user
                try:
                    tracking_since = datetime.datetime.strptime(user['dt'], "%Y-%m-%dT%H:%M:%SZ")
                except KeyError:
                    pass
                if uname:
                    followers.append( (uname, tracking_since) )
        return ( followees, followers )
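A hypothetical usage sketch, again assuming the surrounding class is deliciousapi.DeliciousAPI; the username is a placeholder.

import deliciousapi

api = deliciousapi.DeliciousAPI()
followees, followers = api.get_network("example_user")
if followees is None and followers is None:
    pass  # the user's network is private (hidden from public view)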
Example #17
 def get_users(self, filter, api_site_parameter, **kwargs):
     return simplejson.loads(USERS)
Example #18
 def get_users_by_id(self, user_id, api_site_parameter, **kwargs):
     return simplejson.loads(USERS)
Example #19
    def get_url(self, url, max_bookmarks=50, sleep_seconds=1):
        """
        Returns a DeliciousURL instance representing the Delicious.com history of url.

        Generally, this method is what you want for getting title, bookmark, tag,
        and user information about a URL.

        Delicious only returns up to 50 bookmarks per URL. This means that
        we have to do subsequent queries plus parsing if we want to retrieve
        more than 50. Roughly speaking, the processing time of get_url()
        increases linearly with the number of 50-bookmarks-chunks; i.e.
        it will take 10 times longer to retrieve 500 bookmarks than 50.

        @param url: The URL of the web document to be queried for.
        @type url: str

        @param max_bookmarks: Optional, default: 50.
            See the documentation of get_bookmarks() for more information
            as get_url() uses get_bookmarks() to retrieve a url's
            bookmarking history.
        @type max_bookmarks: int

        @param sleep_seconds: Optional, default: 1.
            See the documentation of get_bookmarks() for more information
            as get_url() uses get_bookmarks() to retrieve a url's
            bookmarking history. sleep_seconds must be >= 1 to comply with
            Delicious.com's Terms of Use.
        @type sleep_seconds: int

        @return: DeliciousURL instance representing the Delicious.com history
            of url.

        """
        # we must wait at least 1 second between subsequent queries to
        # comply with Delicious.com's Terms of Use
        assert sleep_seconds >= 1

        document = DeliciousURL(url)

        # Delicious keys its urlinfo feed on the MD5 hex digest of the URL
        url_md5 = hashlib.md5(url).hexdigest()

        path = "/v2/json/urlinfo/%s" % url_md5
        data = self._query(path, host="feeds.delicious.com")
        if data:
            urlinfo = {}
            try:
                urlinfo = simplejson.loads(data)
                if urlinfo:
                    urlinfo = urlinfo[0]
                else:
                    urlinfo = {}
            except TypeError:
                pass
            try:
                document.title = urlinfo['title'] or u""
            except KeyError:
                pass
            try:
                top_tags = urlinfo['top_tags'] or {}
                if top_tags:
                    document.top_tags = sorted(top_tags.iteritems(), key=itemgetter(1), reverse=True)
                else:
                    document.top_tags = []
            except KeyError:
                pass
            try:
                document.total_bookmarks = int(urlinfo['total_posts'])
            except (KeyError, ValueError):
                pass
            document.bookmarks = self.get_bookmarks(url=url, max_bookmarks=max_bookmarks, sleep_seconds=sleep_seconds)


        return document
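A hypothetical usage sketch along the same lines, again assuming the surrounding class is deliciousapi.DeliciousAPI and using a placeholder URL; the attributes read here are the ones populated above.

import deliciousapi

api = deliciousapi.DeliciousAPI()
doc = api.get_url("http://stackoverflow.com/", max_bookmarks=50)
title = doc.title                    # u"" if Delicious has no title on record
top_tags = doc.top_tags              # list of (tag, count) pairs, most frequent first
total = doc.total_bookmarks          # overall bookmark count for the URL
bookmarks = doc.bookmarks            # detailed history via get_bookmarks()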
Example #20
 def get_favorites_questions(self, user_id, api_site_parameter, page,
                             **kwargs):
     return simplejson.loads(QUESTIONS)
Example #21
    def get_user(self, username, password=None, max_bookmarks=50, sleep_seconds=1):
        """Retrieves a user's bookmarks from Delicious.com.

        If a correct username AND password are supplied, a user's *full*
        bookmark collection (which also includes private bookmarks) is
        retrieved. Data communication is encrypted using SSL in this case.

        If no password is supplied, only the *public* bookmarks of the user
        are retrieved. Here, the parameter 'max_bookmarks' specifies how
        many public bookmarks will be retrieved (default: 50). Set the
        parameter to 0 to retrieve all public bookmarks.

        This function can be used to backup all of a user's bookmarks if
        called with a username and password.

        @param username: The Delicious.com username.
        @type username: str

        @param password: Optional, default: None.
            The user's Delicious.com password. If password is set,
            all communication with Delicious.com is SSL-encrypted.
        @type password: unicode/str

        @param max_bookmarks: Optional, default: 50.
            See the documentation of get_bookmarks() for more
            information as get_url() uses get_bookmarks() to
            retrieve a url's bookmarking history.
            The parameter is NOT used when a password is specified
            because in this case the *full* bookmark collection of
            a user will be retrieved.
        @type max_bookmarks: int

        @param sleep_seconds: Optional, default: 1.
            See the documentation of get_bookmarks() for more information as
            get_url() uses get_bookmarks() to retrieve a url's bookmarking
            history. sleep_seconds must be >= 1 to comply with Delicious.com's
            Terms of Use.
        @type sleep_seconds: int

        @return: DeliciousUser instance

        """
        assert username
        user = DeliciousUser(username)
        bookmarks = []
        if password:
            # We have username AND password, so we call
            # the official Delicious.com API.
            path = "/v1/posts/all"
            data = self._query(path, host="api.del.icio.us", use_ssl=True, user=username, password=password)
            if data:
                soup = BeautifulSoup(data)
                elements = soup.findAll("post")
                for element in elements:
                    url = element["href"]
                    title = element["description"] or u""
                    comment = element["extended"] or u""
                    tags = []
                    if element["tag"]:
                        tags = element["tag"].split()
                    timestamp = datetime.datetime.strptime(element["time"], "%Y-%m-%dT%H:%M:%SZ")
                    bookmarks.append( (url, tags, title, comment, timestamp) )
            user.bookmarks = bookmarks
        else:
            # We have only the username, so we extract data from
            # the user's JSON feed. However, the feed is restricted
            # to the user's most recent public bookmarks, roughly
            # 100 at most. So if we need more than 100, we start
            # scraping the Delicious.com website directly.
            if max_bookmarks > 0 and max_bookmarks <= 100:
                path = "/v2/json/%s/stackoverflow?count=100" % username
                data = self._query(path, host="feeds.delicious.com", user=username)
                if data:
                    posts = []
                    try:
                        posts = simplejson.loads(data)
                    except TypeError:
                        pass

                    for post in posts:
                        # reset per post so a missing key doesn't reuse the previous value
                        url = timestamp = None
                        title = comment = u""
                        tags = []
                        # url
                        try:
                            url = post['u']
                        except KeyError:
                            pass
                        # title
                        try:
                            title = post['d']
                        except KeyError:
                            pass
                        # tags
                        try:
                            tags = post['t']
                        except KeyError:
                            pass
                        if not tags:
                            tags = [u"system:unfiled"]
                        # comment / notes
                        try:
                            comment = post['n']
                        except KeyError:
                            pass
                        # bookmark creation time
                        try:
                            timestamp = datetime.datetime.strptime(post['dt'], "%Y-%m-%dT%H:%M:%SZ")
                        except KeyError:
                            pass
                        bookmarks.append( (url, tags, title, comment, timestamp) )
                    user.bookmarks = bookmarks[:max_bookmarks]
            else:
                # TODO: retrieve the first 100 bookmarks via JSON before
                #       falling back to scraping the delicious.com website
                user.bookmarks = self.get_bookmarks(username=username, max_bookmarks=max_bookmarks, sleep_seconds=sleep_seconds)
        return user
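A hypothetical usage sketch, under the same assumptions about the deliciousapi.DeliciousAPI class; username and password are placeholders.

import deliciousapi

api = deliciousapi.DeliciousAPI()

# Public bookmarks only (at most max_bookmarks of them):
user = api.get_user("example_user", max_bookmarks=100)

# With a password, the full collection (including private bookmarks) is
# fetched over SSL and max_bookmarks is ignored:
# user = api.get_user("example_user", password="secret")

for url, tags, title, comment, timestamp in user.bookmarks:
    pass  # each bookmark is a (url, tags, title, comment, timestamp) tuple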