示例#1
0
    def __init__(self, user, passwd):
        """
        Set up the instance for the given credentials.

        Stores 'user' and 'passwd', creates a ValidatedHTTPSConnection for
        ROBOT_HOST and a logger whose name contains the user name.
        """
        self.user, self.passwd = user, passwd
        self.conn = ValidatedHTTPSConnection(ROBOT_HOST)

        logger_name = "Robot of {0}".format(user)
        self.logger = logging.getLogger(logger_name)

        # The scraper is exposed so that API features which are not supported
        # here can still be reached easily.
        self.scraper = RobotWebInterface(user, passwd)
示例#2
0
 def connect(self, force=False):
     """
     Make sure self.conn holds a connection to the robot web interface.

     An already existing connection is kept as is, unless 'force' is set to
     True: in that case the old connection is closed and thrown away before
     a new one is created.
     """
     current = self.conn
     if current is not None:
         if not force:
             # Already connected and no reconnect was requested.
             return
         current.close()
         self.conn = None
     self.conn = ValidatedHTTPSConnection(ROBOT_WEBHOST)
示例#3
0
    def login(self, user=None, passwd=None, force=False):
        """
        Log into the robot web interface using self.user and self.passwd. If
        user/passwd is provided as arguments, those are used instead and
        self.user/self.passwd are updated accordingly.

        Nothing is done if we're already logged in, unless 'force' is set to
        True, in which case the connection is re-established and the whole
        login procedure is repeated.

        Raises WebRobotError if the credentials are missing, if the user is a
        dedicated web service user ("#ws+" prefix) or if any step of the
        redirect chain between the Robot and the authentication site yields
        an unexpected response.
        """
        if self.logged_in and not force:
            return

        self.connect(force=force)

        # Update self.user and self.passwd in case we need to re-establish the
        # connection.
        if user is not None:
            self.user = user
        if passwd is not None:
            self.passwd = passwd

        if self.user is None or self.passwd is None:
            raise WebRobotError("Login credentials for the web user interface "
                                "are missing.")

        if self.user.startswith("#ws+"):
            raise WebRobotError("The user {0} is a dedicated web service user "
                                "and cannot be used for scraping the web user "
                                "interface.".format(self.user))

        # Redirect targets are validated against these prefixes. Slicing a
        # URL at len(prefix) - 1 strips scheme and host but keeps the leading
        # slash of the path.
        web_prefix = 'https://' + ROBOT_WEBHOST + '/'
        login_prefix = 'https://' + ROBOT_LOGINHOST + '/'
        redirect_msg = "https://{0}/ does not redirect to https://{1}/ " \
                       "but instead redirects to: {2}"

        # We need to first visit the Robot so that we later get an OAuth token
        # for the Robot from the authentication site.
        self.logger.debug("Visiting Robot web frontend for the first time.")
        auth_url = self.request('/', xhr=False).getheader('location')

        # Without a Location header there is nothing to follow; fail with a
        # proper error instead of an AttributeError on None.
        if auth_url is None:
            raise WebRobotError("https://{0}/ did not respond with a redirect "
                                "to the authentication "
                                "site.".format(ROBOT_WEBHOST))

        if not auth_url.startswith(login_prefix):
            raise WebRobotError(
                redirect_msg.format(ROBOT_WEBHOST, ROBOT_LOGINHOST, auth_url))

        self.logger.debug("Following authentication redirect to %r.", auth_url)

        # This is primarily for getting a first session cookie.
        login_conn = ValidatedHTTPSConnection(ROBOT_LOGINHOST)
        try:
            login_conn.request('GET', auth_url[len(login_prefix) - 1:], None)

            response = login_conn.getresponse()
            if response.status != 302:
                raise WebRobotError("Invalid status code {0} while visiting "
                                    "auth URL".format(response.status))

            cookies = self._parse_cookies(response)
        finally:
            # Every request to the auth site uses its own connection, so
            # release it as soon as the response has been evaluated.
            login_conn.close()

        if "PHPSESSID" not in cookies:
            msg = "Auth site didn't respond with a session cookie."
            raise WebRobotError(msg)

        self.logger.debug("Session ID for auth site is %r.",
                          cookies['PHPSESSID'])

        # Make sure that we always send the auth site's session ID in
        # subsequent requests.
        cookieval = '; '.join(k + '=' + v for k, v in cookies.items())
        headers = {'Cookie': cookieval}

        self.logger.debug("Visiting login page at https://%s/login.",
                          ROBOT_LOGINHOST)

        # Note that the auth site doesn't seem to support keep-alives, so we
        # need to reconnect here.
        login_conn = ValidatedHTTPSConnection(ROBOT_LOGINHOST)
        try:
            login_conn.request('GET', "/login", None, headers)

            response = login_conn.getresponse()
            if response.status != 200:
                raise WebRobotError("Invalid status code {0} while visiting "
                                    "login page".format(response.status))
        finally:
            login_conn.close()

        data = urlencode({'_username': self.user, '_password': self.passwd})
        self.logger.debug("Logging in to auth site with user %s.", self.user)

        # Again, we need to reconnect here.
        post_headers = headers.copy()
        post_headers['Content-Type'] = 'application/x-www-form-urlencoded'
        login_conn = ValidatedHTTPSConnection(ROBOT_LOGINHOST)
        try:
            login_conn.request('POST', '/login_check', data, post_headers)
            response = login_conn.getresponse()

            # Collect everything we need from the response before the
            # connection is closed.
            cookies = self._parse_cookies(response)
            status = response.status
            location = response.getheader('Location')
        finally:
            login_conn.close()

        # Here, if the authentication is successful another session is started
        # and we get a new session ID.
        if "PHPSESSID" not in cookies:
            raise WebRobotError("Login to robot web interface failed.")
        self.logger.debug("New session ID for auth site after login is %r.",
                          cookies['PHPSESSID'])
        cookieval = '; '.join(k + '=' + v for k, v in cookies.items())
        headers['Cookie'] = cookieval

        # This should be the actual OAuth authorization URL.
        if status != 302 or location is None:
            raise WebRobotError("Unable to get OAuth authorization URL.")

        if not location.startswith(login_prefix):
            raise WebRobotError(
                redirect_msg.format(ROBOT_LOGINHOST, ROBOT_LOGINHOST,
                                    location))

        self.logger.debug("Got redirected, visiting %r.", location)

        login_conn = ValidatedHTTPSConnection(ROBOT_LOGINHOST)
        try:
            login_conn.request('GET', location[len(login_prefix) - 1:], None,
                               headers)
            response = login_conn.getresponse()

            status = response.status
            location = response.getheader('Location')
        finally:
            login_conn.close()

        # We now should get an URL back to the Robot web interface.
        if status != 302 or location is None:
            raise WebRobotError("Failed to get OAuth URL for Robot.")
        if not location.startswith(web_prefix):
            # Report the URL we actually got redirected to, not the initial
            # authentication URL.
            raise WebRobotError(
                redirect_msg.format(ROBOT_LOGINHOST, ROBOT_WEBHOST, location))

        self.logger.debug("Going back to Robot web interface via %r.",
                          location)

        # Reconnect to Robot with the OAuth token.
        self.connect(force=True)
        response = self.request(location[len(web_prefix) - 1:], xhr=False)

        if response.status != 302:
            raise WebRobotError("Status after providing OAuth token should be"
                                " 302 and not {0}".format(response.status))

        if response.getheader('location') != web_prefix:
            raise WebRobotError("Robot login with OAuth token has failed.")

        self.logged_in = True