def test_copy(self):
    """A copy compares equal to its source until the source is changed."""
    initial_headers = {
        'Accept': 'application/json',
        'user-Agent': 'requests',
    }
    source = CaseInsensitiveDict(initial_headers)
    duplicate = source.copy()

    # Freshly copied: both mappings hold the same entries.
    assert source == duplicate

    # Mutating the source must not leak into the copy.
    source['changed'] = True
    assert source != duplicate
class TestCaseInsensitiveDict:
    """Checks for ``CaseInsensitiveDict`` built around a single "Accept" entry."""

    @pytest.fixture(autouse=True)
    def setup(self):
        """Give every test a fresh dict holding only the "Accept" header."""
        headers = CaseInsensitiveDict()
        headers["Accept"] = "application/json"
        self.case_insensitive_dict = headers

    def test_list(self):
        """Iteration yields the key with the casing it was stored under."""
        keys = list(self.case_insensitive_dict)
        assert keys == ["Accept"]

    # Different spellings of the stored key; each must address the same entry.
    possible_keys = pytest.mark.parametrize(
        "key",
        ("accept", "ACCEPT", "aCcEpT", "Accept"),
    )

    @possible_keys
    def test_getitem(self, key):
        """Lookup ignores the casing of the requested key."""
        value = self.case_insensitive_dict[key]
        assert value == "application/json"

    @possible_keys
    def test_delitem(self, key):
        """Deletion ignores the casing of the requested key."""
        del self.case_insensitive_dict[key]
        assert key not in self.case_insensitive_dict

    def test_lower_items(self):
        """``lower_items`` yields pairs whose keys are lower-cased."""
        expected = [("accept", "application/json")]
        assert list(self.case_insensitive_dict.lower_items()) == expected

    def test_repr(self):
        """The repr shows the entry with its original key casing."""
        expected = "{'Accept': 'application/json'}"
        assert repr(self.case_insensitive_dict) == expected

    def test_copy(self):
        """``copy`` returns a distinct object that compares equal."""
        duplicate = self.case_insensitive_dict.copy()
        assert duplicate is not self.case_insensitive_dict
        assert duplicate == self.case_insensitive_dict

    @pytest.mark.parametrize(
        "other, result",
        (
            ({"AccePT": "application/json"}, True),
            ({}, False),
            (None, False),
        ),
    )
    def test_instance_equality(self, other, result):
        """Equality against a mapping, an empty mapping and a non-mapping."""
        outcome = self.case_insensitive_dict == other
        assert outcome is result
class TestCaseInsensitiveDict:
    """Test suite for ``CaseInsensitiveDict`` seeded with one "Accept" header."""

    @pytest.fixture(autouse=True)
    def setup(self):
        """
        Runs before each test: stores a new dict with only "Accept" set.
        """
        fresh = CaseInsensitiveDict()
        fresh['Accept'] = 'application/json'
        self.case_insensitive_dict = fresh

    def test_list(self):
        """Listing the dict gives back the originally-cased key."""
        listed = list(self.case_insensitive_dict)
        assert listed == ['Accept']

    # Case variants of the stored key, shared by the lookup/delete tests.
    possible_keys = pytest.mark.parametrize(
        'key',
        ('accept', 'ACCEPT', 'aCcEpT', 'Accept'),
    )

    @possible_keys
    def test_getitem(self, key):
        """Every case variant reads the stored value."""
        found = self.case_insensitive_dict[key]
        assert found == 'application/json'

    @possible_keys
    def test_delitem(self, key):
        """Every case variant can be used to delete the entry."""
        del self.case_insensitive_dict[key]
        assert key not in self.case_insensitive_dict

    def test_lower_items(self):
        """``lower_items`` reports the key lower-cased next to its value."""
        lowered = list(self.case_insensitive_dict.lower_items())
        assert lowered == [('accept', 'application/json')]

    def test_repr(self):
        """The repr keeps the key casing used at insertion."""
        text = repr(self.case_insensitive_dict)
        assert text == "{'Accept': 'application/json'}"

    def test_copy(self):
        """A copy is a separate object with equal content."""
        clone = self.case_insensitive_dict.copy()
        assert clone is not self.case_insensitive_dict
        assert clone == self.case_insensitive_dict

    @pytest.mark.parametrize(
        'other, result',
        (
            ({'AccePT': 'application/json'}, True),
            ({}, False),
            (None, False),
        ),
    )
    def test_instance_equality(self, other, result):
        """Compare against an equivalent dict, an empty dict and ``None``."""
        is_equal = (self.case_insensitive_dict == other)
        assert is_equal is result
class Client:
    """Attribute-chaining client for a KwikAPI server.

    Attribute access appends a path segment and returns a new client
    (``client.namespace.method``); calling a client that has a path sends
    the keyword arguments as the request body and returns the result.
    Calling a client with an empty path returns a reconfigured copy
    instead (``client(timeout=5)``).
    """

    DEFAULT_PROTOCOL = "messagepack"

    def __init__(
        self,
        url,
        version=None,
        protocol=DEFAULT_PROTOCOL,
        path=None,
        request="",
        timeout=None,
        dnscache=None,
        headers=None,
        auth=None,
        stream=False,
        log=DUMMY_LOG,
        raise_exception=True,
    ):
        """Store the connection settings; no network I/O happens here.

        :param url: base URL of the API server
        :param version: optional API version, used as the first path segment
        :param protocol: name of the serialization protocol (key of PROTOCOLS)
        :param path: list of path segments accumulated so far
        :param request: incoming request whose ``x-kwikapi-*`` headers and id
            are forwarded; falsy to disable forwarding
        :param timeout: socket timeout in seconds for the HTTP call, or None
            to use the global default
        :param dnscache: object providing ``map_url``; a ``DNSCache`` is
            created when omitted
        :param headers: extra headers sent with every request
        :param auth: optional object whose ``sign(url, headers, body)`` is
            called before sending
        :param stream: when true, responses are decoded lazily as a stream
        :param log: logger used for debug output
        :param raise_exception: when true, failed responses raise
            ``ResponseError``; otherwise the error object is returned
        """
        headers = headers or {}

        self._url = url
        self._version = version
        self._protocol = protocol  # FIXME: check validity
        self._path = path or []
        self._request = request
        self._timeout = timeout
        self._dnscache = dnscache
        self._headers = CaseInsensitiveDict(headers)
        self._auth = auth
        self._stream = stream
        self._log = log
        self._raise_exception = raise_exception

        if not self._dnscache:
            self._dnscache = DNSCache()

    def _get_state(self):
        """Return the constructor keyword arguments describing this client."""
        return dict(
            url=self._url,
            version=self._version,
            protocol=self._protocol,
            path=self._path,
            request=self._request,
            timeout=self._timeout,
            dnscache=self._dnscache,
            headers=self._headers,
            auth=self._auth,
            stream=self._stream,
            log=self._log,
            raise_exception=self._raise_exception,
        )

    def _copy(self, **kwargs):
        """Return a new Client with this client's state overridden by kwargs."""
        _kwargs = self._get_state()
        _kwargs.update(kwargs)
        return Client(**_kwargs)

    def _prepare_request(self, post_body, get_params=None):
        """Build the final URL and headers for a call.

        :return: ``(url, post_body, headers)``
        """
        headers = self._headers.copy()

        if self._request:
            # Forward only the KwikAPI-specific headers of the parent request.
            for hk, hv in self._request.headers.items():
                if not hk.lower().startswith("x-kwikapi-"):
                    continue
                headers[hk] = hv

            headers[REQUEST_ID_HEADER] = self._request.id

        headers[PROTOCOL_HEADER] = self._protocol

        upath = [self._version] + self._path
        upath = "/".join(x for x in upath if x)
        url = urljoin(self._url, upath)

        if get_params:
            url = "{}?{}".format(url, urlencode(get_params))

        url = self._dnscache.map_url(url)

        if self._auth:
            self._auth.sign(url, headers, post_body)

        return url, post_body, headers

    def _make_request(self, url, post_body, headers):
        """Send the request and return the decoded result (or a generator
        of results in stream mode)."""
        req = urllib.request.Request(url, data=post_body, headers=headers)

        # Apply the configured timeout. It is passed only when set, because
        # ``timeout=None`` would mean "block forever" rather than "use the
        # global default socket timeout".
        if self._timeout is None:
            res = urllib.request.urlopen(req)
        else:
            res = urllib.request.urlopen(req, timeout=self._timeout)

        # The server may answer in a different protocol than the one asked for.
        proto = PROTOCOLS[res.headers.get("X-KwikAPI-Protocol", self._protocol)]

        if self._stream:
            res = proto.deserialize_stream(res)
            res = Client._extract_stream_response(res, self._raise_exception)
        else:
            res = self._deserialize_response(res.read(), proto, self._raise_exception)

        return res

    @staticmethod
    def _deserialize_response(data, proto, raise_exception=True):
        """Decode ``data`` with ``proto`` (name or protocol object) and
        unwrap the response envelope."""
        proto = Client._get_protocol(proto)  # Checking for a valid protocol object
        r = proto.deserialize(data)
        return Client._extract_response(r, raise_exception)

    @staticmethod
    def _get_protocol(proto):
        """Resolve a protocol name to its PROTOCOLS entry; pass objects through."""
        if isinstance(proto, str):
            return PROTOCOLS[proto]
        return proto

    @staticmethod
    def _extract_response(r, raise_exception=True):
        """Unwrap a response envelope.

        On success returns ``r["result"]``. On failure builds a
        ``ResponseError`` from the remaining fields and either raises it or
        returns it, depending on ``raise_exception``.
        """
        success = r["success"]
        if not success:
            r.pop("success")
            r = ResponseError(r)
            if raise_exception:
                raise r
        else:
            r = r["result"]

        return r

    @staticmethod
    def _extract_stream_response(res, raise_exception=True):
        """Lazily unwrap every envelope of a streamed response."""
        for r in res:
            yield Client._extract_response(r, raise_exception)

    @staticmethod
    def _serialize_params(params, protocol):
        """Serialize ``params`` with the protocol registered under ``protocol``."""
        proto = PROTOCOLS[protocol]
        data = proto.serialize(params)
        return data

    def __call__(self, *args, **kwargs):
        """Invoke the remote method, or reconfigure when no path is set.

        :raises NonKeywordArgumentsError: if positional arguments are given
        """
        if args:
            raise NonKeywordArgumentsError(args)

        if self._path:
            # FIXME: support streaming in both directions
            _kwargs = get_loggable_params(kwargs or {})
            self._log.debug(
                "kwikapi.client.__call__",
                path=self._path,
                kwargs=_kwargs,
                url=self._url,
                version=self._version,
                protocol=self._protocol,
            )

            post_body = self._serialize_params(kwargs, self._protocol)
            url, post_body, headers = self._prepare_request(post_body)
            res = self._make_request(url, post_body, headers)

            return res

        return self._copy(**kwargs)

    def __getattr__(self, attr):
        """Return a child client whose path is extended by ``attr``.

        Dunder names are never API path segments. They are probed by the
        interpreter and stdlib (``copy``, ``pickle``, ...), sometimes on a
        not-yet-initialised instance, where building a child client would
        recurse infinitely or trigger a network call. Report them as missing.
        """
        if attr.startswith("__") and attr.endswith("__"):
            raise AttributeError(attr)
        return self._copy(path=self._path + [attr])
class BasePixivAPI(object):
    """Base HTTP/auth layer for the Pixiv API: header handling, OAuth token
    acquisition and image download."""

    client_id = 'MOBrBDS8blbauoSck0ZfDbtuzpyT'
    client_secret = 'lsACyCD94FhDUtGTXi3QzcFE2uU1hqtDaKeqrdwj'
    hash_secret = '28c1fdd170a5204386cb1313c7077b34f83e4aaf4aa829ce78c231e05b0bae2c'

    def __init__(self, **requests_kwargs):
        """initialize requests kwargs if need be

        A ``headers`` keyword is removed from ``requests_kwargs`` and used as
        the default additional headers; every other keyword is forwarded to
        each HTTP call.
        """
        self.user_id = 0
        self.access_token = None
        self.refresh_token = None

        # A cloudscraper session is used instead of requests.Session()
        # (fix due to #140).
        self.requests = cloudscraper.create_scraper()
        self.additional_headers = CaseInsensitiveDict(
            requests_kwargs.pop('headers', {}))
        self.requests_kwargs = requests_kwargs

    def set_additional_headers(self, headers):
        """manually specify additional headers. will overwrite API default headers in case of collision"""
        self.additional_headers = CaseInsensitiveDict(headers)

    # Set the HTTP Accept-Language header (used to get tags' translated_name
    # in the matching language).
    # language: en-us, zh-cn, ...
    def set_accept_language(self, language):
        """set header Accept-Language for all requests (useful for get tags.translated_name)"""
        self.additional_headers['Accept-Language'] = language

    @classmethod
    def parse_json(cls, json_str):
        """parse str into JsonDict"""
        return json.loads(json_str, object_hook=JsonDict)

    def require_auth(self):
        """Raise PixivError unless an access token has been set."""
        if self.access_token is None:
            raise PixivError(
                'Authentication required! Call login() or set_auth() first!')

    def requests_call(self, method, url, headers=None, params=None, data=None, stream=False):
        """requests http/https call for Pixiv API

        :param method: 'GET', 'POST' or 'DELETE'
        :param headers: per-call headers; they override ``additional_headers``
        :return: the response object returned by the underlying session
        :raises PixivError: if the request raises, or the method is unknown
        """
        merged_headers = self.additional_headers.copy()
        if headers:
            # Use the headers in the parameter to override the
            # additional_headers setting.
            merged_headers.update(headers)

        try:
            if method == 'GET':
                return self.requests.get(url, params=params, headers=merged_headers,
                                         stream=stream, **self.requests_kwargs)
            elif method == 'POST':
                return self.requests.post(url, params=params, data=data,
                                          headers=merged_headers, stream=stream,
                                          **self.requests_kwargs)
            elif method == 'DELETE':
                return self.requests.delete(url, params=params, data=data,
                                            headers=merged_headers, stream=stream,
                                            **self.requests_kwargs)
        except Exception as e:
            # Keep the original exception as __cause__ for debugging.
            raise PixivError('requests %s %s error: %s' % (method, url, e)) from e

        raise PixivError('Unknown method: %s' % method)

    def set_auth(self, access_token, refresh_token=None):
        """Store already-obtained tokens without contacting the server."""
        self.access_token = access_token
        self.refresh_token = refresh_token

    def login(self, username, password):
        """Shortcut for ``auth(username=..., password=...)``."""
        return self.auth(username=username, password=password)

    def set_client(self, client_id, client_secret):
        """Override the OAuth client credentials on this instance."""
        self.client_id = client_id
        self.client_secret = client_secret

    def auth(self, username=None, password=None, refresh_token=None, headers=None):
        """Login with password, or use the refresh_token to acquire a new bearer token

        :return: the parsed auth/token response
        :raises PixivError: if no credentials are available, the server
            rejects them, or the response body is not valid JSON
        """
        local_time = datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%S+00:00')
        headers = CaseInsensitiveDict(headers or {})
        headers['x-client-time'] = local_time
        headers['x-client-hash'] = hashlib.md5(
            (local_time + self.hash_secret).encode('utf-8')).hexdigest()

        # Allow mock UA due to #171: https://github.com/upbit/pixivpy/issues/171
        if 'user-agent' not in headers:
            headers['app-os'] = 'ios'
            headers['app-os-version'] = '14.6'
            headers['user-agent'] = 'PixivIOSApp/7.13.3 (iOS 14.6; iPhone13,2)'

        # noinspection PyUnresolvedReferences
        if not hasattr(self, 'hosts') or self.hosts == 'https://app-api.pixiv.net':
            auth_hosts = 'https://oauth.secure.pixiv.net'
        else:
            # Case where the API host was replaced (e.g. resolved to an IP):
            # send the request there with an explicit Host header.
            # noinspection PyUnresolvedReferences
            auth_hosts = self.hosts
            headers['host'] = 'oauth.secure.pixiv.net'

        url = '%s/auth/token' % auth_hosts
        data = {
            'get_secure_url': 1,
            'client_id': self.client_id,
            'client_secret': self.client_secret,
        }

        if username and password:
            data['grant_type'] = 'password'
            data['username'] = username
            data['password'] = password
        elif refresh_token or self.refresh_token:
            data['grant_type'] = 'refresh_token'
            data['refresh_token'] = refresh_token or self.refresh_token
        else:
            raise PixivError(
                '[ERROR] auth() but no password or refresh_token is set.')

        r = self.requests_call('POST', url, headers=headers, data=data)
        if r.status_code not in {200, 301, 302}:
            if data['grant_type'] == 'password':
                raise PixivError(
                    '[ERROR] auth() failed! check username and password.\nHTTP %s: %s'
                    % (r.status_code, r.text),
                    header=r.headers,
                    body=r.text,
                )
            else:
                raise PixivError(
                    '[ERROR] auth() failed! check refresh_token.\nHTTP %s: %s'
                    % (r.status_code, r.text),
                    header=r.headers,
                    body=r.text,
                )

        try:
            # get access_token
            token = self.parse_json(r.text)
            self.user_id = token.response.user.id
            self.access_token = token.response.access_token
            self.refresh_token = token.response.refresh_token
        except json.JSONDecodeError as e:
            # Parsing failed, so there is no token object to show; report the
            # raw response text instead.
            raise PixivError('Get access_token error! Response: %s' % r.text,
                             header=r.headers, body=r.text) from e

        # return auth/token response
        return token

    def download(self, url, prefix='', path=os.path.curdir, name=None,
                 replace=False, fname=None,
                 referer='https://app-api.pixiv.net/'):
        """Download image to file (use 6.0 app-api)

        :param fname: a file name, or a writable file-like object that
            receives the bytes directly
        :return: False if the target file already exists and ``replace`` is
            false (nothing is downloaded), True after a completed download
        """
        if hasattr(fname, 'write'):
            # A file-like object has been provided.
            file = fname
        else:
            # Determine file path by parameters.
            name = prefix + (name or fname or os.path.basename(url))
            file = os.path.join(path, name)
            if os.path.exists(file) and not replace:
                return False

        with self.requests_call('GET', url, headers={'Referer': referer},
                                stream=True) as response:
            if isinstance(file, str):
                with open(file, 'wb') as out_file:
                    shutil.copyfileobj(response.raw, out_file)
            else:
                shutil.copyfileobj(response.raw, file)
        return True
class Site:
    """
    This object represents a MediaWiki API endpoint, e.g. https://en.wikipedia.org/w/api.php

    * url: Full url to site's api.php
    * session: current request.session object
    * logger: a logging.Logger used for output. The 'pywikiapi' logger
      is used by default
    """

    def __init__(self, url, headers=None, session=None, logger=None, json_object_hook=None):
        """
        Create a new Site object with a given MediaWiki API endpoint.
        You should always set a `User-Agent` header to identify your bot
        and allow site owner to contact you in case your bot misbehaves.
        By default, User-Agent is derived from the location of the running script.

        :param str url: API endpoint URL, e.g. https://en.wikipedia.org/w/api.php
        :param Union[dict, CaseInsensitiveDict] headers: Optional headers as a dict.
        :param requests.Session session: Allows user-supplied custom Session parameters, e.g. retries.
        :param logging.Logger logger: Optional logger object for custom log output
        :param object json_object_hook: use this param to set a custom json object creator,
            e.g. pywikiapi.AttrDict. AttrDict allows direct property access to the result,
            e.g response.query.allpages in addition to response['query']['allpages']
        """
        if logger is None:
            self.logger = logging.getLogger('pywikiapi')
            self.logger.setLevel(logging.INFO)
        else:
            self.logger = logger

        self.json_object_hook = json_object_hook
        self.session = session if session else requests.Session()
        self.url = url
        self.tokens = {}
        self.no_ssl = False  # For non-ssl sites, might be needed to avoid HTTPS
        self._is_bot = None  # Will be set by the is_bot()
        self.maxlag = 5  # See https://www.mediawiki.org/wiki/Manual:Maxlag_parameter

        # If request is bigger than this, use POST instead
        self.auto_post_min_size = 2000

        # Number of retries to do in case of the lag error.
        # 0 - don't retry. negative - infinite.
        self.retry_on_lag_error = 10

        # This var will contain (username,password) after the .login()
        # in case of the login-on-demand mode
        self._loginOnDemand = False  # type: Union[Tuple[str, str], bool]
        self.logged_in = False

        self.headers = CaseInsensitiveDict()
        if headers:
            self.headers.update(headers)

        if u'User-Agent' not in self.headers:
            try:
                script = Path(sys.modules['__main__'].__file__)
            except (KeyError, AttributeError):
                script = Path(sys.executable)
            # NOTE(review): this uses the script's grandparent directory name,
            # not its immediate directory; confirm that is intended.
            self.headers[u'User-Agent'] = \
                f'{script.parent.parent.name}-{script.name} pywikiapi/4.3.0'

    def __call__(self, action, **kwargs):
        """
        Make an API call with any arguments provided as named values:

            data = site('query', meta='siteinfo')

        By default uses GET request to the default URL set in the Site constructor.
        In case of an error, ApiError exception will be raised
        Any warnings will be logged via the logging interface

        :param str action : any of the MW API actions, e.g. 'query' and 'login'

        Several special "magic" parameters could be used to customize api call.
        Special parameters must be all CAPS to avoid collisions with the server API:
        :param POST: Use POST method when calling server API. Value is ignored.
        :param HTTPS: Force https (ssl) protocol for this request. Value is ignored.
        :param SSL: Same as HTTPS
        :param EXTRAS: Any extra parameters as passed to requests session.request().
            Value is a dict()
        :param NO_LOGIN: do not attempt to do a login step if True
        """
        if self._loginOnDemand and action != 'login' and (
                'NO_LOGIN' not in kwargs or not kwargs['NO_LOGIN']
        ):
            self.login(self._loginOnDemand[0], self._loginOnDemand[1])

        method, request_kw = self._prepare_call(action, kwargs)

        # Repeat the request while the server reports a 'maxlag' error and
        # the retry budget allows it.
        try_count = 0
        while True:
            try_count += 1
            response = self.request(method, **request_kw)
            data = self.parse_json(response)
            try:
                if data['error']['code'] != 'maxlag':
                    break
            except KeyError:
                break

            retry_after = float(response.headers.get('Retry-After', 5))
            # A negative retry_on_lag_error never satisfies "0 <=", so it
            # retries indefinitely.
            no_retry = 0 <= self.retry_on_lag_error < try_count

            if self.logger.isEnabledFor(logging.WARNING if no_retry else logging.INFO):
                # X-Database-Lag: The number of seconds of lag of the most lagged slave
                message = "Server exceeded maxlag"
                if not no_retry:
                    message += f", retrying in {retry_after}s"
                if 'lag' in data['error']:
                    message += f", lag={data['error']['lag']}"
                message += f", API={self.url}"

                log = self.logger.warning if no_retry else self.logger.info
                log(message, {
                    'code': 'maxlag-retry',
                    'retry-after': retry_after,
                    'lag': data['error']['lag'] if 'lag' in data['error'] else None,
                    'x-database-lag': response.headers.get('X-Database-Lag', 5)
                })

            if no_retry:
                break
            time.sleep(retry_after)

        # Handle success and failure
        if 'error' in data:
            raise ApiError('Server API Error', data['error'])

        if 'warnings' in data and self.logger.isEnabledFor(logging.WARNING):
            # One line per module; the 'main' module's warnings sort first.
            message = '\n'.join((
                str(vv[1]['warnings'] if 'warnings' in vv[1] else vv[1])
                for vv in sorted(data['warnings'].items(),
                                 key=lambda v: '' if v[0] == 'main' else v[0])))
            self.logger.warning(message, dict(code='server-warnings', warnings=data['warnings']))

        return data

    def _prepare_call(self, action, kwargs):
        """
        Prepares parameters before calling MW API

        :param str action: which MW API action to do
        :param dict kwargs: key-value parameters as passed to the self.__call__();
            modified in place (magic params removed, values converted)
        :return: tuple of (HTTP method, keyword arguments for self.request())
        """
        # Magic CAPS parameters
        method = 'POST' if 'POST' in kwargs or action in ['login', 'edit'] else 'GET'

        # Copy EXTRAS so that the keys added below never leak into a dict
        # the caller may reuse for other calls.
        request_kw = dict(kwargs['EXTRAS']) if 'EXTRAS' in kwargs else dict()
        request_kw['force_ssl'] = \
            not self.no_ssl and \
            (action == 'login' or 'SSL' in kwargs or 'HTTPS' in kwargs)

        # Clean up magic CAPS params as they shouldn't be passed to the server
        for k in ['POST', 'SSL', 'HTTPS', 'EXTRAS', 'NO_LOGIN']:
            if k in kwargs:
                del kwargs[k]

        def update_value(value):
            """Convert one value to its API string form; None means 'omit'."""
            if value is None:
                return None
            if isinstance(value, datetime):
                # .isoformat() wouldn't work because it sometimes
                # produces +00:00 that MW does not support
                # Also perform sanity check here to make sure this is a UTC time
                if value.tzinfo is not None and value.tzinfo.utcoffset(value):
                    raise ValueError('datetime value has a non-UTC timezone')
                return value.strftime('%Y-%m-%dT%H:%M:%SZ')
            if isinstance(value, bool):
                # True is sent as '1'; False drops the parameter entirely.
                return '1' if value else None
            return str(value)

        for k, val in list(kwargs.items()):
            # Support all iterables as lists except for strings
            try:
                iter(val)
                iterable = not isinstance(val, str)
            except TypeError:
                iterable = False
            if iterable:
                val = [update_value(v) for v in val]
                kwargs[k] = u'|'.join(filter(lambda v: v is not None, val))
            else:
                val = update_value(val)
                if val is not None:
                    kwargs[k] = val
                else:
                    del kwargs[k]

        # Make server call
        kwargs['action'] = action
        kwargs['format'] = 'json'

        if 'formatversion' not in kwargs:
            kwargs['formatversion'] = 2

        if self.maxlag is not None and 'maxlag' not in kwargs:
            kwargs['maxlag'] = self.maxlag

        # Estimate the size of the utf-8 encoded URL, and auto-switch to POST if too big
        data_size = sum(
            len(str(k).encode('utf-8')) + len(str(v).encode('utf-8')) + 2
            for k, v in kwargs.items())
        if data_size > self.auto_post_min_size:
            method = 'POST'

        if method == 'POST':
            request_kw['data'] = kwargs
        else:
            request_kw['params'] = kwargs

        return method, request_kw

    def login(self, user, password, on_demand=False):
        """
        Log in, or remember the credentials for a later login.

        :param str user: user login name
        :param str password: user password
        :param bool on_demand: postpone login until an actual API request is made
        :raises ApiError: if the server does not report 'Success'
        """
        self.tokens = {}
        if on_demand:
            self._loginOnDemand = (user, password)
            return

        res = self('login', lgname=user, lgpassword=password,
                   lgtoken=self.token('login'))['login']
        if res['result'] != 'Success':
            raise ApiError('Login failed', res)

        self._loginOnDemand = False
        self.logged_in = True

    def is_bot(self) -> bool:
        """
        Checks if the current user account has the "bot" user right.
        The answer is fetched once and cached on the instance.
        """
        if self._is_bot is None:
            res = self('query', meta='userinfo', uiprop='rights')
            # Item access works for plain dicts (the default when no
            # json_object_hook is set) as well as for dict subclasses.
            self._is_bot = 'bot' in res['query']['userinfo']['rights']
        return self._is_bot

    def query(self, **kwargs):
        """
        Call Query API with given parameters, and yield all results returned
        by the server, properly handling result continuation.
        """
        return self.iterate('query', **kwargs)

    def iterate(self, action, **kwargs):
        """
        Call any "continuation" style MW API with given parameters, such as the 'query' API.
        Yields all results returned by the server, properly handling result continuation.
        Use generator.send({...}) to dynamically adjust next request's
        parameters with the new parameters.

        :param str action: MW API action, e.g. 'query'
        :param kwargs: any API parameters
        :return: yields each response from the server
        """
        if 'rawcontinue' in kwargs:
            raise ValueError("rawcontinue is not supported with query() function, "
                             "use object's __call__()")
        if 'formatversion' in kwargs:
            raise ValueError("version is not supported with query() function, "
                             "use object's __call__()")
        if 'continue' not in kwargs:
            kwargs['continue'] = ''
        req = kwargs
        req['formatversion'] = 2
        while True:
            result = self(action, **req)
            if action in result:
                adjustments = yield result[action]
            else:
                adjustments = None
            if 'continue' not in result:
                break
            # re-send all continue values in the next call
            req = kwargs.copy()
            req.update(result['continue'])
            if adjustments:
                req.update(adjustments)

    def query_pages(self, **kwargs):
        """
        Query the server and yield all page objects one by one.
        This method makes sure that results received in multiple responses
        are correctly merged together.
        If any of the pages change during iteration, ApiPagesModifiedError(list)
        will be thrown after all other pages have been processed and yielded.
        """
        # A dict with incomplete page objects
        incomplete = {}
        # A set of page ids that we will ignore because
        # they have been modified during iteration
        modified = set()
        # Titles of missing pages that were already yielded
        missing = set()

        for result in self.query(**kwargs):
            if 'pages' not in result:
                raise ApiError('Missing pages element in query result', result)

            new_incomplete = {}
            for page in result['pages']:
                if 'missing' in page:
                    if page['title'] not in missing:
                        yield page
                        missing.add(page['title'])
                    continue
                page_id = page['pageid']
                if page_id in modified:
                    continue
                if page_id in incomplete:
                    p = incomplete[page_id]
                    del incomplete[page_id]
                    if 'lastrevid' in page and p['lastrevid'] != page['lastrevid']:
                        # someone else modified this page,
                        # it must be requested separately in a new query
                        modified.add(page_id)
                        continue
                    # Merge additional page data into the same dict
                    self._merge_page(p, page)
                else:
                    p = page
                new_incomplete[page_id] = p

            # Yield all pages that have not been mentioned in the last response
            for page_id, page in incomplete.items():
                yield page
            incomplete = new_incomplete

        # Iteration is done, all incomplete are thus complete
        for page_id, page in incomplete.items():
            yield page

        if modified:
            # some pages have been modified between api calls, notify caller
            raise ApiPagesModifiedError(list(modified))

    def _merge_page(self, a, b):
        """
        Recursively merge page object ``b`` into ``a`` (in place):
        dicts are merged, lists concatenated, other values overwritten.
        """
        for k in b:
            val = b[k]
            if k in a:
                if isinstance(val, dict):
                    self._merge_page(a[k], val)
                elif isinstance(val, list):
                    a[k] = a[k] + val
                else:
                    a[k] = val
            else:
                a[k] = val

    def token(self, token_type='csrf'):
        """
        Get an api token. Tokens are cached per type until the next login().

        :param str token_type: token type, e.g. 'csrf' or 'login'
        :return: str
        """
        if token_type not in self.tokens:
            # Fetching the login token must not itself trigger a login.
            res = self.query(meta='tokens', type=token_type,
                             NO_LOGIN=token_type == 'login')
            self.tokens[token_type] = next(res)['tokens'][token_type + 'token']
        return self.tokens[token_type]

    def request(self, method, force_ssl=False, headers=None, **request_kw):
        """Make a low level request to the server

        :raises ApiError: if the response status is not OK
        """
        url = self.url
        if force_ssl:
            parts = list(urlparse.urlparse(url))
            parts[0] = 'https'
            url = urlparse.urlunparse(parts)
        if headers:
            # Per-call headers override the site-wide ones.
            h = self.headers.copy()
            h.update(headers)
            headers = h
        else:
            headers = self.headers

        r = self.session.request(method, url, headers=headers, **request_kw)
        if not r.ok:
            raise ApiError('Call failed', r)

        if self.logger.isEnabledFor(logging.DEBUG):
            message = f"Request: {r.request.url}\nResponse: {len(r.content):,} bytes"
            self.logger.debug(message, dict(
                code='server-response',
                url=r.request.url,
                headers=headers,
            ))
        return r

    def parse_json(self, value):
        """
        Utility function to convert server reply into a JSON object.
        Accepts a JSON string or a response object; the configured
        json_object_hook is applied in every case.
        """
        if isinstance(value, str):
            # noinspection PyTypeChecker
            return json.loads(value, object_hook=self.json_object_hook)
        elif hasattr(value.__class__, 'json'):
            return value.json(object_hook=self.json_object_hook)
        else:
            # Our servers still have requests 0.8.2 ... :(
            # noinspection PyTypeChecker
            return json.loads(value.content, object_hook=self.json_object_hook)

    def __str__(self):
        """Return the API url, marked when the session is logged in."""
        res = self.url
        if self.logged_in:
            res += ' (logged in)'
        return res
class Client:
    """Attribute-chaining client for a KwikAPI server.

    Attribute access appends a path segment and returns a new client;
    calling a client that has a path performs the remote call with the
    keyword arguments; calling a client with an empty path returns a
    reconfigured copy.
    """

    # NOTE(review): with the pickle protocol, data received from the server
    # is presumably unpickled by the protocol object. Unpickling untrusted
    # data can execute arbitrary code, so only use this with trusted servers.
    DEFAULT_PROTOCOL = 'pickle'

    def __init__(self, url, version=None, protocol=DEFAULT_PROTOCOL,
                 path=None, request='', timeout=None, dnscache=None,
                 headers=None, auth=None, stream=False, log=DUMMY_LOG):
        """Store the connection settings; no network I/O happens here.

        :param url: base URL of the API server
        :param version: optional API version, used as the first path segment
        :param protocol: name of the serialization protocol (key of PROTOCOLS)
        :param path: list of path segments accumulated so far
        :param request: incoming request whose ``x-kwikapi-*`` headers and id
            are forwarded; falsy to disable forwarding
        :param timeout: socket timeout in seconds for the HTTP call, or None
            to use the global default
        :param dnscache: object providing ``map_url``; a ``DNSCache`` is
            created when omitted
        :param headers: extra headers sent with every request
        :param auth: optional object whose ``sign(url, headers, body)`` is
            called before sending
        :param stream: when true, responses are decoded lazily as a stream
        :param log: logger used for debug output
        """
        headers = headers or {}

        self._url = url
        self._version = version
        self._protocol = protocol  # FIXME: check validity
        self._path = path or []
        self._request = request
        self._timeout = timeout
        self._dnscache = dnscache
        self._headers = CaseInsensitiveDict(headers)
        self._auth = auth
        self._stream = stream
        self._log = log

        if not self._dnscache:
            self._dnscache = DNSCache()

    def _get_state(self):
        """Return the constructor keyword arguments describing this client."""
        return dict(url=self._url, version=self._version,
                    protocol=self._protocol, path=self._path,
                    request=self._request, timeout=self._timeout,
                    dnscache=self._dnscache, headers=self._headers,
                    auth=self._auth, stream=self._stream, log=self._log)

    def _copy(self, **kwargs):
        """Return a new Client with this client's state overridden by kwargs."""
        _kwargs = self._get_state()
        _kwargs.update(kwargs)
        return Client(**_kwargs)

    def _prepare_request(self, post_body, get_params=None):
        """Build the final URL and headers for a call.

        :return: ``(url, post_body, headers)``
        """
        headers = self._headers.copy()

        if self._request:
            # Forward only the KwikAPI-specific headers of the parent request.
            for hk, hv in self._request.headers.items():
                if not hk.lower().startswith('x-kwikapi-'):
                    continue
                headers[hk] = hv

            headers[REQUEST_ID_HEADER] = self._request.id

        headers[PROTOCOL_HEADER] = self._protocol

        upath = [self._version] + self._path
        upath = '/'.join(x for x in upath if x)
        url = urljoin(self._url, upath)

        if get_params:
            url = '{}?{}'.format(url, urlencode(get_params))

        url = self._dnscache.map_url(url)

        if self._auth:
            self._auth.sign(url, headers, post_body)

        return url, post_body, headers

    def _make_request(self, url, post_body, headers):
        """Send the request and return the decoded result (or a generator
        of results in stream mode)."""
        req = urllib.request.Request(url, data=post_body, headers=headers)

        # Apply the configured timeout. It is passed only when set, because
        # ``timeout=None`` would mean "block forever" rather than "use the
        # global default socket timeout".
        if self._timeout is None:
            res = urllib.request.urlopen(req)
        else:
            res = urllib.request.urlopen(req, timeout=self._timeout)

        if self._stream:
            proto = PROTOCOLS[self._protocol]
            res = proto.deserialize_stream(res)
            res = Client._extract_stream_response(res)
        else:
            res = self._deserialize_response(res.read(), self._protocol)

        return res

    @staticmethod
    def _deserialize_response(data, protocol):
        """Decode ``data`` with the named protocol and unwrap the envelope."""
        proto = PROTOCOLS[protocol]
        r = proto.deserialize(data)
        return Client._extract_response(r)

    @staticmethod
    def _extract_response(r):
        """Return ``r['result']`` on success; raise with ``r['message']``
        otherwise."""
        success = r['success']
        if not success:
            raise Exception(r['message'])  # FIXME: raise proper exc
        else:
            r = r['result']

        return r

    @staticmethod
    def _extract_stream_response(res):
        """Lazily unwrap every envelope of a streamed response."""
        for r in res:
            yield Client._extract_response(r)

    @staticmethod
    def _serialize_params(params, protocol):
        """Serialize ``params`` with the protocol registered under ``protocol``."""
        proto = PROTOCOLS[protocol]
        data = proto.serialize(params)
        return data

    def __call__(self, *args, **kwargs):
        """Invoke the remote method, or reconfigure when no path is set.

        :raises AssertionError: if positional arguments are given
        """
        # Raised explicitly (instead of a bare ``assert``) so the check is
        # not stripped when Python runs with -O. The exception type is kept
        # for existing callers.
        # FIXME: raise appropriate exception
        if args:
            raise AssertionError(
                'only keyword arguments are supported, got: %r' % (args,))

        if self._path:
            # FIXME: support streaming in both directions
            _kwargs = get_loggable_params(kwargs or {})
            self._log.debug('kwikapi.client.__call__',
                            path=self._path, kwargs=_kwargs,
                            url=self._url, version=self._version,
                            protocol=self._protocol)

            post_body = self._serialize_params(kwargs, self._protocol)
            url, post_body, headers = self._prepare_request(post_body)
            res = self._make_request(url, post_body, headers)

            return res

        return self._copy(**kwargs)

    def __getattr__(self, attr):
        """Return a child client whose path is extended by ``attr``.

        Dunder names are never API path segments. They are probed by the
        interpreter and stdlib (``copy``, ``pickle``, ...), sometimes on a
        not-yet-initialised instance, where building a child client would
        recurse infinitely or trigger a network call. Report them as missing.
        """
        if attr.startswith('__') and attr.endswith('__'):
            raise AttributeError(attr)
        return self._copy(path=self._path + [attr])
class Site(object):
    """
    Minimal MediaWiki API client.

    Public properties (member variables at the moment):
    * url: Full url to site's api.php
    * session: current request.session object
    * log: an object that will be used for logging. ConsoleLog is created by default
    """

    def __init__(self, url, headers=None, session=None, log=None):
        """
        :param url: full url to the site's api.php
        :param headers: optional headers; they override the default User-Agent
        :param session: optional requests session to reuse
        :param log: optional logging object; a ConsoleLog is created when omitted
        """
        self._loginOnDemand = False
        self.session = session if session else requests.session()
        self.log = log if log else ConsoleLog()
        self.url = url
        self.tokens = {}
        self.noSSL = False  # For non-ssl sites, it might be needed to avoid HTTPS

        # Default User-Agent: "<script's directory name>-<script file name>"
        try:
            script = os.path.abspath(sys.modules['__main__'].__file__)
        except (KeyError, AttributeError):
            script = sys.executable
        path, f = os.path.split(script)
        self.headers = CaseInsensitiveDict(
            {u'User-Agent': u'%s-%s BareboneMWReq/0.1' % (os.path.basename(path), f)})
        if headers:
            self.headers.update(headers)

    def __call__(self, action, **kwargs):
        """
        Make an API call with any arguments provided as named values:

            data = site('query', meta='siteinfo')

        By default uses GET request to the default URL set in the Site constructor.
        In case of an error, ApiError exception will be raised
        Any warnings will be logged via the logging interface

        :param action: the MW API action, e.g. 'query' or 'login'

        Several special "magic" parameters could be used to customize api call.
        Special parameters must be all CAPS to avoid collisions with the server API:
        :param POST: Use POST method when calling server API. Value is ignored.
        :param HTTPS: Force https (ssl) protocol for this request. Value is ignored.
        :param SSL: Same as HTTPS
        :param EXTRAS: Any extra parameters as passed to requests' session.request().
            Value is a dict()
        """
        # Magic CAPS parameters
        method = 'POST' if 'POST' in kwargs or action in ['login', 'edit'] else 'GET'
        forceSSL = not self.noSSL and (action == 'login' or 'SSL' in kwargs or 'HTTPS' in kwargs)

        # Copy EXTRAS so that the 'data'/'params' key added below never leaks
        # into a dict the caller may reuse for other calls.
        request_kw = dict(kwargs['EXTRAS']) if 'EXTRAS' in kwargs else dict()

        # Clean up magic CAPS params as they shouldn't be passed to the server
        for k in ['POST', 'SSL', 'HTTPS', 'EXTRAS']:
            if k in kwargs:
                del kwargs[k]

        for k, val in kwargs.items():
            # Only support the well known types.
            # Everything else should be client's responsibility
            if isinstance(val, (list, tuple)):
                kwargs[k] = '|'.join(val)

        # Make server call
        kwargs['action'] = action
        kwargs['format'] = 'json'

        if method == 'POST':
            request_kw['data'] = kwargs
        else:
            request_kw['params'] = kwargs

        if self._loginOnDemand and action != 'login':
            self.login(self._loginOnDemand[0], self._loginOnDemand[1])

        data = parseJson(self.request(method, forceSSL=forceSSL, **request_kw))

        # Handle success and failure
        if 'error' in data:
            raise ApiError('Server API Error', data['error'])
        if 'warnings' in data:
            self.log(2, data['warnings'])
        return data

    def login(self, user, password, onDemand=False):
        """
        Log in, or remember the credentials for a later login.

        :param user: user login name
        :param password: user password
        :param onDemand: if True, will postpone login until an actual API request is made
        :raises ApiError: if the server does not report 'Success'
        """
        self.tokens = {}
        if onDemand:
            self._loginOnDemand = (user, password)
            return

        res = self('login', lgname=user, lgpassword=password)['login']
        if res['result'] == 'NeedToken':
            # Two-step login: repeat the request with the token just received.
            res = self('login', lgname=user, lgpassword=password, lgtoken=res['token'])['login']
        if res['result'] != 'Success':
            raise ApiError('Login failed', res)
        self._loginOnDemand = False

    def query(self, **kwargs):
        """
        Call Query API with given parameters, and yield all results returned
        by the server, properly handling result continuation.
        """
        if 'rawcontinue' in kwargs:
            raise ValueError("rawcontinue is not supported with query() function, use object's __call__()")
        if 'continue' not in kwargs:
            kwargs['continue'] = ''
        req = kwargs
        while True:
            result = self('query', **req)
            if 'query' in result:
                yield result['query']
            if 'continue' not in result:
                break
            # re-send all continue values in the next call
            req = kwargs.copy()
            req.update(result['continue'])

    def queryPages(self, **kwargs):
        """
        Query the server and yield all page objects individually, each
        exactly once. Data for the same page arriving in several responses
        is merged before the page is yielded.

        :raises ApiPagesModifiedError: after all other pages were yielded,
            if some pages changed revision between responses
        """
        # Pages seen so far that may still receive data in a later response
        incomplete = {}
        # Ids of pages modified during iteration; they are never yielded
        changed = set()

        for result in self.query(**kwargs):
            if 'pages' not in result:
                raise ApiError('Missing pages element in query result', result)

            # Candidates for completion: everything pending that the server
            # does not mention again in this response.
            finished = incomplete.copy()
            for pageId, page in result['pages'].items():
                if pageId in changed:
                    continue
                if pageId in incomplete:
                    del finished[pageId]  # If server returned it => not finished
                    p = incomplete[pageId]
                    if 'lastrevid' in page and p['lastrevid'] != page['lastrevid']:
                        # someone else modified this page, it must be requested anew separately
                        changed.add(pageId)
                        del incomplete[pageId]
                        continue
                    self._mergePage(p, page)
                else:
                    p = page
                incomplete[pageId] = p

            for pageId, page in finished.items():
                # Drop the page from the pending set, otherwise it would be
                # yielded again on every later response and at the end.
                del incomplete[pageId]
                yield page

        # Iteration is done: whatever is still pending is complete.
        for pageId, page in incomplete.items():
            yield page

        if changed:
            # some pages have been changed between api calls, notify caller
            raise ApiPagesModifiedError(list(changed))

    def _mergePage(self, a, b):
        """
        Recursively merge page object ``b`` into ``a`` (in place):
        dicts are merged, lists concatenated, other values overwritten.
        """
        for k in b:
            val = b[k]
            if k in a:
                if isinstance(val, dict):
                    self._mergePage(a[k], val)
                elif isinstance(val, list):
                    a[k] = a[k] + val
                else:
                    a[k] = val
            else:
                a[k] = val

    def token(self, tokenType='csrf'):
        """Return an API token of the given type, cached until the next login()."""
        if tokenType not in self.tokens:
            self.tokens[tokenType] = next(self.query(meta='tokens', type=tokenType))['tokens'][tokenType + 'token']
        return self.tokens[tokenType]

    def request(self, method, forceSSL=False, headers=None, **request_kw):
        """Make a low level request to the server

        :raises ApiError: if the response status is not OK
        """
        url = self.url
        if forceSSL:
            parts = list(urlparse.urlparse(url))
            parts[0] = 'https'
            url = urlparse.urlunparse(parts)
        if headers:
            # Per-call headers override the site-wide ones.
            h = self.headers.copy()
            h.update(headers)
            headers = h
        else:
            headers = self.headers

        r = self.session.request(method, url, headers=headers, **request_kw)
        if not r.ok:
            raise ApiError('Call failed', r)

        if self.log.isEnabled(5):
            dbg = [r.request.url, headers]
            self.log(5, dbg)
        return r
class BasePixivAPI(object):
    """Base client: credentials, shared headers and HTTP plumbing for the Pixiv app API."""

    # OAuth client credentials sent by auth(); replaceable via set_client().
    client_id = "MOBrBDS8blbauoSck0ZfDbtuzpyT"
    client_secret = "lsACyCD94FhDUtGTXi3QzcFE2uU1hqtDaKeqrdwj"
    # Appended to the client time when computing the x-client-hash header.
    hash_secret = "28c1fdd170a5204386cb1313c7077b34f83e4aaf4aa829ce78c231e05b0bae2c"

    def __init__(self, **requests_kwargs: Any) -> None:
        """Create the HTTP session and remember default request options.

        :param requests_kwargs: keyword arguments forwarded to every request
            made by requests_call(). An optional ``headers`` entry is removed
            from them and used as the initial additional headers.
        """
        self.user_id: Union[int, str] = 0
        self.access_token: Optional[str] = None
        self.refresh_token: Optional[str] = None
        self.hosts = "https://app-api.pixiv.net"

        # cloudscraper session instead of a plain requests.Session()
        self.requests = cloudscraper.create_scraper()  # fix due to #140
        self.additional_headers = CaseInsensitiveDict(
            requests_kwargs.pop("headers", {})
        )  # type: CaseInsensitiveDict[Any]
        self.requests_kwargs = requests_kwargs

    def set_additional_headers(self, headers: ParamDict) -> None:
        """manually specify additional headers. will overwrite API default headers in case of collision"""
        self.additional_headers = CaseInsensitiveDict(headers)

    # Set the HTTP Accept-Language header (used to get a tag's
    # translated_name in the corresponding language).
    # language: en-us, zh-cn, ...
    def set_accept_language(self, language: str) -> None:
        """set header Accept-Language for all requests (useful for get tags.translated_name)"""
        self.additional_headers["Accept-Language"] = language

    @classmethod
    def parse_json(cls, json_str: str) -> ParsedJson:
        """parse str into JsonDict"""
        return json.loads(json_str, object_hook=JsonDict)

    def require_auth(self) -> None:
        """Raise PixivError unless an access token has been set."""
        if self.access_token is None:
            raise PixivError(
                "Authentication required! Call login() or set_auth() first!"
            )

    def requests_call(
        self,
        method,
        url,
        headers=None,
        params=None,
        data=None,
        stream=False,
    ):
        # type: (str, str, Union[ParamDict, CaseInsensitiveDict[Any]], ParamDict, ParamDict, bool) -> Response
        """requests http/https call for Pixiv API

        :param method: "GET", "POST" or "DELETE"
        :param url: target URL
        :param headers: per-call headers; they override additional_headers
            on collision
        :param params: query-string parameters
        :param data: request body (not sent for "GET")
        :param stream: forwarded as the ``stream`` argument of the request
        :return: the response returned by the session
        :raises PixivError: on any failure, including an unknown ``method``
        """
        # Work on a copy so per-call headers never leak into the shared ones.
        merged_headers = self.additional_headers.copy()
        if headers:
            # Use the headers in the parameter to override the
            # additional_headers setting.
            merged_headers.update(headers)
        try:
            if method == "GET":
                return self.requests.get(
                    url,
                    params=params,
                    headers=merged_headers,
                    stream=stream,
                    **self.requests_kwargs
                )
            elif method == "POST":
                return self.requests.post(
                    url,
                    params=params,
                    data=data,
                    headers=merged_headers,
                    stream=stream,
                    **self.requests_kwargs
                )
            elif method == "DELETE":
                return self.requests.delete(
                    url,
                    params=params,
                    data=data,
                    headers=merged_headers,
                    stream=stream,
                    **self.requests_kwargs
                )
            else:
                # NOTE: this is caught and re-wrapped by the handler below.
                raise PixivError("Unknown method: %s" % method)
        except Exception as e:
            raise PixivError("requests %s %s error: %s" % (method, url, e)) from e

    def set_auth(self, access_token: str, refresh_token: Optional[str] = None) -> None:
        """Store tokens obtained elsewhere instead of calling auth()."""
        self.access_token = access_token
        self.refresh_token = refresh_token

    def login(self, username: str, password: str) -> Any:
        """Authenticate with username and password; shorthand for auth()."""
        return self.auth(username=username, password=password)

    def set_client(self, client_id: str, client_secret: str) -> None:
        """Replace the OAuth client credentials used by auth() on this instance."""
        self.client_id = client_id
        self.client_secret = client_secret

    def auth(
        self,
        username: Optional[str] = None,
        password: Optional[str] = None,
        refresh_token: Optional[str] = None,
        headers: ParamDict = None,
    ) -> ParsedJson:
        """Login with password, or use the refresh_token to acquire a new bearer token

        :param username: account name (used together with ``password``)
        :param password: account password
        :param refresh_token: refresh token; falls back to
            ``self.refresh_token`` when omitted
        :param headers: extra headers for the token request; a supplied
            ``user-agent`` suppresses the built-in iOS app headers
        :return: the parsed auth/token response
        :raises PixivError: if no credentials are available, the server
            answers with a status other than 200/301/302, or the response
            body is not valid JSON
        """
        local_time = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S+00:00")
        headers_ = CaseInsensitiveDict(headers or {})
        headers_["x-client-time"] = local_time
        headers_["x-client-hash"] = hashlib.md5(
            (local_time + self.hash_secret).encode("utf-8")
        ).hexdigest()
        # Allow mock UA due to #171: https://github.com/upbit/pixivpy/issues/171
        if "user-agent" not in headers_:
            headers_["app-os"] = "ios"
            headers_["app-os-version"] = "14.6"
            headers_["user-agent"] = "PixivIOSApp/7.13.3 (iOS 14.6; iPhone13,2)"

        # noinspection PyUnresolvedReferences
        if not hasattr(self, "hosts") or self.hosts == "https://app-api.pixiv.net":
            auth_hosts = "https://oauth.secure.pixiv.net"
        else:
            # noinspection PyUnresolvedReferences
            auth_hosts = self.hosts  # case where the API host was resolved to an IP
            headers_["host"] = "oauth.secure.pixiv.net"
        url = "%s/auth/token" % auth_hosts
        data = {
            "get_secure_url": 1,
            "client_id": self.client_id,
            "client_secret": self.client_secret,
        }

        if username and password:
            data["grant_type"] = "password"
            data["username"] = username
            data["password"] = password
        elif refresh_token or self.refresh_token:
            data["grant_type"] = "refresh_token"
            data["refresh_token"] = refresh_token or self.refresh_token
        else:
            raise PixivError("[ERROR] auth() but no password or refresh_token is set.")

        # Send the headers assembled above (client time/hash, user agent,
        # host), not the raw ``headers`` argument.
        r = self.requests_call("POST", url, headers=headers_, data=data)
        if r.status_code not in {200, 301, 302}:
            if data["grant_type"] == "password":
                raise PixivError(
                    "[ERROR] auth() failed! check username and password.\nHTTP %s: %s"
                    % (r.status_code, r.text),
                    header=r.headers,
                    body=r.text,
                )
            else:
                raise PixivError(
                    "[ERROR] auth() failed! check refresh_token.\nHTTP %s: %s"
                    % (r.status_code, r.text),
                    header=r.headers,
                    body=r.text,
                )

        token = None
        try:
            # get access_token
            token = self.parse_json(r.text)
            self.user_id = token.response.user.id
            self.access_token = token.response.access_token
            self.refresh_token = token.response.refresh_token
        except json.JSONDecodeError as e:
            raise PixivError(
                "Get access_token error! Response: %s" % token,
                header=r.headers,
                body=r.text,
            ) from e

        # return auth/token response
        return token

    def download(
        self,
        url: str,
        prefix: str = "",
        path: str = os.path.curdir,
        name: Optional[str] = None,
        replace: bool = False,
        fname: Optional[Union[str, IO[bytes]]] = None,
        referer: str = "https://app-api.pixiv.net/",
    ) -> bool:
        """Download image to file (use 6.0 app-api)

        :param url: image URL
        :param prefix: string prepended to the target file name
        :param path: target directory
        :param name: target file name; defaults to ``fname`` or the base name
            of ``url``
        :param replace: overwrite an existing file instead of skipping it
        :param fname: alternative file name, or a writable file-like object
            that receives the data directly
        :param referer: value of the Referer header sent with the request
        :return: False if the target file exists and ``replace`` is false,
            True once the data has been written
        """
        if hasattr(fname, "write"):
            # A file-like object has been provided.
            file = fname
        else:
            # Determine file path by parameters.
            name = prefix + str(name or fname or os.path.basename(url))
            file = os.path.join(path, name)
            if os.path.exists(file) and not replace:
                return False

        with self.requests_call(
            "GET", url, headers={"Referer": referer}, stream=True
        ) as response:
            if isinstance(file, str):
                with open(file, "wb") as out_file:
                    shutil.copyfileobj(response.raw, out_file)
            else:
                shutil.copyfileobj(response.raw, file)  # type: ignore[arg-type]
        return True