def init(self):
    """Initialize connection"""
    if sys.platform != "win32":
        logger.debug(u"Initializing %s" % self.__class__.__name__)
    else:
        logger.debug("Log line suppressed on windows")
    self.session = requests.session()
    self.session.headers.update({"User-Agent": self.user_agent})
def __addImg(self, data):
    api = conf.read_config(conf.path, 'API', 'addImgApi')
    url = base + api
    s = requests.session()
    result = s.post(url, data)
    s.keep_alive = False
    return result.text
def init(self):
    super(Itasa, self).init()
    login_pattern = '<input type="hidden" name="return" value="([^\n\r\t ]+?)" /><input type="hidden" name="([^\n\r\t ]+?)" value="([^\n\r\t ]+?)" />'
    response = requests.get(self.server_url + 'index.php')
    if response.status_code != 200:
        raise ServiceError('Initiate failed')
    match = re.search(login_pattern, response.content, re.IGNORECASE | re.DOTALL)
    if not match:
        raise ServiceError('Can not find unique id parameter on page')
    login_parameter = {'username': '******',
                       'passwd': 'subliminal',
                       'remember': 'yes',
                       'Submit': 'Login',
                       'option': 'com_user',
                       'task': 'login',
                       'silent': 'true',
                       'return': match.group(1),
                       match.group(2): match.group(3)}
    self.session = requests.session()
    r = self.session.post(self.server_url + 'index.php', data=login_parameter)
    if not re.search('logouticon.png', r.content, re.IGNORECASE | re.DOTALL):
        raise ServiceError('Itasa Login Failed')
def __getAssetByType(self, pid):
    api = conf.read_config(conf.path, 'API', 'getAssetByTypeApi')
    url = base + api + '?project_id=' + pid
    s = requests.session()
    result = s.post(url)
    s.keep_alive = False
    return result.json()
def __getLastVersion(self, entity_id, entity_type):
    api = conf.read_config(conf.path, 'API', 'getLastVersionApi')
    url = base + api + '?entity_id=' + entity_id + '&entity_type=' + entity_type
    s = requests.session()
    result = s.post(url)
    s.keep_alive = False
    return result.content
def __init__(self, name):
    # these need to be set in the subclass
    self.providerType = None
    self.name = name
    self.proxy = ProviderProxy()
    self.proxyGlypeProxySSLwarning = None
    self.urls = {}
    self.url = ''
    self.show = None
    self.supportsBacklog = False
    self.supportsAbsoluteNumbering = False
    self.anime_only = False
    self.search_mode = None
    self.search_fallback = False
    self.enable_daily = False
    self.enable_backlog = False
    self.cache = tvcache.TVCache(self)
    self.session = requests.session()
    self.headers = {'Content-Type': 'application/x-www-form-urlencoded',
                    'User-Agent': USER_AGENT}
def __upLoadFile(self, filePath, directory):
    api = conf.read_config(conf.path, 'API', 'publishFileApi')
    url = base + api + '?f=' + directory
    files = {'file': open(filePath, 'rb')}
    s = requests.session()
    s.post(url, files=files)
    s.keep_alive = False
def __getTaskStep(self, uid, pid, tableName):
    api = conf.read_config(conf.path, 'API', 'getTaskStepApi')
    url = base + api + '?user_id=' + uid + '&project_id=' + pid
    s = requests.session()
    result = s.post(url)
    s.keep_alive = False
    return result.json()[tableName]
def authenticate_with_server():
    http_session = requests.session()
    http_session.get(LOGIN_PAGE, headers=HEADER)
    data = {"pseudo": USERNAME, "passe": PASSWORD, "souvenir": 1}
    http_session.post(POST_LOGIN_PAGE, data, headers=HEADER)
    logger.warning('LOGIN')
    save_session_in_cache(http_session)
    save_session_to_db()
    return http_session
def __getSingleAsset(self, pid, entityId, entityType):
    api = conf.read_config(conf.path, 'API', 'getSingleAssetApi')
    url = base + api + '?project_id=' + pid + '&entity_id=' + entityId + '&entity_type=' + entityType
    s = requests.session()
    result = s.post(url)
    s.keep_alive = False
    if result.text != u"null":
        return result.json()['SA']
    else:
        return ""
def __getReferenceVersion(self, version_id):
    api = conf.read_config(conf.path, 'API', 'getReferenceVersionApi')
    url = base + api + '?version_id=' + version_id
    s = requests.session()
    result = s.post(url)
    s.keep_alive = False
    if result.text != u"null":
        return result.json()['ATTACHMENTID']
    else:
        return ""
def __getVersion(self, vname, pid):
    api = conf.read_config(conf.path, 'API', 'getVersionByNameApi')
    url = base + api + '?versionName=' + vname + '&project_id=' + pid
    s = requests.session()
    result = s.post(url)
    s.keep_alive = False
    if result.text != u"null":
        return result.json()['VERSION']
    else:
        return ""
def __getImg(self, imgId):
    api = conf.read_config(conf.path, 'API', 'getImgApi')
    url = base + api + '?image_id=' + imgId
    s = requests.session()
    result = s.post(url)
    s.keep_alive = False
    if result.text != u"null":
        return result.json()['THUMBNAIL']
    else:
        return ""
def __getProject(self, uid):
    api = conf.read_config(conf.path, 'API', 'getProjectApi')
    url = base + api + '?user_id=' + uid
    s = requests.session()
    result = s.post(url)
    s.keep_alive = False
    if result.text != u"null":
        return result.json()['PROJECT']
    else:
        return ""
def __getSingleTask(self, uid, taskId, stepId):
    api = conf.read_config(conf.path, 'API', 'getSingleTaskApi')
    url = base + api + '?uid=' + uid + '&task_id=' + taskId + '&step_id=' + stepId
    s = requests.session()
    result = s.post(url)
    s.keep_alive = False
    if result.text != u"null":
        return result.json()['TASK']
    else:
        return ""
def __getAssetType(self):
    api = conf.read_config(conf.path, 'API', 'getAssetTypeAPi')
    url = base + api
    s = requests.session()
    result = s.post(url)
    s.keep_alive = False
    if result.text != u"null":
        return result.json()['ASSETTYPE']
    else:
        return ""
def __getAsset(self, uid, project_id, start, length):
    api = conf.read_config(conf.path, 'API', 'getAssetApi')
    url = base + api + '?uid=' + uid + '&project_id=' + project_id + '&start=' + start + '&length=' + length
    s = requests.session()
    result = s.post(url)
    s.keep_alive = False
    if result.text != u"null":
        return result.json()['ASSET']
    else:
        return ""
def __getSequences(self, pid):
    api = conf.read_config(conf.path, 'API', 'sequencesApi')
    url = base + api + pid
    s = requests.session()
    result = s.post(url)
    s.keep_alive = False
    if result.text != u"null":
        return result.json()['SEQUENCENAME']
    else:
        return ""
def __lastVersion(self, vid):
    api = conf.read_config(conf.path, 'API', 'checkVersionApi2')
    url = base + api
    s = requests.session()
    data = {'id': vid}
    result = s.post(url, data=data)
    s.keep_alive = False
    if result.text != u"null":
        return result.json()['LASTVERSION']
    else:
        return ""
def __init__(self, name, host=None, username=None, password=None):
    self.name = name
    self.username = sickbeard.TORRENT_USERNAME if username is None else username
    self.password = sickbeard.TORRENT_PASSWORD if password is None else password
    self.host = sickbeard.TORRENT_HOST if host is None else host
    self.url = None
    self.response = None
    self.auth = None
    self.last_time = time.time()
    # requests.session() takes no arguments; credentials go on the session
    # object and the timeout has to be passed per request.
    self.session = requests.session()
    self.session.auth = (self.username, self.password)
def __getFile(self, entity_id, entity_type, project_id):
    api = conf.read_config(conf.path, 'API', 'workfileApi')
    url = (base + api + '?entity_id=' + entity_id + '&entity_type=' + entity_type +
           '&project_id=' + project_id)
    s = requests.session()
    result = s.post(url)
    s.keep_alive = False
    if result.text != u"null":
        return result.json()['VERSION']
    else:
        return ""
def __getUserInfo(self, userName, password):
    base = conf.read_config(conf.path, 'API', 'baseUrl')
    api = conf.read_config(conf.path, 'API', 'loginApi')
    url = base + api
    data = {'name': userName, 'password': password}
    s = requests.session()
    result = s.post(url, data=data)
    s.keep_alive = False
    if result.json() != "error":
        return result.json()['Table']
    else:
        return "error"
def __getCompetence(self, uid):
    base = conf.read_config(conf.path, 'API', 'baseUrl')
    api = conf.read_config(conf.path, 'API', 'getUserRule')
    url = base + api
    s = requests.session()
    idData = {'ID': uid, 'VALUE': '5'}
    result = s.post(url, data=idData)
    s.keep_alive = False
    if result.text != u"null":
        return result.json()['AUTH']
    else:
        return ""
def __init__(self, name, host=None, username=None, password=None):
    self.name = name
    self.username = sickbeard.TORRENT_USERNAME if username is None else username
    self.password = sickbeard.TORRENT_PASSWORD if password is None else password
    self.host = sickbeard.TORRENT_HOST if host is None else host.rstrip('/') + '/'
    self.url = None
    self.auth = None
    self.last_time = time.time()
    self.session = requests.session()
    self.session.auth = (self.username, self.password)
    self.created_id = None
def __init__(self):
    generic.TorrentProvider.__init__(self, "Rarbg")
    self.enabled = False
    self.supportsBacklog = True
    self.ratio = None
    self.cache = RarbgCache(self)
    self.urls = {'url': 'https://rarbg.com',
                 'base_url': 'https://rarbg.com/torrents.php',
                 'search': 'https://rarbg.com/torrents.php?search=%s&category=%s&page=%s',
                 'download': 'https://rarbg.com/download.php?id=%s&f=%s'}
    self.url = self.urls['base_url']
    self.subcategories = [18, 41]
    self.pages = [1, 2, 3, 4, 5]
    self.cookie = {
        "version": 0,
        "name": '7fAY799j',
        "value": 'VtdTzG69',
        "port": None,
        # "port_specified": False,
        "domain": 'rarbg.com',
        # "domain_specified": False,
        # "domain_initial_dot": False,
        "path": '/',
        # "path_specified": True,
        "secure": False,
        "expires": None,
        "discard": True,
        "comment": None,
        "comment_url": None,
        "rest": {},
        "rfc2109": False
    }
    self.session = requests.session()
    self.session.headers.update({'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.99 Safari/537.36'})
    self.session.cookies.set(**self.cookie)
def __init__(self, name):
    # these need to be set in the subclass
    self.providerType = None
    self.name = name
    self.url = ''
    self.show = None
    self.supportsBacklog = False
    self.cache = tvcache.TVCache(self)
    self.session = requests.session()
    self.session.verify = False
    self.session.headers.update({'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.107 Safari/537.36'})
def _doLogin(self):
    login_params = {'username': self.username,
                    'password': self.password,
                    'ssl': 'yes'}
    if not self.session:
        self.session = requests.session()
    try:
        response = self.session.post(self.urls['login'], data=login_params, timeout=30, verify=False)
    except (requests.exceptions.ConnectionError, requests.exceptions.HTTPError) as e:
        logger.log(u'Unable to connect to ' + self.name + ' provider: ' + ex(e), logger.ERROR)
        return False
def __init__(self):
    generic.TorrentProvider.__init__(self, "Rarbg")
    self.enabled = False
    self.supportsBacklog = True
    self.supportsFrench = False
    self.ratio = None
    self.cache = RarbgCache(self)
    self.urls = {'url': 'https://rarbg.com',
                 'base_url': 'https://rarbg.com/torrents.php',
                 'search': 'https://rarbg.com/torrents.php?search=%s&category=%s&page=%s',
                 'download': 'https://rarbg.com/download.php?id=%s&f=%s'}
    self.url = self.urls['base_url']
    self.subcategories = [18, 41]
    self.pages = [1, 2, 3, 4, 5]
    self.cookie = {
        "version": 0,
        "name": '7fAY799j',
        "value": 'VtdTzG69',
        "port": None,
        # "port_specified": False,
        "domain": 'rarbg.com',
        # "domain_specified": False,
        # "domain_initial_dot": False,
        "path": '/',
        # "path_specified": True,
        "secure": False,
        "expires": None,
        "discard": True,
        "comment": None,
        "comment_url": None,
        "rest": {},
        "rfc2109": False
    }
    self.session = requests.session()
    self.session.headers.update({'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.99 Safari/537.36'})
    self.session.cookies.set(**self.cookie)
def __init__(self, username=None, password=None):
    self.session = requests.session()
    self.session.headers = self.default_headers
    self.username = username
    self.password = password
    self.authkey = None
    self.passkey = None
    self.userid = None
    self.logged_in_user = None
    self.cached_users = {}
    self.cached_artists = {}
    self.cached_tags = {}
    self.cached_torrent_groups = {}
    self.cached_torrents = {}
    self.cached_requests = {}
    self.cached_categories = {}
    self.site = "https://what.cd/"
    self.past_request_timestamps = []
def __init__(self, username=None, password=None):
    # requests.session() accepts no arguments; apply the default headers
    # to the freshly created session instead.
    self.session = requests.session()
    self.session.headers.update(self.default_headers)
    self.username = username
    self.password = password
    self.authkey = None
    self.passkey = None
    self.userid = None
    self.logged_in_user = None
    self.cached_users = {}
    self.cached_artists = {}
    self.cached_tags = {}
    self.cached_torrent_groups = {}
    self.cached_torrents = {}
    self.cached_requests = {}
    self.cached_categories = {}
    self.site = "https://what.cd/"
    self.rate_limit = 2.0  # seconds between requests
    self._login()
def downloadpage(url, **opt):
    # logger.info()
    """
    Open a url and return the data obtained

        @param url: url to open.
        @type url: str
        @param post: If it contains any value, it is sent by POST.
        @type post: str
        @param headers: Headers for the request; if it contains nothing the default headers will be used.
        @type headers: dict, list
        @param timeout: Timeout for the request.
        @type timeout: int
        @param follow_redirects: Indicates if redirects are to be followed.
        @type follow_redirects: bool
        @param cookies: Indicates whether cookies are to be used.
        @type cookies: bool
        @param replace_headers: If True, headers passed by the "headers" parameter completely replace the default headers.
                                If False, the headers passed by the "headers" parameter modify the default headers.
        @type replace_headers: bool
        @param add_referer: Indicates whether to add the "Referer" header using the domain of the url as its value.
        @type add_referer: bool
        @param only_headers: If True, only headers will be downloaded, omitting the content of the url.
        @type only_headers: bool
        @param random_headers: If True, use the method of selecting random headers.
        @type random_headers: bool
        @param ignore_response_code: If True, ignore the WebErrorException mechanism for errors like a 404 in veseriesonline, where it is functional data.
        @type ignore_response_code: bool
        @param use_requests: Use requests.session()
        @type use_requests: bool
        @return: Result of the request
        @rtype: HTTPResponse

            Parameter               Type    Description
            --------------------------------------------------------------------------------------------
            HTTPResponse.sucess:    bool    True: Request successful | False: Error when making the request
            HTTPResponse.code:      int     Server response code or error code if an error occurs
            HTTPResponse.error:     str     Description of the error in case of an error
            HTTPResponse.headers:   dict    Dictionary with server response headers
            HTTPResponse.data:      str     Response obtained from server
            HTTPResponse.json:      dict    Response obtained from the server in json format
            HTTPResponse.time:      float   Time taken to make the request
    """
    url = scrapertools.unescape(url)
    domain = urlparse.urlparse(url).netloc
    global CF_LIST
    CF = False

    if domain in FORCE_CLOUDSCRAPER_LIST:
        from lib import cloudscraper
        session = cloudscraper.create_scraper()
        CF = True
    else:
        from lib import requests
        session = requests.session()
        if domain in CF_LIST or opt.get('CF', False):
            url = 'https://web.archive.org/save/' + url
            CF = True
        if config.get_setting('resolver_dns') and not opt.get('use_requests', False):
            from specials import resolverdns
            session.mount('https://', resolverdns.CipherSuiteAdapter(domain, CF))

    req_headers = default_headers.copy()

    # Headers passed as parameters
    if opt.get('headers', None) is not None:
        if not opt.get('replace_headers', False):
            req_headers.update(dict(opt['headers']))
        else:
            req_headers = dict(opt['headers'])

    if opt.get('random_headers', False) or HTTPTOOLS_DEFAULT_RANDOM_HEADERS:
        req_headers['User-Agent'] = random_useragent()
    url = urllib.quote(url, safe="%/:=&?~#+!$,;'@()*[]")

    opt['url_save'] = url
    opt['post_save'] = opt.get('post', None)

    response = {}
    info_dict = []
    payload = dict()
    files = {}
    file_name = ''

    session.verify = opt.get('verify', True)

    if opt.get('cookies', True):
        session.cookies = cj
    session.headers.update(req_headers)

    proxy_data = {'dict': {}}

    inicio = time.time()

    if opt.get('timeout', None) is None and HTTPTOOLS_DEFAULT_DOWNLOAD_TIMEOUT is not None:
        opt['timeout'] = HTTPTOOLS_DEFAULT_DOWNLOAD_TIMEOUT
    if opt['timeout'] == 0:
        opt['timeout'] = None

    if len(url) > 0:
        try:
            if opt.get('post', None) is not None or opt.get('file', None) is not None:
                if opt.get('post', None) is not None:
                    # Convert string post in dict
                    try:
                        json.loads(opt['post'])
                        payload = opt['post']
                    except:
                        if not isinstance(opt['post'], dict):
                            post = urlparse.parse_qs(opt['post'], keep_blank_values=1)
                            payload = dict()
                            for key, value in post.items():
                                try:
                                    payload[key] = value[0]
                                except:
                                    payload[key] = ''
                        else:
                            payload = opt['post']

                # Verify 'file' and 'file_name' options to upload a buffer or file
                if opt.get('file', None) is not None:
                    if os.path.isfile(opt['file']):
                        if opt.get('file_name', None) is None:
                            path_file, opt['file_name'] = os.path.split(opt['file'])
                        files = {'file': (opt['file_name'], open(opt['file'], 'rb'))}
                        file_name = opt['file']
                    else:
                        files = {'file': (opt.get('file_name', 'Default'), opt['file'])}
                        file_name = opt.get('file_name', 'Default') + ', memory buffer'

                info_dict = fill_fields_pre(url, opt, proxy_data, file_name)
                if opt.get('only_headers', False):
                    # Makes the request with HEAD method
                    req = session.head(url, allow_redirects=opt.get('follow_redirects', True),
                                       timeout=opt['timeout'])
                else:
                    # Makes the request with POST method
                    req = session.post(url, data=payload, allow_redirects=opt.get('follow_redirects', True),
                                       files=files, timeout=opt['timeout'])
            elif opt.get('only_headers', False):
                info_dict = fill_fields_pre(url, opt, proxy_data, file_name)
                # Makes the request with HEAD method
                req = session.head(url, allow_redirects=opt.get('follow_redirects', True),
                                   timeout=opt['timeout'])
            else:
                info_dict = fill_fields_pre(url, opt, proxy_data, file_name)
                # Makes the request with GET method
                req = session.get(url, allow_redirects=opt.get('follow_redirects', True),
                                  timeout=opt['timeout'])
        except Exception as e:
            from lib import requests
            req = requests.Response()
            if not opt.get('ignore_response_code', False) and not proxy_data.get('stat', ''):
                response['data'] = ''
                response['sucess'] = False
                info_dict.append(('Success', 'False'))
                response['code'] = str(e)
                info_dict.append(('Response code', str(e)))
                info_dict.append(('Finished in', time.time() - inicio))
                if not opt.get('alfa_s', False):
                    show_infobox(info_dict)
                return type('HTTPResponse', (), response)
            else:
                req.status_code = str(e)
    else:
        response['data'] = ''
        response['sucess'] = False
        response['code'] = ''
        return type('HTTPResponse', (), response)

    response_code = req.status_code

    if req.headers.get('Server', '').startswith('cloudflare') and response_code in [429, 503, 403] \
            and not opt.get('CF', False):
        if domain not in CF_LIST:
            opt["CF"] = True
            with open(CF_LIST_PATH, "a") as CF_File:
                CF_File.write("%s\n" % domain)
            logger.debug("CF retry... for domain: %s" % domain)
            return downloadpage(url, **opt)

    response['data'] = req.content if req.content else ''
    if CF:
        import re
        response['data'] = re.sub('["|\']/save/[^"]*(https?://[^"]+)', '"\\1', response['data'])
    response['url'] = req.url

    if type(response['data']) != str:
        response['data'] = response['data'].decode('UTF-8')
    if not response['data']:
        response['data'] = ''

    try:
        response['json'] = to_utf8(req.json())
    except:
        response['json'] = dict()

    response['code'] = response_code
    response['headers'] = req.headers
    response['cookies'] = req.cookies

    info_dict, response = fill_fields_post(info_dict, req, response, req_headers, inicio)

    if opt.get('cookies', True):
        save_cookies(alfa_s=opt.get('alfa_s', False))

    # is_channel = inspect.getmodule(inspect.currentframe().f_back)
    # is_channel = scrapertools.find_single_match(str(is_channel), "<module '(channels).*?'")
    # if is_channel and isinstance(response_code, int):
    #     if not opt.get('ignore_response_code', False) and not proxy_data.get('stat', ''):
    #         if response_code > 399:
    #             show_infobox(info_dict)
    #             raise WebErrorException(urlparse.urlparse(url)[1])

    if not 'api.themoviedb' in url and not opt.get('alfa_s', False):
        show_infobox(info_dict)

    return type('HTTPResponse', (), response)
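# A minimal usage sketch for the downloadpage() helper above, based only on its
# docstring. The URL and header values are placeholders invented for illustration
# and do not come from the original code; attribute names (.code, .data, .json)
# are the ones the docstring documents.
resp = downloadpage('https://example.com/api/items',
                    headers={'Referer': 'https://example.com/'},
                    timeout=15)
print(resp.code)       # HTTP status code (or the error text when the request failed)
print(len(resp.data))  # body of the response as str
if resp.json:          # dict parsed from the body, empty dict when it is not JSON
    print(sorted(resp.json.keys()))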
def authenticate(self):
    feedinfo = []
    try:
        with requests.session() as s:
            if mylar.VERIFY_32P == 1 or mylar.VERIFY_32P == True:
                verify = True
            else:
                verify = False
            logger.fdebug('[32P] Verify SSL set to : ' + str(verify))
            if not verify:
                # 32P throws back an insecure warning because it can't validate against the CA.
                # The below suppresses the message just for 32P instead of being displayed.
                from lib.requests.packages.urllib3.exceptions import InsecureRequestWarning
                requests.packages.urllib3.disable_warnings(InsecureRequestWarning)

            # fetch the login page
            s.headers = self.headers
            try:
                s.get(self.url, verify=verify, timeout=30)
            except (requests.exceptions.SSLError, requests.exceptions.Timeout) as e:
                logger.error(self.module + ' Unable to establish connection to 32P: ' + str(e))
                return

            # post to the login form
            r = s.post(self.url, data=self.payload, verify=verify)
            # need a way to find response code (200=OK), but returns 200 for everything even failed signons (returns a blank page)
            # logger.info('[32P] response: ' + str(r.content))

            if self.searchterm:
                logger.info('[32P] Successfully authenticated. Initiating search for : ' + self.searchterm)
                return self.search32p(s)

            soup = BeautifulSoup(r.content)
            all_script = soup.find_all("script", {"src": False})
            all_script2 = soup.find_all("link", {"rel": "alternate"})

            for ind_s in all_script:
                all_value = str(ind_s)
                all_items = all_value.split()
                auth_found = False
                user_found = False
                for al in all_items:
                    if al == 'authkey':
                        auth_found = True
                    elif auth_found == True and al != '=':
                        authkey = re.sub('["/;]', '', al).strip()
                        auth_found = False
                        logger.fdebug(self.module + ' Authkey found: ' + str(authkey))
                    if al == 'userid':
                        user_found = True
                    elif user_found == True and al != '=':
                        userid = re.sub('["/;]', '', al).strip()
                        user_found = False
                        logger.fdebug(self.module + ' Userid found: ' + str(userid))

            authfound = False
            logger.info(self.module + ' Attempting to integrate with all of your 32P Notification feeds.')

            for al in all_script2:
                alurl = al['href']
                if 'auth=' in alurl and 'torrents_notify' in alurl and not authfound:
                    f1 = alurl.find('auth=')
                    f2 = alurl.find('&', f1 + 1)
                    auth = alurl[f1 + 5:f2]
                    logger.fdebug(self.module + ' Auth:' + str(auth))
                    authfound = True
                    p1 = alurl.find('passkey=')
                    p2 = alurl.find('&', p1 + 1)
                    passkey = alurl[p1 + 8:p2]
                    logger.fdebug(self.module + ' Passkey:' + str(passkey))
                    if self.reauthenticate:
                        break
                if 'torrents_notify' in alurl and ('torrents_notify_' + str(passkey)) not in alurl:
                    notifyname_st = alurl.find('name=')
                    notifyname_en = alurl.find('&', notifyname_st + 1)
                    if notifyname_en == -1:
                        notifyname_en = len(alurl)
                    notifyname = alurl[notifyname_st + 5:notifyname_en]
                    notifynumber_st = alurl.find('torrents_notify_')
                    notifynumber_en = alurl.find('_', notifynumber_st + 17)
                    notifynumber = alurl[notifynumber_st:notifynumber_en]
                    logger.fdebug(self.module + ' [NOTIFICATION: ' + str(notifyname) + '] Notification ID: ' + str(notifynumber))
                    # generate the rss-url here
                    feedinfo.append({'feed': notifynumber + '_' + str(passkey),
                                     'feedname': notifyname,
                                     'user': userid,
                                     'auth': auth,
                                     'passkey': passkey,
                                     'authkey': authkey})
    except (requests.exceptions.Timeout, EnvironmentError):
        logger.warn('Unable to retrieve information from 32Pages - either it is not responding/is down or something else is happening that is stopping me.')
        return

    # set the keys here that will be used to download.
    try:
        mylar.PASSKEY_32P = passkey
        mylar.AUTHKEY_32P = authkey  # probably not needed here.
        mylar.KEYS_32P = {}
        mylar.KEYS_32P = {"user": userid,
                          "auth": auth,
                          "passkey": passkey,
                          "authkey": authkey}
    except NameError:
        logger.warn('Unable to retrieve information from 32Pages - either it is not responding/is down or something else is happening that is stopping me.')
        return

    if self.reauthenticate:
        return
    else:
        mylar.FEEDINFO_32P = feedinfo
        return feedinfo
def execute(args):
    pp = pprint.PrettyPrinter(indent=2)
    try:
        # verbose
        verbose = args['v']
        # veryverbose
        veryverbose = args['vv']
        if veryverbose:
            verbose = True
        # istSOS service
        service = args['s']
        # constraint role
        role = "urn:ogc:def:classifiers:x-istsos:1.0:qualityIndex:check:reasonable"
        # filename
        csvfile = args['f']

        req = requests.session()

        # Open CSV file (it is only read, so plain read mode is enough)
        fo = open(csvfile, "r")

        # check file validity
        rlines = [row.strip().split(",") for row in fo.readlines() if row.strip() != ""]
        lines = []
        for line in rlines:
            lines.append([c.strip() for c in line])

        # load sensor description
        res = req.get("%s/procedures/operations/getlist" % (service), prefetch=True, verify=False)
        jj = json.loads(res.content)
        if veryverbose:
            print "RETRIEVING PROCEDURES..."
            pp.pprint(res.json)
            print "---------------------"
        elif verbose:
            if jj['success'] is False:
                pp.pprint(res.json)
                print "---------------------"

        procedures = dict((i["name"], [j["name"] for j in i["observedproperties"]]) for i in jj["data"])

        for nr, line in enumerate(lines):
            line = [l.strip() for l in line]
            if len(line) == 4:
                if not line[0] in procedures.keys():
                    raise Exception("[line %s]: procedure '%s' not observed by the istsos service!" % (nr, line[0]))
                if not "-".join(line[1].split(":")[-2:]) in procedures[line[0]]:
                    raise Exception("[line %s]: procedure '%s' does not observe property '%s'!" % (nr, line[0], line[1]))
                if not (is_number(line[2]) or line[2] == ""):
                    raise Exception("[line %s]: value '%s' at column 3 should represent min values if present, check it is a number!" % (nr, line[2]))
                if not (is_number(line[3]) or line[3] == ""):
                    raise Exception("[line %s]: value '%s' at column 4 should represent max values if present, check it is a number!" % (nr, line[3]))
            else:
                raise Exception("[line %s]: %s input file must contain 4 columns: station name, observed property URI, min, max" % (nr, line))

        for nr, line in enumerate(lines):
            if line:
                # load sensor description
                res = req.get("%s/procedures/%s" % (service, line[0]), prefetch=True, verify=False)
                ds = json.loads(res.content)
                if veryverbose:
                    print "RETRIEVING PROCEDURES..."
                    pp.pprint(res.json)
                    print "---------------------"
                elif verbose:
                    if ds['success'] is False:
                        pp.pprint(res.json)
                        print "---------------------"

                # update constraints in Json
                for opr in ds["data"]["outputs"]:
                    if opr["definition"] == line[1]:
                        opr["constraint"] = {}
                        opr["constraint"]["role"] = role
                        if line[2] and line[3]:
                            opr["constraint"]["interval"] = [float(line[2]), float(line[3])]
                        elif not line[2] and line[3]:
                            opr["constraint"]["max"] = float(line[3])
                        elif line[2] and not line[3]:
                            opr["constraint"]["min"] = float(line[2])

                # send Json request to update constraint on service
                res = req.put("%s/procedures/%s" % (service, line[0]), prefetch=True, verify=False,
                              data=json.dumps(ds["data"]))

                # read response
                jj = json.loads(res.content)
                if veryverbose:
                    print "SAVING PROCEDURE %s..." % line[0]
                    pp.pprint(json.dumps(ds["data"]))
                    print "---------------------"
                print "---------------------"
                print " > Updated %s procedure success: %s" % (line[0], res.json['success'])
                if verbose:
                    if jj['success'] is False:
                        pp.pprint(res.json)
                        print "---------------------"

    except Exception as e:
        print "ERROR: %s\n\n" % e
        traceback.print_exc()
def downloadpage(url, **opt):
    # logger.info()
    """
    Open a url and return the data obtained

        @param url: url to open.
        @type url: str
        @param post: If it contains any value, it is sent by POST.
        @type post: str
        @param headers: Headers for the request; if it contains nothing the default headers will be used.
        @type headers: dict, list
        @param timeout: Timeout for the request.
        @type timeout: int
        @param follow_redirects: Indicates if redirects are to be followed.
        @type follow_redirects: bool
        @param cookies: Indicates whether cookies are to be used.
        @type cookies: bool
        @param replace_headers: If True, headers passed by the "headers" parameter completely replace the default headers.
                                If False, the headers passed by the "headers" parameter modify the default headers.
        @type replace_headers: bool
        @param add_referer: Indicates whether to add the "Referer" header using the domain of the url as its value.
        @type add_referer: bool
        @param only_headers: If True, only headers will be downloaded, omitting the content of the url.
        @type only_headers: bool
        @param random_headers: If True, use the method of selecting random headers.
        @type random_headers: bool
        @param ignore_response_code: If True, ignore the WebErrorException mechanism for errors like a 404 in veseriesonline, where it is functional data.
        @type ignore_response_code: bool
        @param use_requests: Use requests.session()
        @type use_requests: bool
        @return: Result of the request
        @rtype: HTTPResponse

            Parameter               Type    Description
            --------------------------------------------------------------------------------------------
            HTTPResponse.success:   bool    True: Request successful | False: Error when making the request
            HTTPResponse.code:      int     Server response code or error code if an error occurs
            HTTPResponse.error:     str     Description of the error in case of an error
            HTTPResponse.headers:   dict    Dictionary with server response headers
            HTTPResponse.data:      str     Response obtained from server
            HTTPResponse.json:      dict    Response obtained from the server in json format
            HTTPResponse.time:      float   Time taken to make the request
    """
    url = scrapertools.unescape(url)
    parse = urlparse.urlparse(url)
    domain = parse.netloc

    from lib import requests
    session = requests.session()

    if config.get_setting('resolver_dns') and not opt.get('use_requests', False):
        from core import resolverdns
        session.mount('https://', resolverdns.CipherSuiteAdapter(domain))

    req_headers = default_headers.copy()

    # Headers passed as parameters
    if opt.get('headers', None) is not None:
        if not opt.get('replace_headers', False):
            req_headers.update(dict(opt['headers']))
        else:
            req_headers = dict(opt['headers'])

    if domain in directIP.keys() and not opt.get('disable_directIP', False):
        req_headers['Host'] = domain
        url = urlparse.urlunparse(parse._replace(netloc=directIP.get(domain)))

    if opt.get('random_headers', False) or HTTPTOOLS_DEFAULT_RANDOM_HEADERS:
        req_headers['User-Agent'] = random_useragent()
    url = urllib.quote(url, safe="%/:=&?~#+!$,;'@()*[]")

    opt['url_save'] = url
    opt['post_save'] = opt.get('post', None)

    response = {}
    info_dict = []
    payload = dict()
    files = {}
    file_name = ''

    session.verify = opt.get('verify', True)

    if opt.get('cookies', True):
        session.cookies = cj
    session.headers.update(req_headers)

    proxy_data = {'dict': {}}

    inicio = time.time()

    if opt.get('timeout', None) is None and HTTPTOOLS_DEFAULT_DOWNLOAD_TIMEOUT is not None:
        opt['timeout'] = HTTPTOOLS_DEFAULT_DOWNLOAD_TIMEOUT
    if opt['timeout'] == 0:
        opt['timeout'] = None

    if len(url) > 0:
        try:
            if opt.get('post', None) is not None or opt.get('file', None) is not None:
                if opt.get('post', None) is not None:
                    # Convert string post in dict
                    try:
                        json.loads(opt['post'])
                        payload = opt['post']
                    except:
                        if not isinstance(opt['post'], dict):
                            post = urlparse.parse_qs(opt['post'], keep_blank_values=1)
                            payload = dict()
                            for key, value in post.items():
                                try:
                                    payload[key] = value[0]
                                except:
                                    payload[key] = ''
                        else:
                            payload = opt['post']

                # Verify 'file' and 'file_name' options to upload a buffer or file
                if opt.get('file', None) is not None:
                    if os.path.isfile(opt['file']):
                        if opt.get('file_name', None) is None:
                            path_file, opt['file_name'] = os.path.split(opt['file'])
                        files = {'file': (opt['file_name'], open(opt['file'], 'rb'))}
                        file_name = opt['file']
                    else:
                        files = {'file': (opt.get('file_name', 'Default'), opt['file'])}
                        file_name = opt.get('file_name', 'Default') + ', memory buffer'

                info_dict = fill_fields_pre(url, opt, proxy_data, file_name)
                if opt.get('only_headers', False):
                    # Makes the request with HEAD method
                    req = session.head(url, allow_redirects=opt.get('follow_redirects', True),
                                       timeout=opt['timeout'])
                else:
                    # Makes the request with POST method
                    req = session.post(url, data=payload, allow_redirects=opt.get('follow_redirects', True),
                                       files=files, timeout=opt['timeout'])
            elif opt.get('only_headers', False):
                info_dict = fill_fields_pre(url, opt, proxy_data, file_name)
                # Makes the request with HEAD method
                req = session.head(url, allow_redirects=opt.get('follow_redirects', True),
                                   timeout=opt['timeout'])
            else:
                info_dict = fill_fields_pre(url, opt, proxy_data, file_name)
                # Makes the request with GET method
                req = session.get(url, allow_redirects=opt.get('follow_redirects', True),
                                  timeout=opt['timeout'])
        except Exception as e:
            from lib import requests
            req = requests.Response()
            if not opt.get('ignore_response_code', False) and not proxy_data.get('stat', ''):
                response['data'] = ''
                response['success'] = False
                info_dict.append(('Success', 'False'))
                import traceback
                response['code'] = traceback.format_exc()
                info_dict.append(('Response code', str(e)))
                info_dict.append(('Finished in', time.time() - inicio))
                if not opt.get('alfa_s', False):
                    show_infobox(info_dict)
                return type('HTTPResponse', (), response)
            else:
                req.status_code = str(e)
    else:
        response['data'] = ''
        response['success'] = False
        response['code'] = ''
        return type('HTTPResponse', (), response)

    response_code = req.status_code
    response['url'] = req.url
    response['data'] = req.content if req.content else ''

    if type(response['data']) != str:
        try:
            response['data'] = response['data'].decode('utf-8')
        except:
            response['data'] = response['data'].decode('ISO-8859-1')

    if req.headers.get('Server', '').startswith('cloudflare') and response_code in [429, 503, 403] \
            and not opt.get('CF', False) and 'Ray ID' in response['data'] and not opt.get('post', None):
        logger.debug("CF retry... for domain: %s" % domain)
        from lib import proxytranslate
        gResp = proxytranslate.process_request_proxy(url)
        if gResp:
            req = gResp['result']
            response_code = req.status_code
            response['url'] = gResp['url']
            response['data'] = gResp['data']

    if not response['data']:
        response['data'] = ''

    try:
        response['json'] = to_utf8(req.json())
    except:
        response['json'] = dict()

    response['code'] = response_code
    response['headers'] = req.headers
    response['cookies'] = req.cookies

    info_dict, response = fill_fields_post(info_dict, req, response, req_headers, inicio)

    if opt.get('cookies', True):
        save_cookies(alfa_s=opt.get('alfa_s', False))

    if not 'api.themoviedb' in url and not opt.get('alfa_s', False):
        show_infobox(info_dict)

    if not config.get_setting("debug"):
        logger.info('Page URL:', url)

    return type('HTTPResponse', (), response)
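# Both downloadpage() variants above return type('HTTPResponse', (), response): a class
# built on the fly whose class attributes are the keys of the local response dict. The
# short, self-contained sketch below only illustrates that standard Python pattern; the
# dict values are made up and do not come from the code above.
response = {'code': 200, 'data': '<html></html>', 'headers': {'Server': 'nginx'}}
HTTPResponse = type('HTTPResponse', (), response)  # dict keys become class attributes
print(HTTPResponse.code)               # 200
print(HTTPResponse.headers['Server'])  # nginx
# The class object itself (not an instance) is returned, so callers read the attributes
# straight from it, e.g. resp = downloadpage(url); resp.data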
def __init__(self, name, url, service, folderIn, pattern, folderOut=None,
             qualityIndex=False, exceptionBehaviour={}, user=None, password=None,
             debug=False, csvlength=5000, filenamecheck=None, archivefolder=None,
             extra={}):
    """
    Info:
        name: procedure name
        url: url of the istSOS service
        service: service instance name
        folderIn: folder where raw data are stored (file:///home/pippo/rawdata)
        pattern: name of the raw data file (can contain wildcards *, eg: "T_TRE_*.dat")
        folderOut: folder where the formatted istSOS type files are created
        qualityIndex: force a quality index value for all the observed properties
        exceptionBehaviour: example: { "RedundacyError": "overwrite", "StrictTimeSeriesError": "raise" }
        user and password: if user and password are required ...
        filenamecheck = {
            'dateformat': '12_%Y-%m-%d_%H%M%S.dat',
            'datetz': '+01:00',
            'replace': ['_P', '_M'],
            'timedelta': timedelta(days=1)
        }
    """
    # Can be used to speed up directory reading, doing it only once
    # > "folderIn" and "pattern" must be identical
    self.extra = extra
    self.fileArray = None
    self.name = name
    self.url = url
    self.service = service
    self.folderIn = folderIn
    self.pattern = pattern

    # Messages collected during processing
    self.messages = []
    self.warnings = []
    self.exceptions = []

    self.debugfile = False
    self.debugConverter = False
    if debug == 'file':
        self.debug = False
        try:
            self.debugfile = open(os.path.join(self.folderOut, "log.txt"), "w")
        except Exception as e:
            self.log(str(e))
            self.debug = True
            self.debugfile = False
    elif isinstance(debug, DebugConverter):
        self.debugConverter = debug
        self.debugConverter.addConverter(self)
    else:
        self.debug = debug

    self.addMessage("%s initialization" % name)
    self.req = requests.session()
    self.folderOut = folderOut if folderOut is not None else tempfile.mkdtemp()
    self.qualityIndex = qualityIndex
    self.user = user
    self.password = password
    self.auth = (self.user, self.password) if (self.user != None and self.password != None) else None
    self.archivefolder = archivefolder

    # Used in the function "skipFile"
    self.fndtz = '+01:00'
    self.fntd = self.fnre = self.fndf = None
    if type(filenamecheck) == type({}):
        if 'dateformat' in filenamecheck:
            self.fndf = filenamecheck['dateformat']
        if 'datetz' in filenamecheck:
            self.fndtz = filenamecheck['datetz']
        if 'replace' in filenamecheck:
            self.fnre = filenamecheck['replace']
        if 'timedelta' in filenamecheck:
            if not isinstance(filenamecheck['timedelta'], timedelta):
                raise InitializationError(
                    "filenamecheck configuration contains a timedelta attribute, "
                    "it shall be an instance of datetime.timedelta.")
            self.fntd = filenamecheck['timedelta']
            # >>> year = timedelta(days=365)

    # Array where Observation are stored during the parse operation
    self.observations = []
    self.observationsCheck = {}
    self.describe = None
    self.endPosition = None
    self.log("%s initialized." % self.name)

    # Single loop execution information
    self.executing = {'file': None}

    # Load describeSensor from istSOS WALib (http://localhost/istsos/wa/istsos/services/demo/procedures/T_LUGANO)
    self.loadSensorMetadata()
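# A hypothetical instantiation of the converter described by the docstring above. The
# class name "Converter" and every argument value here are placeholders chosen for
# illustration; only the parameter names and the filenamecheck layout come from the
# docstring itself.
from datetime import timedelta

converter = Converter(
    name="T_TRE",
    url="http://localhost/istsos",
    service="demo",
    folderIn="file:///home/pippo/rawdata",
    pattern="T_TRE_*.dat",
    folderOut="/tmp/istsos_out",   # omit to let the converter create a temporary folder
    user="admin",
    password="secret",             # used together as HTTP basic auth when both are set
    filenamecheck={
        'dateformat': '12_%Y-%m-%d_%H%M%S.dat',
        'datetz': '+01:00',
        'replace': ['_P', '_M'],
        'timedelta': timedelta(days=1),
    })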
def authenticate(self):
    feedinfo = []
    try:
        with requests.session() as s:
            if mylar.VERIFY_32P == 1 or mylar.VERIFY_32P == True:
                verify = True
            else:
                verify = False
            logger.fdebug('[32P] Verify SSL set to : ' + str(verify))
            if not verify:
                # 32P throws back an insecure warning because it can't validate against the CA.
                # The below suppresses the message just for 32P instead of being displayed.
                from lib.requests.packages.urllib3.exceptions import InsecureRequestWarning
                requests.packages.urllib3.disable_warnings(InsecureRequestWarning)

            # fetch the login page
            s.headers = self.headers
            try:
                t = s.get(self.url, verify=verify, timeout=30)
            except (requests.exceptions.SSLError, requests.exceptions.Timeout) as e:
                logger.error(self.module + ' Unable to establish connection to 32P: ' + str(e))
                return

            chksoup = BeautifulSoup(t.content)
            chksoup.prettify()
            chk_login = chksoup.find_all("form", {"id": "loginform"})
            if not chk_login:
                logger.warn(self.module + ' Something is wrong - either 32p is offline, or your account has been temporarily banned (possibly).')
                logger.warn(self.module + ' Disabling provider until this gets addressed by manual intervention.')
                return "disable"

            for ck in chk_login:
                # <div><div id='recaptchadiv'></div><input type='hidden' id='recaptchainp' value='' name='recaptchainp' /></div>
                captcha = ck.find("div", {"id": "recaptchadiv"})
                capt_error = ck.find("span", {"class": "notice hidden", "id": "formnotice"})
                error_msg = ck.find("span", {"id": "formerror"})
                if error_msg:
                    loginerror = " ".join(list(error_msg.stripped_strings))
                    logger.warn(self.module + ' Warning: ' + loginerror)
                if capt_error:
                    aleft = ck.find("span", {"class": "info"})
                    attemptsleft = " ".join(list(aleft.stripped_strings))
                    if int(attemptsleft) < 6:
                        logger.warn(self.module + ' ' + str(attemptsleft) + ' sign-on attempts left.')
                if captcha:
                    logger.warn(self.module + ' Captcha detected. Temporarily disabling 32p (to re-enable answer the captcha manually in a normal browser or wait ~10 minutes...')
                    return "disable"
                else:
                    logger.fdebug(self.module + ' Captcha currently not present - continuing to signon...')

                if self.test:
                    rtnmsg = ''
                    if (not capt_error and not error_msg) or (capt_error and int(attemptsleft) == 6):
                        rtnmsg += '[No Warnings/Errors]'
                    else:
                        if capt_error and int(attemptsleft) < 6:
                            rtnmsg = '[' + str(attemptsleft) + ' sign-on attempts left]'
                        if error_msg:
                            rtnmsg += '[' + error_msg + ']'
                    if not captcha:
                        rtnmsg += '[No Captcha]'
                    else:
                        rtnmsg += '[Captcha Present!]'
                    return rtnmsg

            # post to the login form
            r = s.post(self.url, data=self.payload, verify=verify)
            # need a way to find response code (200=OK), but returns 200 for everything even failed signons (returns a blank page)
            # logger.info('[32P] response: ' + str(r.content))

            soup = BeautifulSoup(r.content)
            soup.prettify()
            # check for invalid username/password and if it's invalid - disable provider so we don't autoban (manual intervention is required after).
            chk_login = soup.find_all("form", {"id": "loginform"})
            for ck in chk_login:
                captcha = ck.find("div", {"id": "recaptchadiv"})
                errorlog = ck.find("span", {"id": "formerror"})
                errornot = ck.find("span", {"class": "notice hidden", "id": "formnotice"})
                loginerror = " ".join(list(errorlog.stripped_strings))  # login_error.findNext(text=True)
                noticeerror = " ".join(list(errornot.stripped_strings))  # notice_error.findNext(text=True)
                if captcha:
                    logger.warn(self.module + ' Captcha detected. Temporarily disabling 32p (to re-enable answer the captcha manually in a normal browser or wait ~10 minutes')
                if errorlog:
                    logger.error(self.module + ' Error: ' + loginerror)
                if errornot:
                    aleft = ck.find("span", {"class": "info"})
                    attemptsleft = " ".join(list(aleft.stripped_strings))
                    if int(attemptsleft) < 6:
                        logger.warn(self.module + ' ' + str(attemptsleft) + ' sign-on attempts left.')
                logger.error(self.module + ' Disabling 32P provider until errors can be fixed in order to avoid temporary bans.')
                return "disable"

            if not self.searchterm:
                logger.info('[32P] Successfully authenticated. Verifying authentication & passkeys for usage.')
            else:
                logger.info('[32P] Successfully authenticated. Initiating search for : ' + self.searchterm)
                return self.search32p(s)

            all_script = soup.find_all("script", {"src": False})
            all_script2 = soup.find_all("link", {"rel": "alternate"})

            for ind_s in all_script:
                all_value = str(ind_s)
                all_items = all_value.split()
                auth_found = False
                user_found = False
                for al in all_items:
                    if al == 'authkey':
                        auth_found = True
                    elif auth_found == True and al != '=':
                        authkey = re.sub('["/;]', '', al).strip()
                        auth_found = False
                        logger.fdebug(self.module + ' Authkey found: ' + str(authkey))
                    if al == 'userid':
                        user_found = True
                    elif user_found == True and al != '=':
                        userid = re.sub('["/;]', '', al).strip()
                        user_found = False
                        logger.fdebug(self.module + ' Userid found: ' + str(userid))

            authfound = False
            logger.info(self.module + ' Attempting to integrate with all of your 32P Notification feeds.')

            for al in all_script2:
                alurl = al['href']
                if 'auth=' in alurl and 'torrents_notify' in alurl and not authfound:
                    f1 = alurl.find('auth=')
                    f2 = alurl.find('&', f1 + 1)
                    auth = alurl[f1 + 5:f2]
                    logger.fdebug(self.module + ' Auth:' + str(auth))
                    authfound = True
                    p1 = alurl.find('passkey=')
                    p2 = alurl.find('&', p1 + 1)
                    passkey = alurl[p1 + 8:p2]
                    logger.fdebug(self.module + ' Passkey:' + str(passkey))
                    if self.reauthenticate:
                        break
                if 'torrents_notify' in alurl and ('torrents_notify_' + str(passkey)) not in alurl:
                    notifyname_st = alurl.find('name=')
                    notifyname_en = alurl.find('&', notifyname_st + 1)
                    if notifyname_en == -1:
                        notifyname_en = len(alurl)
                    notifyname = alurl[notifyname_st + 5:notifyname_en]
                    notifynumber_st = alurl.find('torrents_notify_')
                    notifynumber_en = alurl.find('_', notifynumber_st + 17)
                    notifynumber = alurl[notifynumber_st:notifynumber_en]
                    logger.fdebug(self.module + ' [NOTIFICATION: ' + str(notifyname) + '] Notification ID: ' + str(notifynumber))
                    # generate the rss-url here
                    feedinfo.append({'feed': notifynumber + '_' + str(passkey),
                                     'feedname': notifyname,
                                     'user': userid,
                                     'auth': auth,
                                     'passkey': passkey,
                                     'authkey': authkey})
    except (requests.exceptions.Timeout, EnvironmentError):
        logger.warn('Unable to retrieve information from 32Pages - either it is not responding/is down or something else is happening that is stopping me.')
        return

    # set the keys here that will be used to download.
    try:
        mylar.PASSKEY_32P = passkey
        mylar.AUTHKEY_32P = authkey  # probably not needed here.
        mylar.KEYS_32P = {}
        mylar.KEYS_32P = {"user": userid,
                          "auth": auth,
                          "passkey": passkey,
                          "authkey": authkey}
    except NameError:
        logger.warn('Unable to retrieve information from 32Pages - either it is not responding/is down or something else is happening that is stopping me.')
        return

    if self.reauthenticate:
        return
    else:
        mylar.FEEDINFO_32P = feedinfo
        return feedinfo