def analyze_url(uri: str) -> List[str]:
    """
    Analyze the given URI with webXray and list its third-party cookie domains.

    Arg:
        uri(str): Any URI that is not analyzed yet.
    Return:
        list[str]: The ``domain`` value of every reported cookie whose
            registered domain differs from that of ``uri``, each passed
            through ``remove_dot``. An empty list is returned when the
            driver output starts with ``FAIL``, contains no JSON object,
            or cannot be decoded.
    """
    parser = ParseURI()
    pd = PhantomDriver("--ignore-ssl-errors=true --ssl-protocol=any", "wbxr_logger.js")
    # NOTE(review): 25 is presumably a timeout in seconds -- confirm against
    # PhantomDriver.execute.
    output = pd.execute(uri, 25)

    if re.match("^FAIL.+", output):
        # Probably this isn't needed
        return []

    # The driver output may carry extra text around the payload; pull out the
    # outermost {...} span on a line before decoding it.
    payload = re.search(r"(\{.+\})", output)
    if payload is None:
        print(f"No JSON payload found in webXray output for {uri}")
        return []
    try:
        parsed_data = json.loads(payload.group(1))
    except ValueError as e:  # json.JSONDecodeError is a ValueError subclass
        print(e)
        return []

    orig_domain = parser.get_domain_pubsuffix_tld(uri)[0]

    tpcookie_domain_names = []
    # A missing or null "cookies" entry is treated as "no cookies".
    for cookie in parsed_data.get("cookies") or []:
        cookie_domain = cookie["domain"]
        # Only drop the first character when it really is a leading dot;
        # host-only cookie domains ("example.com") must be kept intact or the
        # first letter of the host is lost before parsing.
        if cookie_domain.startswith("."):
            host = cookie_domain[1:]
        else:
            host = cookie_domain
        if parser.get_domain_pubsuffix_tld(f"http://{host}")[0] != orig_domain:
            tpcookie_domain_names.append(remove_dot(cookie_domain))
    return tpcookie_domain_names
def __init__(self, dbname):
    """
    Set up the helpers this object works with.

    Arg:
        dbname: Value handed unchanged to ``MySQLDriver``
            (presumably the database name -- confirm against MySQLDriver).
    """
    # Parser first, then the SQL driver: the construction order is kept as-is
    # in case either constructor has side effects.
    self.uri_parser = ParseURI()
    self.sql_driver = MySQLDriver(dbname)
def __init__(self):
    """Create the ``ParseURI`` helper and store it as ``self.uri_parser``."""
    self.uri_parser = ParseURI()