Example #1
File: sublert.py Project: mmg1/sublert
def posting_to_slack(result, dns_resolve, dns_output): # send results to the Slack workspace
    global domain_to_monitor
    global new_subdomains
    if dns_resolve:
        dns_result = dns_output
        if dns_result:
            rev_url = []
            print(colored("\n[!] Exporting result to Slack. Please don't interrupt!", "red"))
            for url in dns_result:
                url = url.replace('*.', '')
                url = "https://" + url.replace('+ ', '')
                rev_url.append(get_fld(url))
            for subdomain in new_subdomains:
                subdomain = subdomain.replace('*.','')
                subdomain = subdomain.replace('+ ','')
                data = "<!channel> :new: {}".format(subdomain)
                slack(data)
                try:
                    if dns_result[subdomain]["A"]:
                        for i in dns_result[subdomain]["A"]:
                            data = "```A : {}```".format(i)
                            slack(data)
                except: pass
                try:
                    if dns_result[subdomain]['CNAME']:
                        for i in dns_result[subdomain]['CNAME']:
                            data = "```CNAME : {}```".format(i)
                            slack(data)
                except: pass
            print(colored("\n[!] Done. ", "green"))
            rev_url = list(set(rev_url))
            for url in rev_url:
                os.system("rm -f ./output/" + url.lower() + ".txt")
                os.system("mv -f ./output/" + url.lower() + "_tmp.txt " + "./output/" + url.lower() + ".txt") #save the temporary one
            os.system("rm -f ./output/*_tmp.txt") #remove the remaining tmp files

    elif result:
        rev_url = []
        print(colored("\n[!] Exporting the result to Slack. Please don't interrupt!", "red"))
        for url in result:
            url = "https://" + url.replace('+ ', '')
            rev_url.append(get_fld(url))
            data = "<!channel> :new: {}".format(url)
            slack(data)
        print(colored("\n[!] Done. ", "green"))
        rev_url = list(set(rev_url))

        for url in rev_url:
            os.system("rm -f ./output/" + url.lower() + ".txt")
            os.system("mv -f ./output/" + url.lower() + "_tmp.txt " + "./output/" + url.lower() + ".txt") #save the temporary one
        os.system("rm -f ./output/*_tmp.txt") #remove the remaining tmp files

    else:
        if not domain_to_monitor:
            data = "<!channel> :-1: We couldn't find any new subdomains."
            slack(data)
            print(colored("\n[!] Done. ", "green"))
            os.system("rm -f ./output/*_tmp.txt")
        else: pass
Example #2
def get_http_requests(conn, domain, id):
    query="select url from http_requests \
                where visit_id="+str(id)

    try:
        cur = conn.cursor()
        cur.execute(query)

        rows = cur.fetchall()
        
        # a request is first-party when its get_fld() matches the get_fld() of the site being analysed; the rest are third-party
        first_requests=[ele for ele in rows if domain==tld.get_fld(ele[0], fail_silently=True)]
        third_requests=[ele for ele in rows if domain!=tld.get_fld(ele[0], fail_silently=True)]
    except Error as e:
        return 0

    return [len(first_requests), len(third_requests)]
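For reference, a minimal sketch (with hypothetical URLs) of the first-party/third-party split used above: get_fld reduces each request URL to its registrable domain, which is then compared against the domain of the site being analysed.

import tld

site_domain = "example.com"  # hypothetical site under analysis
rows = [
    ("https://cdn.example.com/app.js",),       # same fld as the site -> first party
    ("https://tracker.adnet.net/pixel.gif",),  # different fld -> third party
]
first_requests = [ele for ele in rows if site_domain == tld.get_fld(ele[0], fail_silently=True)]
third_requests = [ele for ele in rows if site_domain != tld.get_fld(ele[0], fail_silently=True)]
print(len(first_requests), len(third_requests))  # 1 1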
Example #3
 def get_website(self, soup):
     try:
         url = soup.find('span', attrs={'itemprop': 'url'}).get_text().strip()
         if not url.startswith('http'):
             url = 'http://' + url
         return get_fld(url)
     except Exception:
         return None
Example #4
 def get_website(self, soup):
     try:
         s = soup.find('a', class_='vendor-backlink').get('href')
         if not s.startswith('http'):
             s = 'http://' + s
         return get_fld(s)
     except Exception:
         return None
Example #5
File: sublert.py Project: mmg1/sublert
def domain_sanity_check(domain): # verify domain name sanity
    if domain:
        if ("http://" or "https://") not in domain:
            try:
                domain = get_fld("https://" + domain)
                return domain
            except:
                print(colored("[!] Incorrect domain format. Please follow this format: example.com, https://example.com, www.example.com", "red"))
                sys.exit(1)
        else:
            try:
                domain = get_fld(domain)
                return domain
            except:
                print(colored("[!] Incorrect domain name. Please follow this format: example.com, https://example.com, www.example.com", "red"))
                sys.exit(1)
    else:
        pass
Example #6
 def get_website(self, soup):
     try:
         site = soup.find('a', {'itemprop': 'url'}).get('href')
         if not site.startswith('http'):
             site = 'http://' + site
         return get_fld(site)
     except Exception:
         return None
Example #7
 def get_website(self, soup):
     try:
         return get_fld(
             soup.find(
                 'a',
                 text=re.compile('^Visit.+Website$')).get('href').strip())
     except Exception:
         return None
Example #8
def get_website(site):
    '''
    Extract the registrable (first level) domain from a site URL.
    '''

    temp_value = get_fld(site, fix_protocol=True)
    temp_value = temp_value.replace(".{username}", "").replace("{username}.", "")
    return temp_value
Example #9
def get_format_url(url, a_doc, host):
    a_href = a_doc.get('href')
    try:
        if a_href is not None and len(a_href) > 0:
            a_href = str(a_href).strip()
            a_href = a_href[:a_href.index('#')] if '#' in a_href else a_href
            # a_href = a_href.encode('utf8')
            # a_href = urllib.quote(a_href,safe='.:/?&=')
            if a_href.startswith('//'):
                url = 'https:' + a_href if url.startswith(
                    'https:') else 'http:' + a_href
                url = mx.URL.URL(str(url))
                a_href = url.url
            elif a_href.startswith('/'):
                url = 'https://' + host + a_href if url.startswith(
                    'https:') else 'http://' + host + a_href
                url = mx.URL.URL(str(url))
                a_href = url.url
            elif a_href.startswith('./') or a_href.startswith('../'):
                url = mx.URL.URL(str(url) + '/' + a_href)
                a_href = url.url
            elif not a_href.startswith('javascript') and not a_href.startswith(
                    'mailto') and not a_href.startswith(
                        'http') and a_href != '':
                url = 'https://' + host + '/' + a_href if url.startswith(
                    'https:') else 'http://' + host + '/' + a_href
                url = mx.URL.URL(str(url))
                a_href = url.url
            a_href = a_href[:-1] if a_href.endswith('/') else a_href
            #a_href = a_href.lower()
        get_fld(a_href)
    except:
        return ''

    if not a_href.startswith('http'):
        return ''

    if '?' in a_href:
        a_params_str = a_href[a_href.index('?') + 1:]
        a_params = a_params_str.split('&')
        a_params.sort()
        a_params_str = '&'.join(a_params)
        a_href = a_href[:a_href.index('?') + 1] + a_params_str

    return a_href
Example #10
def wirte_sorl(head, body):
    with open("arrange.sorl", "w") as fp:
        for line in head:
            print(line, file=fp)
        url = (x.lstrip("*.") for x in body)
        fld = (get_fld(x, fix_protocol=True) for x in url)
        for line in sorted(set(fld)):
            print(f"*.{line}", file=fp)
Example #11
def use_cdn(cname):
    try:
        fld = get_fld(cname.rstrip('.'), fix_protocol=True)
        answers = dns.resolver.query(fld, 'NS')
    except Exception:
        # debug: report the cname, since fld may be unset if get_fld itself failed
        print('[DNS] {} cannot find NS record'.format(cname))
    else:
        cdn_nss = json.load(open('cdn-ns.json'))
        for answer in answers:
            for cdn_vendor, cdn_ns_list in cdn_nss.items():
                if get_fld(answer.to_text().rstrip('.'),
                           fix_protocol=True) in cdn_ns_list:
                    ### debug
                    print('[CDN] Vendor: {}, NS: {}'.format(
                        cdn_vendor, answer.to_text()))
                    return True
    return False
Example #12
 def get_website(self, soup):
     try:
         s = soup.find('li', text=re.compile(r'www')).get_text().strip()
         if not s.startswith('http'):
             s = 'http://' + s
         return get_fld(s)
     except Exception:
         try:
             for h2 in soup.select('h2'):
                 txt = h2.get_text().strip()
                 if txt == 'Vendor Details':
                     s = h2.find_next('ul', \
                         class_='check-list').select('li')[1].get_text().strip()
                     if not s.startswith('http'):
                         s = 'http://' + s
                     return get_fld(s)
         except Exception:
             return None
Example #13
 def get_website(self, soup):
     try:
         s = soup.find('span', {'itemprop': 'author'}).find_next().find_next().get_text().strip()
         if not s.startswith('http'):
             s = 'http://' + s
         return get_fld(s)
     except Exception:
         try:
             s = MAIN_URL + soup.find('a', text=re.compile(r'Visit Website')).get('href')
             resp = request(s)
             external_url = re.findall(r'location\.replace.+?"(.+?)"', resp.text)[0]
             if 'external_click_ga' in external_url:
                 r = requests.head(external_url)
                 url = r.headers['Location']
                 return get_fld(url)
             return get_fld(external_url)
         except Exception:
             return None
Example #14
 def get(self, web):
     try:
         res = get_fld(web, fix_protocol=True)
         save = open('domain.txt', 'a')
         save.write('http://'+res+'\n')
         save.close()
         print(Fore.LIGHTBLUE_EX, '[+] http://{}'.format(res))
     except:
         pass
Example #15
 def get_website(self, soup):
     try:
         s = soup.find('span',
                       class_='website').find('a').get('href').strip()
         if not s.startswith('http'):
             s = 'http://' + s
         return get_fld(s)
     except Exception:
         return None
Example #16
    def get_root_domain(value, zone=None):
        """
        Get the root domain (FLD) for the provided value
        """
        res = get_fld(value, fix_protocol=True, fail_silently=True)
        if res is None:
            return zone

        return res
Example #17
def get_fld_from_value(value, zone):
    """
    Get the First Level Domain (FLD) for the provided value
    """
    res = get_fld(value, fix_protocol=True, fail_silently=True)
    if res is None:
        return zone

    return res
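A short sketch of the fail_silently behaviour this helper relies on: get_fld returns None instead of raising, so the caller can fall back to the supplied zone (the inputs below are illustrative).

from tld import get_fld

print(get_fld("sub.example.com", fix_protocol=True, fail_silently=True))  # example.com
print(get_fld("localhost", fix_protocol=True, fail_silently=True))        # None (no public suffix)
print(get_fld_from_value("localhost", "fallback.zone"))                   # fallback.zone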
Example #18
def extract_domain(url):
    if url is None:
        return None
    url = url if url.startswith("http") else "http://" + url
    try:
        # return the top level domain for the given url
        return get_fld(url)
    except Exception:
        return urlparse.urlparse(url).netloc
Example #19
    def get_data(self, url):
        resp = request(url)
        if resp:
            soup = BeautifulSoup(resp.text, 'lxml')

            name = soup.find('div', id='main').find('h1').find('span', class_='title').get_text().strip()
            website = None
            try:
                website = get_fld(soup.find('div', id='node-sidebar').find('div', \
                                class_='node-links').find('li', class_=re.compile('link-related-www')).find('a').get('href').strip())
            except Exception:
                pass
            desc = None
            try:
                desc = soup.find('strong', \
                            text=re.compile(name.strip(' County').upper())).previous('p').previous_element.get_text().replace(';', ',').strip()
            except Exception:
                pass
            addr = None
            try:
                addr = soup.find('div', class_='field-address').get_text().strip()
            except Exception:
                pass
            population = None
            try:
                population = soup.find('strong', text=re.compile(r'OPULATION')).next.next.strip()
            except Exception:
                pass
            inc_date = None
            try:
                inc_date = soup.find('strong', text=re.compile(r'INCORPORATION DATE')).next.next.strip()
            except Exception:
                pass
            boards = None
            try:
                boards = soup.find('strong', text=re.compile(r'BOARD')).find_parent().find_parent().find_next('ul').get_text().strip()
            except Exception:
                pass
            form_of_gov = None
            try:
                form_of_gov = soup.find('strong', text=re.compile(r'FORM OF GOVERNMENT')).next.next.strip()
            except Exception:
                pass

            data = [[
                name,
                website,
                desc,
                addr,
                population,
                inc_date,
                boards,
                form_of_gov,
                url
            ]]
            write_data(data, name)
Example #20
 def list_all_websites(self):
     '''
     list all the available websites' entries
     '''
     if len(self.websites_entries) > 0:
         for site in self.websites_entries:
             temp_value = get_fld(site["url"], fix_protocol=True)
             temp_value = temp_value.replace(".{username}", "").replace("{username}.", "")
             if not self.silent:
                 self.log.info(temp_value)
Example #21
async def blacklist_check(url: str,
                          transaction: Hub.current.scope.transaction):
    with transaction.start_child(op="task", description="Blacklist check"):
        blacklist = await open_blacklist()
        if not validate_ip(url):
            url = tld.get_fld(url, fix_protocol=True)
        if url in blacklist["blacklist"]:
            return blacklist["blacklist"][url]
        else:
            return False
Example #22
 def get_domain(url):
     domain = None
     try:
         domain = get_fld(url)
     except TldDomainNotFound:
         # Not yet known TLD or IP address or local hostname
         domain = urlparse(url).netloc
     except TldBadUrl:
         domain = None
     return domain
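For context, the exceptions handled above are provided by the tld package; a minimal sketch (illustrative hostnames) of how they map onto the fallbacks:

from urllib.parse import urlparse
from tld import get_fld
from tld.exceptions import TldBadUrl, TldDomainNotFound

print(get_domain("https://www.example.com/page"))     # example.com
print(get_domain("http://host.internal-lan/status"))  # TldDomainNotFound -> falls back to urlparse netloc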
Example #23
 def __init__(self,domain,port,page,filename):
     intclass=GetAsset(domain,port,page,filename)
     if not intclass.JudgeIP(domain):
         domain="http://"+domain
         self.domain=get_fld(domain)
     else:
         self.domain=domain
     self.port=port
     self.page=page
     self.filename=filename
Example #24
 def get_url_domain(self, url):
     """
     获取url的domain
     """
     # 加锁
     self.lock.acquire()
     domain = get_fld(url)
     #释放锁
     self.lock.release()
     return domain
Example #25
 def get_website(self, soup):
     try:
         s = soup.find('li', attrs={
             'style': re.compile('link_grey.png')
         }).get_text().strip()
         if not s.startswith('http'):
             s = 'http://' + s
         return get_fld(s)
     except Exception:
         return None
Example #26
def list_all_websites():
    '''
    list all the available websites' entries
    '''

    if len(WEBSITES_ENTRIES) > 0:
        for site in WEBSITES_ENTRIES:
            temp_value = get_fld(site["url"], fix_protocol=True)
            temp_value = temp_value.replace(".{username}", "").replace("{username}.", "")
            LOG.info(temp_value)
Example #27
File: html.py Project: openzim/sotoki
 def rewrite_user_link(self, link):
     try:
         if self.conf.without_users_links and (
                 link["href"].startswith("mailto:")
                 or get_fld(link["href"]) in SOCIAL_DOMAINS):
             self.redact_link(link)
             return 1
     except Exception as exc:
         logger.warning(f"Failed to get fld for {link.get('href')}: {exc}")
         return 0
Example #28
def get_ps1_or_ipaddress(url):
    try:
        return get_fld(url, fail_silently=False)
    except Exception:
        hostname = urlparse(url).hostname
        try:
            ipaddress.ip_address(hostname)
            return hostname
        except Exception:
            return None
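A hedged usage sketch: get_fld raises on a bare IP host (fail_silently=False), so the helper above returns the IP itself; a regular URL yields its registrable domain, and unparseable input yields None.

print(get_ps1_or_ipaddress("https://www.wikipedia.org/"))       # wikipedia.org
print(get_ps1_or_ipaddress("http://93.184.216.34/index.html"))  # 93.184.216.34
print(get_ps1_or_ipaddress("not a url"))                        # None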
Example #29
def domain_sanity_check(domain): # verify domain name sanity
    if domain:
        try:
            domain = get_fld(domain, fix_protocol = True)
            return domain
        except:
            print(colored("[!] Incorrect domain format. Please follow this format: example.com, http(s)://example.com, www.example.com", "red"))
            sys.exit(1)
    else:
        pass
Example #30
def main(market_name,ts):
    ''' Main function where the Bing search is run and the search term is built. The results are stored in a new dataframe,
    which is then exported to the BING_SEARCH_NIELSEN collection.
    :param market_name: Market (country) name
    :type market_name: str
    :param ts: Timestamp
    :type ts: int64'''
    connection = connect_mongo()
    marketmap = pd.DataFrame((connection[Settings.MARKET_MAP].find({"country":market_name.capitalize()},{"code":1,"country":1,"_id":0})))
    print(marketmap)
    reader = connection[Settings.CLEAN_NIELSEN].find({"UPPER_COUNTRY":market_name.upper(), "ts":ts},{"_id": 0})
    print(reader)
    for records in reader:
        try:
            count = connection[Settings.BRAND_SOURCE].count({"brand_low": str(records['UPPER_BRAND']).lower(), "country": str(records['UPPER_COUNTRY']).lower()})
            if count == 0:
                term = str(records['UPPER_BRAND']).lower()+" "+str(records['MANUFACTURER']).lower()
                print("fetching Records : ", term)
                brand=str(records['UPPER_BRAND']).lower()
                company=str(records['MANUFACTURER']).lower()
                market=str(records['UPPER_COUNTRY']).lower()
                if len(Settings.bing_subscription_key) == 32 and company != "private label":
                    print('Searching the Web for: ', term)
                    offset = 0
                    totalEstimatedMatches = 100
                    count = 0
                    while (offset < totalEstimatedMatches):
                        count = count + 1
                        headers, result = BingWebSearch(term, offset,marketmap, market)
                        data = json.dumps(json.loads(result), indent=4)
                        d_data = json.loads(data)
                        print("count : ", str(count))
                        print("offset : ", str(offset))
                        if 'webPages' in d_data:
                            totalEstimatedMatches = d_data['webPages']['totalEstimatedMatches']
                            news_dict = d_data['webPages']['value']
                            for news in news_dict:
                                news['hitCount'] = count
                                news['totalEstimateMatches'] = totalEstimatedMatches
                                news['fetchdate'] = datetime.datetime.utcnow()
                                news['offset'] = offset
                                news['query'] = term
                                news['count'] = 1
                                news['brand'] = brand
                                news['company'] = company
                                news['country'] = market
                                news['ts'] = ts
                                news['domain'] = get_fld(news['url'])
                                exists = connection[Settings.BING_SEARCH_NIELSEN].count({'brand': news['brand'], 'domain': news['domain'],'country':market_name})
                                if exists == 0:
                                    connection[Settings.BING_SEARCH_NIELSEN].insert(news)
                                else:
                                    connection[Settings.BING_SEARCH_NIELSEN].update_one({'brand': news['brand'], 'domain': news['domain'],'country':market_name})
                        offset = offset + len(news_dict)
                        totalEstimatedMatches = 0
        except Exception as e:
            print(str(e))
Example #31
def website_url_not_in_db(url):
    domain = db.session.query(DomainArchived).filter_by(
        name=get_fld(url)).first()
    if not domain:
        raise InvalidAPIRequest('主域名未收录,请先添加主域名')
    for model in [
            WebsiteArchived, WebsiteNews, WebsiteRecycler, WebsiteBanned,
            WebsiteDuplicated
    ]:
        if db.session.query(model).filter_by(url=url.strip('/')).first():
            raise RecordAlreadyExists('已有此网站')
Example #32
def _get_zone_id(domain):
    tld = get_fld('http://' + domain)
    url = "https://api.cloudflare.com/client/v4/zones?name={0}".format(tld)
    for auth in CF_HEADERS:
        r = requests.get(url, headers=auth)
        r.raise_for_status()
        r = r.json().get('result',())
        if r:
            return auth, r[0]['id']
    logger.error(" + Domain {0} not found in any Cloudflare account".format(tld))
    sys.exit(1)
Example #33
def extract_tld_from_url(
        url: str) -> str:
    """
    Identify the top level domain of the url.
    
    Parameters
    ----------
    url: str.
        The URL of the target website.

    Returns
    -------
    out: str.
        The TLD according to Mozilla's tables.
    """
    return get_fld(url)
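A worked call of the helper above, assuming the public suffix list bundled with the tld package: get_fld keeps multi-label suffixes intact, so the result is the registrable domain rather than only the last label.

print(extract_tld_from_url("https://www.example.co.uk/some/page"))  # example.co.uk
print(extract_tld_from_url("https://blog.example.com"))             # example.com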
Example #34
File: sublert.py Project: mmg1/sublert
    def lookup(self, domain, wildcard = True):
        base_url = "https://crt.sh/?q={}&output=json"
        if wildcard:
            domain = "%25.{}".format(domain)
        url = base_url.format(domain)
        subdomains = []
        user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:64.0) Gecko/20100101 Firefox/64.0'

        try:
            req = requests.get(url, headers={'User-Agent': user_agent}, timeout=20, verify=False) # times out after 20 seconds
            if req.status_code == 200:
                try:
                    content = req.content.decode('utf-8')
                    data = json.loads(content)
                    for subdomain in data:
                        subdomains.append(subdomain["name_value"])
                    return subdomains
                except:
                    error = "Error retrieving information for {}.".format(domain.replace('%25.', ''))
                    errorlog(error, enable_logging)
        except:
            try: #connecting to crt.sh postgres database to retrieve subdomains in case API fails
                unique_domains = []
                domain = domain.replace('%25.', '')
                conn = psycopg2.connect("dbname={0} user={1} host={2}".format(DB_NAME, DB_USER, DB_HOST))
                conn.autocommit = True
                cursor = conn.cursor()
                cursor.execute("SELECT ci.NAME_VALUE NAME_VALUE FROM certificate_identity ci WHERE ci.NAME_TYPE = 'dNSName' AND reverse(lower(ci.NAME_VALUE)) LIKE reverse(lower('%{}'));".format(domain))
                for result in cursor.fetchall():
                    matches = re.findall(r"\'(.+?)\'", str(result))
                    for subdomain in matches:
                        try:
                            if get_fld("https://" + subdomain) == domain:
                                unique_domains.append(subdomain)
                        except: pass
                return unique_domains
            except:
                print(colored("[!] Unable to connect to the database.".format(domain), "red"))
                error = "Unable to connect to the database."
                errorlog(error, enable_logging)