Example #1
async def init_db(self):
    try:
        db = stash.StashManager()
        await db.do_init()
    except Exception as error:
        print(f'{error}')
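In the larger start() example below, this coroutine is awaited as await graph.init_db(). A minimal standalone driver might look like the following sketch; the domain is a placeholder, and it assumes init_db lives on reportgraph.GraphGenerator, as the reporting code in the later examples suggests.

import asyncio

from theHarvester.lib import reportgraph


async def main():
    # 'example.com' is a placeholder domain, not taken from the original snippet.
    graph = reportgraph.GraphGenerator('example.com')
    await graph.init_db()  # initialize the underlying stash database


asyncio.run(main())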
Example #2
async def start():
    parser = argparse.ArgumentParser(
        description='theHarvester is used to gather open source intelligence (OSINT) on a\n'
                    'company or domain.')
    parser.add_argument('-d', '--domain', help='company name or domain to search', required=True)
    parser.add_argument('-l', '--limit', help='limit the number of search results, default=500', default=500, type=int)
    parser.add_argument('-S', '--start', help='start with result number X, default=0', default=0, type=int)
    parser.add_argument('-g', '--google-dork', help='use Google Dorks for Google search', default=False, action='store_true')
    parser.add_argument('-p', '--proxies', help='use proxies for requests, enter proxies in proxies.yaml', default=False, action='store_true')
    parser.add_argument('-s', '--shodan', help='use Shodan to query discovered hosts', default=False, action='store_true')
    parser.add_argument('-v', '--virtual-host', help='verify host name via DNS resolution and search for virtual hosts', action='store_const', const='basic', default=False)
    parser.add_argument('-e', '--dns-server', help='DNS server to use for lookup')
    parser.add_argument('-t', '--dns-tld', help='perform a DNS TLD expansion discovery, default False', default=False)
    parser.add_argument('-r', '--take-over', help='Check for takeovers', default=False, action='store_true')
    parser.add_argument('-n', '--dns-lookup', help='enable DNS server lookup, default False', default=False, action='store_true')
    parser.add_argument('-c', '--dns-brute', help='perform a DNS brute force on the domain', default=False, action='store_true')
    parser.add_argument('-f', '--filename', help='save the results to an HTML and/or XML file', default='', type=str)
    parser.add_argument('-b', '--source', help='''baidu, bing, bingapi, bufferoverun, certspotter, crtsh, dnsdumpster,
                        dogpile, duckduckgo, exalead, github-code, google,
                        hunter, intelx,
                        linkedin, linkedin_links, netcraft, otx, securityTrails, spyse, threatcrowd,
                        trello, twitter, vhost, virustotal, yahoo, all''')

    args = parser.parse_args()
    try:
        db = stash.StashManager()
        await db.do_init()
    except Exception:
        pass  # a failed stash init is treated as non-fatal here

    all_emails: list = []
    all_hosts: list = []
    all_ip: list = []
    dnsbrute = args.dns_brute
    dnslookup = args.dns_lookup
    dnsserver = args.dns_server
    dnstld = args.dns_tld
    engines = []
    filename: str = args.filename
    full: list = []
    ips: list = []
    google_dorking = args.google_dork
    host_ip: list = []
    limit: int = args.limit
    shodan = args.shodan
    start: int = args.start
    all_urls: list = []
    vhost: list = []
    virtual = args.virtual_host
    word: str = args.domain
    takeover_status = args.take_over
    use_proxy = args.proxies

    async def store(search_engine: Any, source: str, process_param: Any = None, store_host: bool = False,
                    store_emails: bool = False, store_ip: bool = False, store_people: bool = False,
                    store_data: bool = False, store_links: bool = False, store_results: bool = False) -> None:
        """
        Persist details into the database.
        The details to be stored are controlled by the parameters passed to the method.

        :param search_engine: search engine to fetch details from
        :param source: source against which the details (corresponding to the search engine) need to be persisted
        :param process_param: any parameters to be passed to the search engine
                              eg: Google needs google_dorking
        :param store_host: whether to store hosts
        :param store_emails: whether to store emails
        :param store_ip: whether to store IP address
        :param store_people: whether to store user details
        :param store_data: whether to fetch host from method get_data() and persist
        :param store_links: whether to store links
        :param store_results: whether to fetch details from get_results() and persist
        """
        if process_param is None:
            await search_engine.process(use_proxy)
        else:
            await search_engine.process(process_param, use_proxy)
        db_stash = stash.StashManager()
        if source == 'suip':
            print(f'\033[94m[*] Searching {source[0].upper() + source[1:]}; this module can take 10+ minutes, '
                  f'but it is worth it. \033[0m')
        else:
            print(f'\033[94m[*] Searching {source[0].upper() + source[1:]}. \033[0m')
        if store_host:
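            # NOTE: the one-argument filter() used here and below appears to be theHarvester's
            # own cleanup/de-duplication helper (pulled in via a star import), not the Python builtin.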
            host_names = filter(await search_engine.get_hostnames())
            full_hosts_checker = hostchecker.Checker(host_names)
            temp_hosts, temp_ips = await full_hosts_checker.check()
            ips.extend(temp_ips)
            full.extend(temp_hosts)
            all_hosts.extend(host_names)
            await db_stash.store_all(word, all_hosts, 'host', source)
        if store_emails:
            email_list = filter(await search_engine.get_emails())
            all_emails.extend(email_list)
            await db_stash.store_all(word, email_list, 'email', source)
        if store_ip:
            ips_list = await search_engine.get_ips()
            all_ip.extend(ips_list)
            await db_stash.store_all(word, all_ip, 'ip', source)
        if store_data:
            data = filter(await search_engine.get_data())
            all_hosts.extend(data)
            await db_stash.store_all(word, all_hosts, 'host', source)
        if store_results:
            email_list, host_names, urls = await search_engine.get_results()
            all_emails.extend(email_list)
            host_names = filter(host_names)
            all_urls.extend(filter(urls))
            all_hosts.extend(host_names)
            await db_stash.store_all(word, all_hosts, 'host', source)
            await db_stash.store_all(word, all_emails, 'email', source)
        if store_people:
            people_list = await search_engine.get_people()
            await db_stash.store_all(word, people_list, 'people', source)
            if len(people_list) == 0:
                print('\n[*] No users found.\n\n')
            else:
                print('\n[*] Users found: ' + str(len(people_list)))
                print('---------------------')
                for usr in sorted(list(set(people_list))):
                    print(usr)
        if store_links:
            links = await search_engine.get_links()
            await db_stash.store_all(word, links, 'name', source)
            if len(links) == 0:
                print('\n[*] No links found.\n\n')
            else:
                print(f'\n[*] Links found: {len(links)}')
                print('---------------------')
                for link in sorted(list(set(links))):
                    print(link)

    stor_lst = []
    if args.source is not None:
        if args.source.lower() != 'all':
            engines = sorted(set(map(str.strip, args.source.split(','))))
        else:
            engines = Core.get_supportedengines()
        # Iterate through search engines in order
        if set(engines).issubset(Core.get_supportedengines()):
            print(f'\033[94m[*] Target: {word} \n \033[0m')

            for engineitem in engines:
                if engineitem == 'baidu':
                    from theHarvester.discovery import baidusearch
                    try:
                        baidu_search = baidusearch.SearchBaidu(word, limit)
                        stor_lst.append(store(baidu_search, engineitem, store_host=True, store_emails=True))
                    except Exception:
                        pass

                elif engineitem in ('bing', 'bingapi'):
                    from theHarvester.discovery import bingsearch
                    try:
                        bing_search = bingsearch.SearchBing(word, limit, start)
                        bingapi = 'yes' if engineitem == 'bingapi' else 'no'
                        stor_lst.append(
                            store(bing_search, 'bing', process_param=bingapi, store_host=True, store_emails=True))
                    except Exception as e:
                        print(e)

                elif engineitem == 'bufferoverun':
                    from theHarvester.discovery import bufferoverun
                    try:
                        bufferoverun_search = bufferoverun.SearchBufferover(word)
                        stor_lst.append(store(bufferoverun_search, engineitem, store_host=True, store_ip=True))
                    except Exception as e:
                        print(e)

                elif engineitem == 'certspotter':
                    from theHarvester.discovery import certspottersearch
                    try:
                        certspotter_search = certspottersearch.SearchCertspoter(word)
                        stor_lst.append(store(certspotter_search, engineitem, None, store_host=True))
                    except Exception as e:
                        print(e)

                elif engineitem == 'crtsh':
                    try:
                        from theHarvester.discovery import crtsh
                        crtsh_search = crtsh.SearchCrtsh(word)
                        stor_lst.append(store(crtsh_search, 'CRTsh', store_data=True))
                    except Exception as e:
                        print(f'\033[93m[!] A timeout occurred with crtsh, cannot find {args.domain}\n {e}\033[0m')

                elif engineitem == 'dnsdumpster':
                    try:
                        from theHarvester.discovery import dnsdumpster
                        dns_dumpster_search = dnsdumpster.SearchDnsDumpster(word)
                        stor_lst.append(store(dns_dumpster_search, engineitem, store_host=True))
                    except Exception as e:
                        print(f'\033[93m[!] An error occurred with dnsdumpster: {e} \033[0m')

                elif engineitem == 'dogpile':
                    try:
                        from theHarvester.discovery import dogpilesearch
                        dogpile_search = dogpilesearch.SearchDogpile(word, limit)
                        stor_lst.append(store(dogpile_search, engineitem, store_host=True, store_emails=True))
                    except Exception as e:
                        print(f'\033[93m[!] An error occurred with Dogpile: {e} \033[0m')

                elif engineitem == 'duckduckgo':
                    from theHarvester.discovery import duckduckgosearch
                    duckduckgo_search = duckduckgosearch.SearchDuckDuckGo(word, limit)
                    stor_lst.append(store(duckduckgo_search, engineitem, store_host=True, store_emails=True))

                elif engineitem == 'github-code':
                    try:
                        from theHarvester.discovery import githubcode
                        github_search = githubcode.SearchGithubCode(word, limit)
                        stor_lst.append(store(github_search, engineitem, store_host=True, store_emails=True))
                    except MissingKey as ex:
                        print(ex)

                elif engineitem == 'exalead':
                    from theHarvester.discovery import exaleadsearch
                    exalead_search = exaleadsearch.SearchExalead(word, limit, start)
                    stor_lst.append(store(exalead_search, engineitem, store_host=True, store_emails=True))

                elif engineitem == 'google':
                    from theHarvester.discovery import googlesearch
                    google_search = googlesearch.SearchGoogle(word, limit, start)
                    stor_lst.append(store(google_search, engineitem, process_param=google_dorking, store_host=True,
                                          store_emails=True))

                elif engineitem == 'hunter':
                    from theHarvester.discovery import huntersearch
                    # Import locally or won't work.
                    try:
                        hunter_search = huntersearch.SearchHunter(word, limit, start)
                        stor_lst.append(store(hunter_search, engineitem, store_host=True, store_emails=True))
                    except Exception as e:
                        if isinstance(e, MissingKey):
                            print(e)

                elif engineitem == 'intelx':
                    from theHarvester.discovery import intelxsearch
                    # Import locally or won't work.
                    try:
                        intelx_search = intelxsearch.SearchIntelx(word, limit)
                        stor_lst.append(store(intelx_search, engineitem, store_host=True, store_emails=True))
                    except Exception as e:
                        if isinstance(e, MissingKey):
                            print(e)
                        else:
                            print(f'An exception has occurred in Intelx search: {e}')

                elif engineitem == 'linkedin':
                    from theHarvester.discovery import linkedinsearch
                    linkedin_search = linkedinsearch.SearchLinkedin(word, limit)
                    stor_lst.append(store(linkedin_search, engineitem, store_people=True))

                elif engineitem == 'linkedin_links':
                    from theHarvester.discovery import linkedinsearch
                    linkedin_links_search = linkedinsearch.SearchLinkedin(word, limit)
                    stor_lst.append(store(linkedin_links_search, 'linkedin', store_links=True))

                elif engineitem == 'netcraft':
                    from theHarvester.discovery import netcraft
                    netcraft_search = netcraft.SearchNetcraft(word)
                    stor_lst.append(store(netcraft_search, engineitem, store_host=True))

                elif engineitem == 'otx':
                    from theHarvester.discovery import otxsearch
                    try:
                        otxsearch_search = otxsearch.SearchOtx(word)
                        stor_lst.append(store(otxsearch_search, engineitem, store_host=True, store_ip=True))
                    except Exception as e:
                        print(e)

                elif engineitem == 'securityTrails':
                    from theHarvester.discovery import securitytrailssearch
                    try:
                        securitytrails_search = securitytrailssearch.SearchSecuritytrail(word)
                        stor_lst.append(store(securitytrails_search, engineitem, store_host=True, store_ip=True))
                    except Exception as e:
                        if isinstance(e, MissingKey):
                            print(e)

                elif engineitem == 'suip':
                    from theHarvester.discovery import suip
                    try:
                        suip_search = suip.SearchSuip(word)
                        stor_lst.append(store(suip_search, engineitem, store_host=True))
                    except Exception as e:
                        print(e)

                elif engineitem == 'spyse':
                    from theHarvester.discovery import spyse
                    try:
                        spyse_search = spyse.SearchSpyse(word)
                        stor_lst.append(store(spyse_search, engineitem, store_host=True, store_ip=True))
                    except Exception as e:
                        print(e)

                elif engineitem == 'threatcrowd':
                    from theHarvester.discovery import threatcrowd
                    try:
                        threatcrowd_search = threatcrowd.SearchThreatcrowd(word)
                        stor_lst.append(store(threatcrowd_search, engineitem, store_host=True))
                    except Exception as e:
                        print(e)

                elif engineitem == 'trello':
                    from theHarvester.discovery import trello
                    # Import locally or won't work.
                    trello_search = trello.SearchTrello(word)
                    stor_lst.append(store(trello_search, engineitem, store_results=True))

                elif engineitem == 'twitter':
                    from theHarvester.discovery import twittersearch
                    twitter_search = twittersearch.SearchTwitter(word, limit)
                    stor_lst.append(store(twitter_search, engineitem, store_people=True))

                elif engineitem == 'virustotal':
                    from theHarvester.discovery import virustotal
                    virustotal_search = virustotal.SearchVirustotal(word)
                    stor_lst.append(store(virustotal_search, engineitem, store_host=True))

                elif engineitem == 'yahoo':
                    from theHarvester.discovery import yahoosearch
                    yahoo_search = yahoosearch.SearchYahoo(word, limit)
                    stor_lst.append(store(yahoo_search, engineitem, store_host=True, store_emails=True))
        else:
            print('\033[93m[!] Invalid source.\n\n \033[0m')
            sys.exit(1)

    async def worker(queue):
        while True:
            # Get a "work item" out of the queue.
            stor = await queue.get()
            try:
                await stor
            except Exception:
                pass  # a failing engine must not kill the worker
            finally:
                # Notify the queue that the "work item" has been processed.
                queue.task_done()

    async def handler(lst):
        queue = asyncio.Queue()

        for stor_method in lst:
            # enqueue the coroutines
            queue.put_nowait(stor_method)
        # Create five worker tasks to process the queue concurrently.
        tasks = []
        for _ in range(5):
            task = asyncio.create_task(worker(queue))
            tasks.append(task)

        # Wait until the queue is fully processed.
        await queue.join()

        # Cancel our worker tasks.
        for task in tasks:
            task.cancel()
        # Wait until all worker tasks are cancelled.
        await asyncio.gather(*tasks, return_exceptions=True)

    await handler(lst=stor_lst)

    # Sanity check (defensive; all_emails and all_hosts are initialized above, so this should never trigger).
    try:
        all_emails
    except NameError:
        print('\n\n\033[93m[!] No emails found because all_emails is not defined.\n\n \033[0m')
        sys.exit(1)
    try:
        all_hosts
    except NameError:
        print('\n\n\033[93m[!] No hosts found because all_hosts is not defined.\n\n \033[0m')
        sys.exit(1)

    # Results
    if len(all_ip) == 0:
        print('\n[*] No IPs found.')
    else:
        print('\n[*] IPs found: ' + str(len(all_ip)))
        print('-------------------')
        # use netaddr as the list may contain ipv4 and ipv6 addresses
        ip_list = [netaddr.IPAddress(ip.strip()) for ip in set(all_ip)]
        ip_list.sort()
        print('\n'.join(map(str, ip_list)))

    if len(all_emails) == 0:
        print('\n[*] No emails found.')
    else:
        print('\n[*] Emails found: ' + str(len(all_emails)))
        print('----------------------')
        print('\n'.join(sorted(set(all_emails))))

    if len(all_hosts) == 0:
        print('\n[*] No hosts found.\n\n')
    else:
        print('\n[*] Hosts found: ' + str(len(all_hosts)))
        print('---------------------')
        all_hosts = sorted(set(all_hosts))
        # full_host = hostchecker.Checker(all_hosts)
        # full, ips = await full_host.check()
        db = stash.StashManager()
        full.sort(key=lambda el: el.split(':')[0])
        for host in full:
            host = str(host)
            print(host)
        host_ip = [netaddr_ip.format() for netaddr_ip in sorted([netaddr.IPAddress(ip) for ip in ips])]
        await db.store_all(word, host_ip, 'ip', 'DNS-resolver')
    length_urls = len(all_urls)
    if length_urls == 0:
        if 'trello' in engines:
            print('\n[*] No Trello URLs found.')
    else:
        print('\n[*] Trello URLs found: ' + str(length_urls))
        print('--------------------')
        for url in sorted(all_urls):
            print(url)

    # DNS brute force
    # dnsres = []
    if dnsbrute is True:
        print('\n[*] Starting DNS brute force.')
        dns_force = dnssearch.DnsForce(word, dnsserver, verbose=True)
        dns_force.process()  # NOTE: if DnsForce.process is a coroutine in this version, this call needs an await
        # print('\n[*] Hosts found after DNS brute force:')
        # for y in res:
        # print('-------------------------------------')
        #    print(y)
        #   dnsres.append(y.split(':')[0])
        #    if y not in full:
        #        full.append(y)
        # db = stash.stash_manager()
        # db.store_all(word, dnsres, 'host', 'dns_bruteforce')

    # TakeOver Checking

    if takeover_status:
        print('\n[*] Performing subdomain takeover check')
        print('\n[*] Subdomain Takeover checking IS ACTIVE RECON')
        search_take = takeover.TakeOver(all_hosts)
        await search_take.process(proxy=use_proxy)

    # DNS reverse lookup
    dnsrev = []
    if dnslookup is True:
        print('\n[*] Starting active queries.')
        # load the reverse dns tools
        from theHarvester.discovery.dnssearch import (
            generate_postprocessing_callback,
            reverse_all_ips_in_range,
            serialize_ip_range)

        # reverse each iprange in a separate task
        __reverse_dns_tasks = {}
        for entry in host_ip:
            __ip_range = serialize_ip_range(ip=entry, netmask='24')
            if __ip_range and __ip_range not in __reverse_dns_tasks:
                print('\n[*] Performing reverse lookup on ' + __ip_range)
                __reverse_dns_tasks[__ip_range] = asyncio.create_task(reverse_all_ips_in_range(
                    iprange=__ip_range,
                    callback=generate_postprocessing_callback(
                        target=word,
                        local_results=dnsrev,
                        overall_results=full),
                    nameservers=[dnsserver] if dnsserver else None))

        # run all the reversing tasks concurrently
        await asyncio.gather(*__reverse_dns_tasks.values())

        # Display the newly found hosts
        print('[*] Hosts found after reverse lookup (in target domain):')
        print('--------------------------------------------------------')
        for xh in dnsrev:
            print(xh)

    # DNS TLD expansion
    dnstldres = []
    if dnstld is True:
        print('[*] Starting DNS TLD expansion.')
        dns_tld = dnssearch.DnsTld(word, dnsserver, verbose=True)
        res = dns_tld.process()
        print('\n[*] Hosts found after DNS TLD expansion:')
        print('----------------------------------------')
        for y in res:
            print(y)
            dnstldres.append(y)
            if y not in full:
                full.append(y)

    # Virtual hosts search
    if virtual == 'basic':
        print('\n[*] Virtual hosts:')
        print('------------------')
        for ip in host_ip:
            basic_search = bingsearch.SearchBing(ip, limit, start)
            await basic_search.process_vhost()
            results = await basic_search.get_allhostnames()
            for result in results:
                # strip leftover HTML tag fragments from the hostname
                result = re.sub(r'[[\<\/?]*[\w]*>]*', '', result)
                result = re.sub('<', '', result)
                result = re.sub('>', '', result)
                print(ip + '\t' + result)
                vhost.append(ip + ':' + result)
                full.append(ip + ':' + result)
        vhost = sorted(set(vhost))

    # Shodan
    shodanres = []
    if shodan is True:
        import texttable
        tab = texttable.Texttable()
        header = ['IP address', 'Hostname', 'Org', 'Services:Ports', 'Technologies']
        tab.header(header)
        tab.set_cols_align(['c', 'c', 'c', 'c', 'c'])
        tab.set_cols_valign(['m', 'm', 'm', 'm', 'm'])
        tab.set_chars(['-', '|', '+', '#'])
        tab.set_cols_width([15, 20, 15, 15, 18])
        print('\033[94m[*] Searching Shodan. \033[0m')
        try:
            for ip in host_ip:
                print('\tSearching for ' + ip)
                shodan_search = shodansearch.SearchShodan()
                rowdata = await shodan_search.search_ip(ip)
                await asyncio.sleep(2)
                tab.add_row(rowdata)
            printedtable = tab.draw()
            print(printedtable)
        except Exception as e:
            print(f'\033[93m[!] An error occurred with Shodan: {e} \033[0m')

    # Here we need to add explosion mode.
    # We have to take out the TLDs to do this.
    if args.dns_tld is not False:
        counter = 0
        for vhost_entry in vhost:  # avoid clobbering 'word', which the reporting section below still needs
            search = googlesearch.SearchGoogle(vhost_entry, limit, counter)
            await search.process(google_dorking)
            emails = await search.get_emails()
            hosts = await search.get_hostnames()
            print(emails)
            print(hosts)

    # Reporting
    if filename != "":
        try:
            print('\n[*] Reporting started.')
            db = stash.StashManager()
            scanboarddata = await db.getscanboarddata()
            latestscanresults = await db.getlatestscanresults(word)
            previousscanresults = await db.getlatestscanresults(word, previousday=True)
            latestscanchartdata = await db.latestscanchartdata(word)
            scanhistorydomain = await db.getscanhistorydomain(word)
            pluginscanstatistics = await db.getpluginscanstatistics()
            generator = statichtmlgenerator.HtmlGenerator(word)
            HTMLcode = await generator.beginhtml()
            HTMLcode += await generator.generatedashboardcode(scanboarddata)
            HTMLcode += await generator.generatelatestscanresults(latestscanresults)
            graph = reportgraph.GraphGenerator(word)
            await graph.init_db()
            HTMLcode += await graph.drawlatestscangraph(word, latestscanchartdata)
            HTMLcode += await graph.drawscattergraphscanhistory(word, scanhistorydomain)
            HTMLcode += await generator.generatepluginscanstatistics(pluginscanstatistics)
            HTMLcode += '<p><span style="color: #000000;">Report generated on ' + str(
                datetime.datetime.now()) + '</span></p>'
            HTMLcode += '''
               </body>
               </html>
               '''
            html_filename = f'{filename}.html' if '.html' not in filename else filename
            with open(html_filename, 'w') as html_file:
                html_file.write(HTMLcode)
            print('[*] Reporting finished.')
            print('[*] Saving files.')
        except Exception as e:
            print(e)
            print('\n\033[93m[!] An error occurred while creating the output file.\n\n \033[0m')
            sys.exit(1)

        try:
            filename = filename.rsplit('.', 1)[0] + '.xml'
            file = open(filename, 'w')
            file.write('<?xml version="1.0" encoding="UTF-8"?><theHarvester>')
            for x in all_emails:
                file.write('<email>' + x + '</email>')
            for x in full:
                host, ip = x.split(':', 1) if ':' in x else (x, '')  # split once so IPv6 addresses stay intact
                if ip and len(ip) > 3:
                    file.write(f'<host><ip>{ip}</ip><hostname>{host}</hostname></host>')
                else:
                    file.write(f'<host>{host}</host>')
            for x in vhost:
                host, ip = x.split(':', 1) if ':' in x else (x, '')
                if ip and len(ip) > 3:
                    file.write(f'<vhost><ip>{ip}</ip><hostname>{host}</hostname></vhost>')
                else:
                    file.write(f'<vhost>{host}</vhost>')
            if shodanres:
                shodanalysis = []
                for x in shodanres:
                    res = x.split('SAPO')
                    file.write('<shodan>')
                    file.write('<host>' + res[0] + '</host>')
                    file.write('<port>' + res[2] + '</port>')
                    file.write('<banner><!--' + res[1] + '--></banner>')
                    reg_server = re.compile('Server:.*')
                    temp = reg_server.findall(res[1])
                    if temp:
                        shodanalysis.append(res[0] + ':' + temp[0])
                    file.write('</shodan>')
                if shodanalysis:
                    shodanalysis = sorted(set(shodanalysis))
                    file.write('<servers>')
                    for x in shodanalysis:
                        file.write('<server>' + x + '</server>')
                    file.write('</servers>')

            file.write('</theHarvester>')
            file.flush()
            file.close()
            print('[*] Files saved.')
        except Exception as er:
            print(f'\033[93m[!] An error occurred while saving the XML file: {er} \033[0m')
        print('\n\n')
        sys.exit(0)
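The worker/handler pair above is a standard asyncio fan-out: enqueue coroutine objects, drain them with a fixed pool of workers, wait on queue.join(), then cancel the workers. A self-contained sketch of the same pattern, with illustrative names that are not part of theHarvester:

import asyncio


async def worker(queue: asyncio.Queue) -> None:
    # Drain coroutine objects from the queue until cancelled.
    while True:
        coro = await queue.get()
        try:
            await coro
        except Exception:
            pass  # one failing job must not kill the worker
        finally:
            queue.task_done()


async def run_all(jobs, concurrency: int = 5) -> None:
    queue: asyncio.Queue = asyncio.Queue()
    for job in jobs:
        queue.put_nowait(job)
    workers = [asyncio.create_task(worker(queue)) for _ in range(concurrency)]
    await queue.join()  # returns once every queued job is marked done
    for w in workers:
        w.cancel()  # the workers loop forever, so cancel them explicitly
    await asyncio.gather(*workers, return_exceptions=True)


async def demo() -> None:
    async def job(n: int) -> None:
        await asyncio.sleep(0.1)
        print(f'job {n} done')

    await run_all([job(n) for n in range(10)])


asyncio.run(demo())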
Example #3
from theHarvester.lib import stash
from datetime import datetime
import plotly
import plotly.graph_objs as go

try:
    db = stash.StashManager()
    db.do_init()  # NOTE: do_init() is awaited elsewhere (see Example #1); if it is a coroutine here too, it needs an await
except Exception as error:
    print(f'{error}')


class GraphGenerator:
    def __init__(self, domain):
        self.domain = domain
        self.bardata = []
        self.barcolumns = []
        self.scatterxdata = []
        self.scattercountemails = []
        self.scattercounthosts = []
        self.scattercountips = []
        self.scattercountshodans = []
        self.scattercountvhosts = []

    def drawlatestscangraph(self, domain, latestscandata):
        try:
            self.barcolumns = ['email', 'host', 'ip', 'shodan', 'vhost']
            self.bardata.append(latestscandata['email'])
            self.bardata.append(latestscandata['host'])
            self.bardata.append(latestscandata['ip'])
            self.bardata.append(latestscandata['shodan'])
            self.bardata.append(latestscandata['vhost'])
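The example cuts off inside drawlatestscangraph. For context, data shaped like barcolumns/bardata can be rendered with plotly roughly as in the hedged sketch below; the values are hypothetical and this is not the project's actual rendering code.

import plotly.graph_objs as go
import plotly.offline

# Hypothetical data in the shape the class accumulates.
barcolumns = ['email', 'host', 'ip', 'shodan', 'vhost']
bardata = [12, 34, 7, 3, 2]

fig = go.Figure(data=[go.Bar(x=barcolumns, y=bardata)],
                layout=go.Layout(title='Latest scan results'))
# Write a standalone HTML file without opening a browser.
plotly.offline.plot(fig, filename='latestscan.html', auto_open=False)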
Example #4
async def start(rest_args=None):
    """Main program function"""
    parser = argparse.ArgumentParser(
        description=
        'theHarvester is used to gather open source intelligence (OSINT) on a company or domain.'
    )
    parser.add_argument('-d',
                        '--domain',
                        help='Company name or domain to search.',
                        required=True)
    parser.add_argument(
        '-l',
        '--limit',
        help='Limit the number of search results, default=500.',
        default=500,
        type=int)
    parser.add_argument('-S',
                        '--start',
                        help='Start with result number X, default=0.',
                        default=0,
                        type=int)
    parser.add_argument('-g',
                        '--google-dork',
                        help='Use Google Dorks for Google search.',
                        default=False,
                        action='store_true')
    parser.add_argument(
        '-p',
        '--proxies',
        help='Use proxies for requests, enter proxies in proxies.yaml.',
        default=False,
        action='store_true')
    parser.add_argument('-s',
                        '--shodan',
                        help='Use Shodan to query discovered hosts.',
                        default=False,
                        action='store_true')
    parser.add_argument(
        '--screenshot',
        help=
        'Take screenshots of resolved domains; specify an output directory: --screenshot output_directory',
        default="",
        type=str)
    parser.add_argument(
        '-v',
        '--virtual-host',
        help=
        'Verify host name via DNS resolution and search for virtual hosts.',
        action='store_const',
        const='basic',
        default=False)
    parser.add_argument('-e',
                        '--dns-server',
                        help='DNS server to use for lookup.')
    parser.add_argument(
        '-t',
        '--dns-tld',
        help='Perform a DNS TLD expansion discovery, default False.',
        default=False)
    parser.add_argument('-r',
                        '--take-over',
                        help='Check for takeovers.',
                        default=False,
                        action='store_true')
    parser.add_argument('-n',
                        '--dns-lookup',
                        help='Enable DNS server lookup, default False.',
                        default=False,
                        action='store_true')
    parser.add_argument('-c',
                        '--dns-brute',
                        help='Perform a DNS brute force on the domain.',
                        default=False,
                        action='store_true')
    parser.add_argument('-f',
                        '--filename',
                        help='Save the results to an XML and JSON file.',
                        default='',
                        type=str)
    parser.add_argument(
        '-b',
        '--source',
        help=
        '''anubis, baidu, bing, binaryedge, bingapi, bufferoverun, censys, certspotter, crtsh,
                            dnsdumpster, duckduckgo, fullhunt, github-code, google,
                            hackertarget, hunter, intelx, linkedin, linkedin_links,
                            omnisint, otx, pentesttools, projectdiscovery,
                            qwant, rapiddns, rocketreach, securityTrails, spyse, sublist3r, threatcrowd, threatminer,
                            trello, twitter, urlscan, virustotal, yahoo, zoomeye'''
    )

    # determines whether the filename came from the REST API or from the user
    rest_filename = ''
    # indicates this call came from the REST API
    if rest_args:
        if rest_args.source == "getsources":
            return sorted(Core.get_supportedengines())
        elif rest_args.dns_brute:
            args = rest_args
            dnsbrute = (rest_args.dns_brute, True)
        else:
            args = rest_args
            # We need to make sure the filename is random as to not overwrite other files
            filename: str = args.filename
            alphabet = string.ascii_letters + string.digits
            rest_filename += f"{''.join(secrets.choice(alphabet) for _ in range(32))}_{filename}" \
                if len(filename) != 0 else ""
    else:
        args = parser.parse_args()
        filename: str = args.filename
        dnsbrute = (args.dns_brute, False)
    try:
        db = stash.StashManager()
        await db.do_init()
    except Exception:
        pass

    all_emails: List = []
    all_hosts: List = []
    all_ip: List = []
    dnslookup = args.dns_lookup
    dnsserver = args.dns_server
    dnstld = args.dns_tld
    engines: List = []
    full: List = []
    ips: List = []
    google_dorking = args.google_dork
    host_ip: List = []
    limit: int = args.limit
    shodan = args.shodan
    start: int = args.start
    all_urls: list = []
    vhost: list = []
    virtual = args.virtual_host
    word: str = args.domain
    takeover_status = args.take_over
    use_proxy = args.proxies
    linkedin_people_list_tracker: List = []
    linkedin_links_tracker: List = []
    twitter_people_list_tracker: List = []
    interesting_urls: list = []
    total_asns: list = []

    async def store(search_engine: Any,
                    source: str,
                    process_param: Any = None,
                    store_host: bool = False,
                    store_emails: bool = False,
                    store_ip: bool = False,
                    store_people: bool = False,
                    store_links: bool = False,
                    store_results: bool = False,
                    store_interestingurls: bool = False,
                    store_asns: bool = False) -> None:
        """
        Persist details into the database.
        The details to be stored are controlled by the parameters passed to the method.

        :param search_engine: search engine to fetch details from
        :param source: source against which the details (corresponding to the search engine) need to be persisted
        :param process_param: any parameters to be passed to the search engine eg: Google needs google_dorking
        :param store_host: whether to store hosts
        :param store_emails: whether to store emails
        :param store_ip: whether to store IP address
        :param store_people: whether to store user details
        :param store_links: whether to store links
        :param store_results: whether to fetch details from get_results() and persist
        :param store_interestingurls: whether to store interesting urls
        :param store_asns: whether to store asns
        """
        if process_param is None:
            await search_engine.process(use_proxy)
        else:
            await search_engine.process(process_param, use_proxy)
        db_stash = stash.StashManager()
        if source:
            print(
                f'\033[94m[*] Searching {source[0].upper() + source[1:]}. \033[0m'
            )
        if store_host:
            host_names = [
                host for host in filter(await search_engine.get_hostnames())
                if f'.{word}' in host
            ]
            if source not in ('hackertarget', 'pentesttools', 'rapiddns'):
                # If source is inside this conditional it means the hosts returned must be resolved to obtain ip
                full_hosts_checker = hostchecker.Checker(host_names)
                temp_hosts, temp_ips = await full_hosts_checker.check()
                ips.extend(temp_ips)
                full.extend(temp_hosts)
            else:
                full.extend(host_names)
            all_hosts.extend(host_names)
            await db_stash.store_all(word, all_hosts, 'host', source)
        if store_emails:
            email_list = filter(await search_engine.get_emails())
            all_emails.extend(email_list)
            await db_stash.store_all(word, email_list, 'email', source)
        if store_ip:
            ips_list = await search_engine.get_ips()
            all_ip.extend(ips_list)
            await db_stash.store_all(word, all_ip, 'ip', source)
        if store_results:
            email_list, host_names, urls = await search_engine.get_results()
            all_emails.extend(email_list)
            host_names = [
                host for host in filter(host_names) if f'.{word}' in host
            ]
            all_urls.extend(filter(urls))
            all_hosts.extend(host_names)
            await db_stash.store_all(word, all_hosts, 'host', source)
            await db_stash.store_all(word, all_emails, 'email', source)
        if store_people:
            people_list = await search_engine.get_people()
            if source == 'twitter':
                twitter_people_list_tracker.extend(people_list)
            if source == 'linkedin':
                linkedin_people_list_tracker.extend(people_list)
            await db_stash.store_all(word, people_list, 'people', source)

        if store_links:
            links = await search_engine.get_links()
            linkedin_links_tracker.extend(links)
            if len(links) > 0:
                await db_stash.store_all(word, links, 'linkedinlinks', source)

        if store_interestingurls:
            iurls = await search_engine.get_interestingurls()
            interesting_urls.extend(iurls)
            if len(iurls) > 0:
                await db_stash.store_all(word, iurls, 'interestingurls', source)
        if store_asns:
            fasns = await search_engine.get_asns()
            total_asns.extend(fasns)
            if len(fasns) > 0:
                await db_stash.store_all(word, fasns, 'asns', source)

    stor_lst = []
    if args.source is not None:
        if args.source.lower() != 'all':
            engines = sorted(set(map(str.strip, args.source.split(','))))
        else:
            engines = Core.get_supportedengines()
        # Iterate through search engines in order
        if set(engines).issubset(Core.get_supportedengines()):
            print(f'\033[94m[*] Target: {word} \n \033[0m')

            for engineitem in engines:
                if engineitem == 'anubis':
                    from theHarvester.discovery import anubis
                    try:
                        anubis_search = anubis.SearchAnubis(word)
                        stor_lst.append(
                            store(anubis_search, engineitem, store_host=True))
                    except Exception as e:
                        print(e)

                elif engineitem == 'baidu':
                    from theHarvester.discovery import baidusearch
                    try:
                        baidu_search = baidusearch.SearchBaidu(word, limit)
                        stor_lst.append(
                            store(baidu_search,
                                  engineitem,
                                  store_host=True,
                                  store_emails=True))
                    except Exception as e:
                        print(e)

                elif engineitem == 'binaryedge':
                    from theHarvester.discovery import binaryedgesearch
                    try:
                        binaryedge_search = binaryedgesearch.SearchBinaryEdge(
                            word, limit)
                        stor_lst.append(
                            store(binaryedge_search,
                                  engineitem,
                                  store_host=True))
                    except Exception as e:
                        print(e)

                elif engineitem in ('bing', 'bingapi'):
                    from theHarvester.discovery import bingsearch
                    try:
                        bing_search = bingsearch.SearchBing(word, limit, start)
                        bingapi = 'yes' if engineitem == 'bingapi' else 'no'
                        stor_lst.append(
                            store(bing_search,
                                  'bing',
                                  process_param=bingapi,
                                  store_host=True,
                                  store_emails=True))
                    except Exception as e:
                        print(e)

                elif engineitem == 'bufferoverun':
                    from theHarvester.discovery import bufferoverun
                    try:
                        bufferoverun_search = bufferoverun.SearchBufferover(
                            word)
                        stor_lst.append(
                            store(bufferoverun_search,
                                  engineitem,
                                  store_host=True,
                                  store_ip=True))
                    except Exception as e:
                        print(e)

                elif engineitem == 'censys':
                    from theHarvester.discovery import censysearch
                    try:
                        censys_search = censysearch.SearchCensys(word)
                        stor_lst.append(
                            store(censys_search,
                                  engineitem,
                                  store_host=True,
                                  store_emails=True))
                    except Exception as e:
                        if isinstance(e, MissingKey):
                            print(e)

                elif engineitem == 'certspotter':
                    from theHarvester.discovery import certspottersearch
                    try:
                        certspotter_search = certspottersearch.SearchCertspoter(
                            word)
                        stor_lst.append(
                            store(certspotter_search,
                                  engineitem,
                                  None,
                                  store_host=True))
                    except Exception as e:
                        print(e)

                elif engineitem == 'crtsh':
                    try:
                        from theHarvester.discovery import crtsh
                        crtsh_search = crtsh.SearchCrtsh(word)
                        stor_lst.append(
                            store(crtsh_search, 'CRTsh', store_host=True))
                    except Exception as e:
                        print(
                            f'\033[93m[!] A timeout occurred with crtsh, cannot find {args.domain}\n {e}\033[0m'
                        )

                elif engineitem == 'dnsdumpster':
                    try:
                        from theHarvester.discovery import dnsdumpster
                        dns_dumpster_search = dnsdumpster.SearchDnsDumpster(
                            word)
                        stor_lst.append(
                            store(dns_dumpster_search,
                                  engineitem,
                                  store_host=True,
                                  store_ip=True))
                    except Exception as e:
                        print(
                            f'\033[93m[!] An error occurred with dnsdumpster: {e} \033[0m'
                        )

                elif engineitem == 'duckduckgo':
                    from theHarvester.discovery import duckduckgosearch
                    duckduckgo_search = duckduckgosearch.SearchDuckDuckGo(
                        word, limit)
                    stor_lst.append(
                        store(duckduckgo_search,
                              engineitem,
                              store_host=True,
                              store_emails=True))

                elif engineitem == 'fullhunt':
                    from theHarvester.discovery import fullhuntsearch
                    try:
                        fullhunt_search = fullhuntsearch.SearchFullHunt(word)
                        stor_lst.append(
                            store(fullhunt_search, engineitem,
                                  store_host=True))
                    except Exception as e:
                        if isinstance(e, MissingKey):
                            print(e)

                elif engineitem == 'github-code':
                    try:
                        from theHarvester.discovery import githubcode
                        github_search = githubcode.SearchGithubCode(
                            word, limit)
                        stor_lst.append(
                            store(github_search,
                                  engineitem,
                                  store_host=True,
                                  store_emails=True))
                    except MissingKey as ex:
                        print(ex)

                elif engineitem == 'google':
                    from theHarvester.discovery import googlesearch
                    google_search = googlesearch.SearchGoogle(
                        word, limit, start)
                    stor_lst.append(
                        store(google_search,
                              engineitem,
                              process_param=google_dorking,
                              store_host=True,
                              store_emails=True))

                elif engineitem == 'hackertarget':
                    from theHarvester.discovery import hackertarget
                    hackertarget_search = hackertarget.SearchHackerTarget(word)
                    stor_lst.append(
                        store(hackertarget_search, engineitem,
                              store_host=True))

                elif engineitem == 'hunter':
                    from theHarvester.discovery import huntersearch
                    # Import locally or won't work.
                    try:
                        hunter_search = huntersearch.SearchHunter(
                            word, limit, start)
                        stor_lst.append(
                            store(hunter_search,
                                  engineitem,
                                  store_host=True,
                                  store_emails=True))
                    except Exception as e:
                        if isinstance(e, MissingKey):
                            print(e)

                elif engineitem == 'intelx':
                    from theHarvester.discovery import intelxsearch
                    # Import locally or won't work.
                    try:
                        intelx_search = intelxsearch.SearchIntelx(word)
                        stor_lst.append(
                            store(intelx_search,
                                  engineitem,
                                  store_interestingurls=True,
                                  store_emails=True))
                    except Exception as e:
                        if isinstance(e, MissingKey):
                            print(e)
                        else:
                            print(
                                f'An exception has occurred in Intelx search: {e}'
                            )

                elif engineitem == 'linkedin':
                    from theHarvester.discovery import linkedinsearch
                    linkedin_search = linkedinsearch.SearchLinkedin(
                        word, limit)
                    stor_lst.append(
                        store(linkedin_search, engineitem, store_people=True))

                elif engineitem == 'linkedin_links':
                    from theHarvester.discovery import linkedinsearch
                    linkedin_links_search = linkedinsearch.SearchLinkedin(
                        word, limit)
                    stor_lst.append(
                        store(linkedin_links_search,
                              'linkedin',
                              store_links=True))

                elif engineitem == 'omnisint':
                    from theHarvester.discovery import omnisint
                    try:
                        omnisint_search = omnisint.SearchOmnisint(word)
                        stor_lst.append(
                            store(omnisint_search, engineitem,
                                  store_host=True))
                    except Exception as e:
                        print(e)

                elif engineitem == 'otx':
                    from theHarvester.discovery import otxsearch
                    try:
                        otxsearch_search = otxsearch.SearchOtx(word)
                        stor_lst.append(
                            store(otxsearch_search,
                                  engineitem,
                                  store_host=True,
                                  store_ip=True))
                    except Exception as e:
                        print(e)

                elif engineitem == 'pentesttools':
                    from theHarvester.discovery import pentesttools
                    try:
                        pentesttools_search = pentesttools.SearchPentestTools(
                            word)
                        stor_lst.append(
                            store(pentesttools_search,
                                  engineitem,
                                  store_host=True))
                    except Exception as e:
                        if isinstance(e, MissingKey):
                            print(e)
                        else:
                            print(
                                f'An exception has occurred in PentestTools search: {e}'
                            )

                elif engineitem == 'projectdiscovery':
                    from theHarvester.discovery import projectdiscovery
                    try:
                        projectdiscovery_search = projectdiscovery.SearchDiscovery(
                            word)
                        stor_lst.append(
                            store(projectdiscovery_search,
                                  engineitem,
                                  store_host=True))
                    except Exception as e:
                        if isinstance(e, MissingKey):
                            print(e)
                        else:
                            print(
                                f'An exception has occurred in ProjectDiscovery: {e}'
                            )

                elif engineitem == 'qwant':
                    from theHarvester.discovery import qwantsearch
                    qwant_search = qwantsearch.SearchQwant(word, start, limit)
                    stor_lst.append(
                        store(qwant_search,
                              engineitem,
                              store_host=True,
                              store_emails=True))

                elif engineitem == 'rapiddns':
                    from theHarvester.discovery import rapiddns
                    try:
                        rapiddns_search = rapiddns.SearchRapidDns(word)
                        stor_lst.append(
                            store(rapiddns_search, engineitem,
                                  store_host=True))
                    except Exception as e:
                        print(e)

                elif engineitem == 'rocketreach':
                    from theHarvester.discovery import rocketreach
                    try:
                        rocketreach_search = rocketreach.SearchRocketReach(
                            word, limit)
                        stor_lst.append(
                            store(rocketreach_search,
                                  engineitem,
                                  store_links=True))
                    except Exception as e:
                        if isinstance(e, MissingKey):
                            print(e)
                        else:
                            print(
                                f'An exception has occurred in RocketReach: {e}'
                            )

                elif engineitem == 'securityTrails':
                    from theHarvester.discovery import securitytrailssearch
                    try:
                        securitytrails_search = securitytrailssearch.SearchSecuritytrail(
                            word)
                        stor_lst.append(
                            store(securitytrails_search,
                                  engineitem,
                                  store_host=True,
                                  store_ip=True))
                    except Exception as e:
                        if isinstance(e, MissingKey):
                            print(e)
                        else:
                            pass

                elif engineitem == 'sublist3r':
                    from theHarvester.discovery import sublist3r
                    try:
                        sublist3r_search = sublist3r.SearchSublist3r(word)
                        stor_lst.append(
                            store(sublist3r_search,
                                  engineitem,
                                  store_host=True))
                    except Exception as e:
                        print(e)

                elif engineitem == 'spyse':
                    from theHarvester.discovery import spyse
                    try:
                        spyse_search = spyse.SearchSpyse(word, limit)
                        stor_lst.append(
                            store(spyse_search,
                                  engineitem,
                                  store_host=True,
                                  store_ip=True))
                    except Exception as e:
                        print(e)

                elif engineitem == 'threatcrowd':
                    from theHarvester.discovery import threatcrowd
                    try:
                        threatcrowd_search = threatcrowd.SearchThreatcrowd(
                            word)
                        stor_lst.append(
                            store(threatcrowd_search,
                                  engineitem,
                                  store_host=True,
                                  store_ip=True))
                    except Exception as e:
                        print(e)

                elif engineitem == 'threatminer':
                    from theHarvester.discovery import threatminer
                    try:
                        threatminer_search = threatminer.SearchThreatminer(
                            word)
                        stor_lst.append(
                            store(threatminer_search,
                                  engineitem,
                                  store_host=True,
                                  store_ip=True))
                    except Exception as e:
                        print(e)

                elif engineitem == 'trello':
                    from theHarvester.discovery import trello
                    # Import locally or won't work.
                    trello_search = trello.SearchTrello(word)
                    stor_lst.append(
                        store(trello_search, engineitem, store_results=True))

                elif engineitem == 'twitter':
                    from theHarvester.discovery import twittersearch
                    twitter_search = twittersearch.SearchTwitter(word, limit)
                    stor_lst.append(
                        store(twitter_search, engineitem, store_people=True))

                elif engineitem == 'urlscan':
                    from theHarvester.discovery import urlscan
                    try:
                        urlscan_search = urlscan.SearchUrlscan(word)
                        stor_lst.append(
                            store(urlscan_search,
                                  engineitem,
                                  store_host=True,
                                  store_ip=True,
                                  store_interestingurls=True,
                                  store_asns=True))
                    except Exception as e:
                        print(e)

                elif engineitem == 'virustotal':
                    from theHarvester.discovery import virustotal
                    virustotal_search = virustotal.SearchVirustotal(word)
                    stor_lst.append(
                        store(virustotal_search, engineitem, store_host=True))

                elif engineitem == 'yahoo':
                    from theHarvester.discovery import yahoosearch
                    yahoo_search = yahoosearch.SearchYahoo(word, limit)
                    stor_lst.append(
                        store(yahoo_search,
                              engineitem,
                              store_host=True,
                              store_emails=True))

                elif engineitem == 'zoomeye':
                    try:
                        from theHarvester.discovery import zoomeyesearch
                        zoomeye_search = zoomeyesearch.SearchZoomEye(
                            word, limit)
                        stor_lst.append(
                            store(zoomeye_search,
                                  engineitem,
                                  store_host=True,
                                  store_emails=True,
                                  store_ip=True,
                                  store_interestingurls=True,
                                  store_asns=True))
                    except Exception as e:
                        if isinstance(e, MissingKey):
                            print(e)
                        else:
                            pass
        else:
            try:
                # If rest_args was never supplied (plain CLI run), this
                # attribute access raises and we exit on the invalid source;
                # REST API callers fall through silently.
                rest_args.dns_brute
            except Exception:
                print('\033[93m[!] Invalid source.\n\n \033[0m')
                sys.exit(1)

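    # Producer/consumer pattern: handler() enqueues the store() coroutines and a
    # small pool of worker() tasks drains the queue, bounding how many searches
    # run concurrently.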
    async def worker(queue):
        while True:
            # Get a "work item" (a store() coroutine) out of the queue.
            stor = await queue.get()
            try:
                await stor
            except Exception:
                pass
            finally:
                # Notify the queue that the "work item" has been processed.
                queue.task_done()

    async def handler(lst):
        queue = asyncio.Queue()

        for stor_method in lst:
            # enqueue the coroutines
            queue.put_nowait(stor_method)
        # Create three worker tasks to process the queue concurrently.
        tasks = []
        for i in range(3):
            task = asyncio.create_task(worker(queue))
            tasks.append(task)

        # Wait until the queue is fully processed.
        await queue.join()

        # Cancel our worker tasks.
        for task in tasks:
            task.cancel()
        # Wait until all worker tasks are cancelled.
        await asyncio.gather(*tasks, return_exceptions=True)

    await handler(lst=stor_lst)
    return_ips: List = []
    if rest_args is not None and len(
            rest_filename) == 0 and rest_args.dns_brute is False:
        # Indicates user is using rest api but not wanting output to be saved to a file
        # Keep only hosts in the target domain, then dedupe and sort.
        full = sorted({
            host
            for host in full
            if (':' in host and word in host) or word in host.split(':')[0]
        })
        # cast to string so Rest API can understand type
        return_ips.extend([
            str(ip) for ip in sorted(
                [netaddr.IPAddress(ip.strip()) for ip in set(all_ip)])
        ])
        # return list(set(all_emails)), return_ips, full, '', ''
        return total_asns, interesting_urls, twitter_people_list_tracker, linkedin_people_list_tracker, \
            linkedin_links_tracker, all_urls, all_ip, all_emails, all_hosts
    # Sanity check to see if all_emails and all_hosts are defined.
    try:
        all_emails
    except NameError:
        print(
            '\n\n\033[93m[!] No emails found because all_emails is not defined.\n\n \033[0m'
        )
        sys.exit(1)
    try:
        all_hosts
    except NameError:
        print(
            '\n\n\033[93m[!] No hosts found because all_hosts is not defined.\n\n \033[0m'
        )
        sys.exit(1)

    # Results
    if len(total_asns) > 0:
        print(f'\n[*] ASNS found: {len(total_asns)}')
        print('--------------------')
        total_asns = list(sorted(set(total_asns)))
        for asn in total_asns:
            print(asn)

    if len(interesting_urls) > 0:
        print(f'\n[*] Interesting Urls found: {len(interesting_urls)}')
        print('--------------------')
        interesting_urls = list(sorted(set(interesting_urls)))
        for iurl in interesting_urls:
            print(iurl)

    if len(twitter_people_list_tracker) == 0 and 'twitter' in engines:
        print('\n[*] No Twitter users found.\n\n')
    else:
        if len(twitter_people_list_tracker) >= 1:
            print('\n[*] Twitter Users found: ' +
                  str(len(twitter_people_list_tracker)))
            print('---------------------')
            twitter_people_list_tracker = list(
                sorted(set(twitter_people_list_tracker)))
            for usr in twitter_people_list_tracker:
                print(usr)

    if len(linkedin_people_list_tracker) == 0 and 'linkedin' in engines:
        print('\n[*] No LinkedIn users found.\n\n')
    else:
        if len(linkedin_people_list_tracker) >= 1:
            print('\n[*] LinkedIn Users found: ' +
                  str(len(linkedin_people_list_tracker)))
            print('---------------------')
            linkedin_people_list_tracker = list(
                sorted(set(linkedin_people_list_tracker)))
            for usr in linkedin_people_list_tracker:
                print(usr)

    if len(linkedin_links_tracker) == 0 and ('linkedin' in engines
                                             or 'rocketreach' in engines):
        print('\n[*] No LinkedIn links found.\n\n')
    else:
        if len(linkedin_links_tracker) >= 1:
            print(f'\n[*] LinkedIn Links found: {len(linkedin_links_tracker)}')
            print('---------------------')
            linkedin_links_tracker = list(sorted(set(linkedin_links_tracker)))
            for link in linkedin_links_tracker:
                print(link)

    length_urls = len(all_urls)
    if length_urls == 0:
        if len(engines) >= 1 and 'trello' in engines:
            print('\n[*] No Trello URLs found.')
    else:
        print('\n[*] Trello URLs found: ' + str(length_urls))
        print('--------------------')
        all_urls = list(sorted(set(all_urls)))
        for url in all_urls:
            print(url)

    if len(all_ip) == 0:
        print('\n[*] No IPs found.')
    else:
        print('\n[*] IPs found: ' + str(len(all_ip)))
        print('-------------------')
        # use netaddr as the list may contain ipv4 and ipv6 addresses
        ip_list = sorted([netaddr.IPAddress(ip.strip()) for ip in set(all_ip)])
        print('\n'.join(map(str, ip_list)))

    if len(all_emails) == 0:
        print('\n[*] No emails found.')
    else:
        print('\n[*] Emails found: ' + str(len(all_emails)))
        print('----------------------')
        all_emails = sorted(list(set(all_emails)))
        print(('\n'.join(all_emails)))

    if len(all_hosts) == 0:
        print('\n[*] No hosts found.\n\n')
    else:
        print('\n[*] Hosts found: ' + str(len(all_hosts)))
        print('---------------------')
        all_hosts = sorted(list(set(all_hosts)))
        db = stash.StashManager()
        full = sorted(
            {
                host
                for host in full
                if (':' in host and word in host) or word in host.split(':')[0]
            },
            key=lambda el: el.split(':')[0])
        for host in full:
            print(host)
        host_ip = [
            netaddr_ip.format()
            for netaddr_ip in sorted([netaddr.IPAddress(ip) for ip in ips])
        ]
        await db.store_all(word, host_ip, 'ip', 'DNS-resolver')

    # DNS brute force
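    # When invoked via the REST API, dnsbrute appears to be an
    # (enabled, called_from_rest) pair, hence the tuple indexing below.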
    if dnsbrute and dnsbrute[0] is True:
        print('\n[*] Starting DNS brute force.')
        dns_force = dnssearch.DnsForce(word, dnsserver, verbose=True)
        hosts, ips = await dns_force.run()
        hosts = list({host for host in hosts if ':' in host})
        hosts.sort(key=lambda el: el.split(':')[0])
        # Check if Rest API is being used if so return found hosts
        if dnsbrute[1]:
            return hosts
        print('\n[*] Hosts found after DNS brute force:')
        db = stash.StashManager()
        for host in hosts:
            print(host)
            full.append(host)
        await db.store_all(word, hosts, 'host', 'dns_bruteforce')

    # TakeOver Checking
    if takeover_status:
        print('\n[*] Performing subdomain takeover check')
        print('\n[*] Subdomain Takeover checking IS ACTIVE RECON')
        search_take = takeover.TakeOver(all_hosts)
        await search_take.process(proxy=use_proxy)

    # DNS reverse lookup
    dnsrev = []
    if dnslookup is True:
        print('\n[*] Starting active queries.')
        # load the reverse dns tools
        from theHarvester.discovery.dnssearch import (
            generate_postprocessing_callback, reverse_all_ips_in_range,
            serialize_ip_range)
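        # serialize_ip_range() appears to collapse each address into its /24
        # network, so every range is reversed only once.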

        # reverse each iprange in a separate task
        __reverse_dns_tasks = {}
        for entry in host_ip:
            __ip_range = serialize_ip_range(ip=entry, netmask='24')
            if __ip_range and __ip_range not in __reverse_dns_tasks:
                print('\n[*] Performing reverse lookup on ' + __ip_range)
                __reverse_dns_tasks[__ip_range] = asyncio.create_task(
                    reverse_all_ips_in_range(
                        iprange=__ip_range,
                        callback=generate_postprocessing_callback(
                            target=word,
                            local_results=dnsrev,
                            overall_results=full),
                        nameservers=list(map(str, dnsserver.split(',')))
                        if dnsserver else None))

        # run all the reversing tasks concurrently
        await asyncio.gather(*__reverse_dns_tasks.values())

        # Display the newly found hosts
        print('\n[*] Hosts found after reverse lookup (in target domain):')
        print('--------------------------------------------------------')
        for xh in dnsrev:
            print(xh)

    # DNS TLD expansion
    dnstldres = []
    if dnstld is True:
        print('[*] Starting DNS TLD expansion.')
        a = dnssearch.DnsTld(word, dnsserver, verbose=True)
        res = a.process()
        print('\n[*] Hosts found after DNS TLD expansion:')
        print('----------------------------------------')
        for y in res:
            print(y)
            dnstldres.append(y)
            if y not in full:
                full.append(y)

    # Virtual hosts search
    if virtual == 'basic':
        print('\n[*] Virtual hosts:')
        print('------------------')
        for data in host_ip:
            basic_search = bingsearch.SearchBing(data, limit, start)
            await basic_search.process_vhost()
            results = await basic_search.get_allhostnames()
            for result in results:
                result = re.sub(r'[[</?]*[\w]*>]*', '', result)
                result = re.sub('<', '', result)
                result = re.sub('>', '', result)
                print((data + '\t' + result))
                vhost.append(data + ':' + result)
                full.append(data + ':' + result)
        vhost = sorted(set(vhost))
    else:
        pass

    # Screenshots
    screenshot_tups = []
    if len(args.screenshot) > 0:
        import time
        from aiomultiprocess import Pool
        from theHarvester.screenshot.screenshot import ScreenShotter
        screen_shotter = ScreenShotter(args.screenshot)
        path_exists = screen_shotter.verify_path()
        # Verify path exists if not create it or if user does not create it skip screenshot
        if path_exists:
            await screen_shotter.verify_installation()
            print(
                f'\nScreenshots can be found in: {screen_shotter.output}{screen_shotter.slash}'
            )
            start_time = time.perf_counter()
            print('Filtering domains for ones we can reach')
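            # Entries in `full` look like 'host:ip'; keep just the hostname part.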
            unique_resolved_domains = {
                url.split(':')[0]
                for url in full if ':' in url and 'www.' not in url
            }
            if len(unique_resolved_domains) > 0:
                # First filter out ones that didn't resolve
                print(
                    'Attempting to visit unique resolved domains, this is ACTIVE RECON'
                )
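                # aiomultiprocess fans the visits out across worker processes.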
                async with Pool(12) as pool:
                    results = await pool.map(screen_shotter.visit,
                                             list(unique_resolved_domains))
                    # Filter out domains that we couldn't connect to
                    unique_resolved_domains = list(
                        sorted({tup[0]
                                for tup in results if len(tup[1]) > 0}))
                async with Pool(3) as pool:
                    print(
                        f'Length of unique resolved domains: {len(unique_resolved_domains)} chunking now!\n'
                    )
                    # If you have the resources you could make the function faster by increasing the chunk number
                    chunk_number = 14
                    for chunk in screen_shotter.chunk_list(
                            unique_resolved_domains, chunk_number):
                        try:
                            screenshot_tups.extend(await pool.map(
                                screen_shotter.take_screenshot, chunk))
                        except Exception as ee:
                            print(
                                f'An exception has occurred while mapping: {ee}'
                            )
            end = time.perf_counter()
            total = end - start_time
            # Render the elapsed time as MM:SS.
            minutes, seconds = divmod(total, 60)
            total_time = "%02d:%02d" % (minutes, seconds)
            print(f'Finished taking screenshots in {total_time} (MM:SS)')
            print(
                '[+] Note there may be leftover chrome processes you may have to kill manually\n'
            )

    # Shodan
    shodanres = []
    if shodan is True:
        import json
        print('\033[94m[*] Searching Shodan. \033[0m')
        try:
            for ip in host_ip:
                print(('\tSearching for ' + ip))
                shodan = shodansearch.SearchShodan()
                shodandict = await shodan.search_ip(ip)
                await asyncio.sleep(2)
                rowdata = []
                for value in shodandict[ip].values():
                    if str(value) == 'Not in Shodan' \
                            or 'Error occurred in the Shodan IP search module' in str(value):
                        break
                    if isinstance(value, int):
                        value = str(value)

                    if isinstance(value, list):
                        value = ', '.join(map(str, value))
                    rowdata.append(value)
                shodanres.append(rowdata)
                print(json.dumps(shodandict[ip], indent=4, sort_keys=True))
                print('\n')
        except Exception as e:
            print(f'\033[93m[!] An error occurred with Shodan: {e} \033[0m')
    else:
        pass

    # Here we need to add explosion mode.
    # We have to take out the TLDs to do this.
    if args.dns_tld is not False:
        counter = 0
        # Do not reuse `word` here; it would shadow the target domain.
        for vhost_entry in vhost:
            search_google = googlesearch.SearchGoogle(vhost_entry, limit, counter)
            await search_google.process(google_dorking)
            emails = await search_google.get_emails()
            hosts = await search_google.get_hostnames()
            print(emails)
            print(hosts)
    else:
        pass

    if filename != '':
        print('\n[*] Reporting started.')
        try:
            if len(rest_filename) == 0:
                filename = filename.rsplit('.', 1)[0] + '.xml'
            else:
                filename = 'theHarvester/app/static/' + rest_filename.rsplit(
                    '.', 1)[0] + '.xml'
            # TODO use aiofiles if user is using rest api
            # XML REPORT SECTION
            with open(filename, 'w+') as file:
                file.write(
                    '<?xml version="1.0" encoding="UTF-8"?><theHarvester>')
                for x in all_emails:
                    file.write('<email>' + x + '</email>')
                for x in full:
                    host, ip = x.split(':', 1) if ':' in x else (x, '')
                    if ip and len(ip) > 3:
                        file.write(
                            f'<host><ip>{ip}</ip><hostname>{host}</hostname></host>'
                        )
                    else:
                        file.write(f'<host>{host}</host>')
                for x in vhost:
                    host, ip = x.split(':', 1) if ':' in x else (x, '')
                    if ip and len(ip) > 3:
                        file.write(
                            f'<vhost><ip>{ip} </ip><hostname>{host}</hostname></vhost>'
                        )
                    else:
                        file.write(f'<vhost>{host}</vhost>')
                # TODO add Shodan output into XML report
                file.write('</theHarvester>')
                print('[*] XML File saved.')
        except Exception as error:
            print(
                f'\033[93m[!] An error occurred while saving the XML file: {error} \033[0m'
            )

        try:
            # JSON REPORT SECTION
            filename = filename.rsplit('.', 1)[0] + '.json'
            # create dict with values for json output
            json_dict: Dict = dict()
            # Sanity check: ip_list is only defined when IPs were found above.
            if 'ip_list' in locals() and all_ip and ip_list:
                json_dict["ips"] = [str(ip) for ip in ip_list]

            if len(all_emails) > 0:
                json_dict["emails"] = [email for email in all_emails]

            if len(full) > 0:
                json_dict["hosts"] = [host for host in full]

            if vhost and len(vhost) > 0:
                json_dict["vhosts"] = [host for host in vhost]

            if len(interesting_urls) > 0:
                json_dict["interesting_urls"] = interesting_urls

            if len(all_urls) > 0:
                json_dict["trello_urls"] = all_urls

            if len(total_asns) > 0:
                json_dict["asns"] = total_asns

            if len(twitter_people_list_tracker) > 0:
                json_dict["twitter_people"] = twitter_people_list_tracker

            if len(linkedin_people_list_tracker) > 0:
                json_dict["linkedin_people"] = linkedin_people_list_tracker

            if len(linkedin_links_tracker) > 0:
                json_dict["linkedin_links"] = linkedin_links_tracker

            json_dict["shodan"] = shodanres
            with open(filename, 'wb+') as fp:
                fp.write(orjson.dumps(json_dict, option=orjson.OPT_SORT_KEYS))
            print('[*] JSON File saved.')
        except Exception as er:
            print(
                f'\033[93m[!] An error occurred while saving the JSON file: {er} \033[0m'
            )
        print('\n\n')
        sys.exit(0)
Example No. 6
0
    async def store(search_engine: Any,
                    source: str,
                    process_param: Any = None,
                    store_host: bool = False,
                    store_emails: bool = False,
                    store_ip: bool = False,
                    store_people: bool = False,
                    store_links: bool = False,
                    store_results: bool = False,
                    store_interestingurls: bool = False,
                    store_asns: bool = False) -> None:
        """
        Persist details into the database.
        The details to be stored are controlled by the parameters passed to the method.

        :param search_engine: search engine to fetch details from
        :param source: source against which the details (corresponding to the search engine) need to be persisted
        :param process_param: any parameters to be passed to the search engine, e.g. Google needs google_dorking
        :param store_host: whether to store hosts
        :param store_emails: whether to store emails
        :param store_ip: whether to store IP address
        :param store_people: whether to store user details
        :param store_links: whether to store links
        :param store_results: whether to fetch details from get_results() and persist
        :param store_interestingurls: whether to store interesting urls
        :param store_asns: whether to store asns
        """
        if process_param is None:
            await search_engine.process(use_proxy)
        else:
            await search_engine.process(process_param, use_proxy)
        db_stash = stash.StashManager()
        if source:
            print(
                f'\033[94m[*] Searching {source[0].upper() + source[1:]}. \033[0m'
            )
        if store_host:
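            # NOTE: the single-argument filter() used below appears to be
            # theHarvester's own sanitising helper (via a star import), not the
            # Python builtin, which requires a predicate as its first argument.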
            host_names = [
                host for host in filter(await search_engine.get_hostnames())
                if f'.{word}' in host
            ]
            if source not in ('hackertarget', 'pentesttools', 'rapiddns'):
                # If source is inside this conditional it means the hosts returned must be resolved to obtain ip
                full_hosts_checker = hostchecker.Checker(host_names)
                temp_hosts, temp_ips = await full_hosts_checker.check()
                ips.extend(temp_ips)
                full.extend(temp_hosts)
            else:
                full.extend(host_names)
            all_hosts.extend(host_names)
            await db_stash.store_all(word, all_hosts, 'host', source)
        if store_emails:
            email_list = filter(await search_engine.get_emails())
            all_emails.extend(email_list)
            await db_stash.store_all(word, email_list, 'email', source)
        if store_ip:
            ips_list = await search_engine.get_ips()
            all_ip.extend(ips_list)
            await db_stash.store_all(word, all_ip, 'ip', source)
        if store_results:
            email_list, host_names, urls = await search_engine.get_results()
            all_emails.extend(email_list)
            host_names = [
                host for host in filter(host_names) if f'.{word}' in host
            ]
            all_urls.extend(filter(urls))
            all_hosts.extend(host_names)
            # Use the local StashManager; `db` from the enclosing scope may be unbound.
            await db_stash.store_all(word, all_hosts, 'host', source)
            await db_stash.store_all(word, all_emails, 'email', source)
        if store_people:
            people_list = await search_engine.get_people()
            if source == 'twitter':
                twitter_people_list_tracker.extend(people_list)
            if source == 'linkedin':
                linkedin_people_list_tracker.extend(people_list)
            await db_stash.store_all(word, people_list, 'people', source)

        if store_links:
            links = await search_engine.get_links()
            linkedin_links_tracker.extend(links)
            if len(links) > 0:
                await db_stash.store_all(word, links, 'linkedinlinks', engineitem)

        if store_interestingurls:
            iurls = await search_engine.get_interestingurls()
            interesting_urls.extend(iurls)
            if len(iurls) > 0:
                await db_stash.store_all(word, iurls, 'interestingurls', engineitem)
        if store_asns:
            fasns = await search_engine.get_asns()
            total_asns.extend(fasns)
            if len(fasns) > 0:
                await db_stash.store_all(word, fasns, 'asns', engineitem)
Example No. 7
0
def start():
    parser = argparse.ArgumentParser(
        description=
        'theHarvester is used to gather open source intelligence (OSINT) on a\n'
        'company or domain.')
    parser.add_argument('-d',
                        '--domain',
                        help='company name or domain to search',
                        required=True)
    parser.add_argument('-l',
                        '--limit',
                        help='limit the number of search results, default=500',
                        default=500,
                        type=int)
    parser.add_argument('-S',
                        '--start',
                        help='start with result number X, default=0',
                        default=0,
                        type=int)
    parser.add_argument('-g',
                        '--google-dork',
                        help='use Google Dorks for Google search',
                        default=False,
                        action='store_true')
    parser.add_argument(
        '-p',
        '--port-scan',
        help=
        'scan the detected hosts and check for Takeovers (21,22,80,443,8080)',
        default=False,
        action='store_true')
    parser.add_argument('-s',
                        '--shodan',
                        help='use Shodan to query discovered hosts',
                        default=False,
                        action='store_true')
    parser.add_argument(
        '-v',
        '--virtual-host',
        help='verify host name via DNS resolution and search for virtual hosts',
        action='store_const',
        const='basic',
        default=False)
    parser.add_argument('-e',
                        '--dns-server',
                        help='DNS server to use for lookup')
    parser.add_argument(
        '-t',
        '--dns-tld',
        help='perform a DNS TLD expansion discovery, default False',
        default=False)
    parser.add_argument('-n',
                        '--dns-lookup',
                        help='enable DNS server lookup, default False',
                        default=False,
                        action='store_true')
    parser.add_argument('-c',
                        '--dns-brute',
                        help='perform a DNS brute force on the domain',
                        default=False,
                        action='store_true')
    parser.add_argument('-f',
                        '--filename',
                        help='save the results to an HTML and/or XML file',
                        default='',
                        type=str)
    parser.add_argument(
        '-b',
        '--source',
        help='''baidu, bing, bingapi, certspotter, crtsh, dnsdumpster,
                        dogpile, duckduckgo, github-code, google,
                        hunter, intelx,
                        linkedin, linkedin_links, netcraft, otx, securityTrails, spyse(disabled for now), threatcrowd,
                        trello, twitter, vhost, virustotal, yahoo, all''')

    args = parser.parse_args()
    try:
        db = stash.StashManager()
        db.do_init()
    except Exception:
        pass

    all_emails: list = []
    all_hosts: list = []
    all_ip: list = []
    dnsbrute = args.dns_brute
    dnslookup = args.dns_lookup
    dnsserver = args.dns_server
    dnstld = args.dns_tld
    engines = []
    filename: str = args.filename
    full: list = []
    google_dorking = args.google_dork
    host_ip: list = []
    limit: int = args.limit
    ports_scanning = args.port_scan
    shodan = args.shodan
    start: int = args.start
    takeover_check = False
    all_urls: list = []
    vhost: list = []
    virtual = args.virtual_host
    word: str = args.domain

    def store(search_engine: Any,
              source: str,
              process_param: Any = None,
              store_host: bool = False,
              store_emails: bool = False,
              store_ip: bool = False,
              store_people: bool = False,
              store_data: bool = False,
              store_links: bool = False,
              store_results: bool = False) -> None:
        """
        Persist details into the database.
        The details to be stored are controlled by the parameters passed to the method.

        :param search_engine: search engine to fetch details from
        :param source: source against which the details (corresponding to the search engine) need to be persisted
        :param process_param: any parameters to be passed to the search engine,
                              e.g. Google needs google_dorking
        :param store_host: whether to store hosts
        :param store_emails: whether to store emails
        :param store_ip: whether to store IP address
        :param store_people: whether to store user details
        :param store_data: whether to fetch host from method get_data() and persist
        :param store_links: whether to store links
        :param store_results: whether to fetch details from get_results() and persist
        """
        if process_param is None:
            search_engine.process()
        else:
            search_engine.process(process_param)
        db_stash = stash.StashManager()

        if store_host:
            host_names = filter(search_engine.get_hostnames())
            all_hosts.extend(host_names)
            db_stash.store_all(word, all_hosts, 'host', source)
        if store_emails:
            email_list = filter(search_engine.get_emails())
            all_emails.extend(email_list)
            db_stash.store_all(word, email_list, 'email', source)
        if store_ip:
            ips_list = search_engine.get_ips()
            all_ip.extend(ips_list)
            db_stash.store_all(word, all_ip, 'ip', source)
        if store_data:
            data = filter(search_engine.get_data())
            all_hosts.extend(data)
            db_stash.store_all(word, all_hosts, 'host', source)
        if store_results:
            email_list, host_names, urls = search_engine.get_results()
            all_emails.extend(email_list)
            host_names = filter(host_names)
            all_urls.extend(filter(urls))
            all_hosts.extend(host_names)
            db_stash.store_all(word, all_hosts, 'host', source)
            db_stash.store_all(word, all_emails, 'email', source)
        if store_people:
            people_list = search_engine.get_people()
            db_stash.store_all(word, people_list, 'people', source)
            if len(people_list) == 0:
                print('\n[*] No users found.\n\n')
            else:
                print('\n[*] Users found: ' + str(len(people_list)))
                print('---------------------')
                for usr in sorted(list(set(people_list))):
                    print(usr)
        if store_links:
            links = search_engine.get_links()
            db_stash.store_all(word, links, 'name', engineitem)
            if len(links) == 0:
                print('\n[*] No links found.\n\n')
            else:
                print(f'\n[*] Links found: {len(links)}')
                print('---------------------')
                for link in sorted(list(set(links))):
                    print(link)

    if args.source is not None:
        if args.source.lower() != 'all':
            engines = sorted(set(map(str.strip, args.source.split(','))))
        else:
            engines = Core.get_supportedengines()
        # Iterate through search engines in order
        if set(engines).issubset(Core.get_supportedengines()):
            print(f'\033[94m[*] Target: {word} \n \033[0m')

            for engineitem in engines:
                if engineitem == 'baidu':
                    print('\033[94m[*] Searching Baidu. \033[0m')
                    from theHarvester.discovery import baidusearch
                    try:
                        baidu_search = baidusearch.SearchBaidu(word, limit)
                        store(baidu_search,
                              engineitem,
                              store_host=True,
                              store_emails=True)
                    except Exception:
                        pass

                elif engineitem == 'bing' or engineitem == 'bingapi':
                    print('\033[94m[*] Searching Bing. \033[0m')
                    from theHarvester.discovery import bingsearch
                    try:
                        bing_search = bingsearch.SearchBing(word, limit, start)
                        bingapi = 'yes' if engineitem == 'bingapi' else 'no'
                        store(bing_search,
                              'bing',
                              process_param=bingapi,
                              store_host=True,
                              store_emails=True)
                    except Exception as e:
                        if isinstance(e, MissingKey):
                            print(e)
                        else:
                            pass

                elif engineitem == 'certspotter':
                    print('\033[94m[*] Searching CertSpotter. \033[0m')
                    from theHarvester.discovery import certspottersearch
                    try:
                        certspotter_search = certspottersearch.SearchCertspoter(
                            word)
                        store(certspotter_search,
                              engineitem,
                              None,
                              store_host=True)
                    except Exception as e:
                        print(e)

                elif engineitem == 'crtsh':
                    try:
                        print('\033[94m[*] Searching CRT.sh. \033[0m')
                        from theHarvester.discovery import crtsh
                        crtsh_search = crtsh.SearchCrtsh(word)
                        store(crtsh_search, 'CRTsh', store_data=True)

                    except Exception:
                        print(
                            f'\033[93m[!] A timeout occurred with crtsh, cannot find {args.domain}\033[0m'
                        )

                elif engineitem == 'dnsdumpster':
                    try:
                        print('\033[94m[*] Searching DNSdumpster. \033[0m')
                        from theHarvester.discovery import dnsdumpster
                        dns_dumpster_search = dnsdumpster.SearchDnsDumpster(
                            word)
                        store(dns_dumpster_search, engineitem, store_host=True)
                    except Exception as e:
                        print(
                            f'\033[93m[!] An error occurred with dnsdumpster: {e} \033[0m'
                        )

                elif engineitem == 'dogpile':
                    try:
                        print('\033[94m[*] Searching Dogpile. \033[0m')
                        from theHarvester.discovery import dogpilesearch
                        dogpile_search = dogpilesearch.SearchDogpile(
                            word, limit)
                        store(dogpile_search,
                              engineitem,
                              store_host=True,
                              store_emails=True)
                    except Exception as e:
                        print(
                            f'\033[93m[!] An error occurred with Dogpile: {e} \033[0m'
                        )

                elif engineitem == 'duckduckgo':
                    print('\033[94m[*] Searching DuckDuckGo. \033[0m')
                    from theHarvester.discovery import duckduckgosearch
                    duckduckgo_search = duckduckgosearch.SearchDuckDuckGo(
                        word, limit)
                    store(duckduckgo_search,
                          engineitem,
                          store_host=True,
                          store_emails=True)

                elif engineitem == 'github-code':
                    print('\033[94m[*] Searching Github (code). \033[0m')
                    try:
                        from theHarvester.discovery import githubcode
                        github_search = githubcode.SearchGithubCode(
                            word, limit)
                        store(github_search,
                              engineitem,
                              store_host=True,
                              store_emails=True)
                    except MissingKey as ex:
                        print(ex)

                elif engineitem == 'exalead':
                    print('\033[94m[*] Searching Exalead. \033[0m')
                    from theHarvester.discovery import exaleadsearch
                    exalead_search = exaleadsearch.SearchExalead(
                        word, limit, start)
                    store(exalead_search,
                          engineitem,
                          store_host=True,
                          store_emails=True)

                elif engineitem == 'google':
                    print('\033[94m[*] Searching Google. \033[0m')
                    from theHarvester.discovery import googlesearch
                    google_search = googlesearch.SearchGoogle(
                        word, limit, start)
                    store(google_search,
                          engineitem,
                          process_param=google_dorking,
                          store_host=True,
                          store_emails=True)

                elif engineitem == 'hunter':
                    print('\033[94m[*] Searching Hunter. \033[0m')
                    from theHarvester.discovery import huntersearch
                    # Import locally or won't work.
                    try:
                        hunter_search = huntersearch.SearchHunter(
                            word, limit, start)
                        store(hunter_search,
                              engineitem,
                              store_host=True,
                              store_emails=True)
                    except Exception as e:
                        if isinstance(e, MissingKey):
                            print(e)
                        else:
                            pass

                elif engineitem == 'intelx':
                    print('\033[94m[*] Searching Intelx. \033[0m')
                    from theHarvester.discovery import intelxsearch
                    # Import locally or won't work.
                    try:
                        intelx_search = intelxsearch.SearchIntelx(word, limit)
                        store(intelx_search,
                              engineitem,
                              store_host=True,
                              store_emails=True)
                    except Exception as e:
                        if isinstance(e, MissingKey):
                            print(e)
                        else:
                            print(
                                f'An exception has occurred in Intelx search: {e}'
                            )

                elif engineitem == 'linkedin':
                    print('\033[94m[*] Searching Linkedin. \033[0m')
                    from theHarvester.discovery import linkedinsearch
                    linkedin_search = linkedinsearch.SearchLinkedin(
                        word, limit)
                    store(linkedin_search, engineitem, store_people=True)

                elif engineitem == 'linkedin_links':
                    print('\033[94m[*] Searching Linkedin. \033[0m')
                    from theHarvester.discovery import linkedinsearch
                    linkedin_links_search = linkedinsearch.SearchLinkedin(
                        word, limit)
                    store(linkedin_links_search, 'linkedin', store_links=True)

                elif engineitem == 'netcraft':
                    print('\033[94m[*] Searching Netcraft. \033[0m')
                    from theHarvester.discovery import netcraft
                    netcraft_search = netcraft.SearchNetcraft(word)
                    store(netcraft_search, engineitem, store_host=True)

                elif engineitem == 'otx':
                    print('\033[94m[*] Searching AlienVault OTX. \033[0m')
                    from theHarvester.discovery import otxsearch
                    try:
                        otxsearch_search = otxsearch.SearchOtx(word)
                        store(otxsearch_search,
                              engineitem,
                              store_host=True,
                              store_ip=True)
                    except Exception as e:
                        print(e)

                elif engineitem == 'securityTrails':
                    print('\033[94m[*] Searching SecurityTrails. \033[0m')
                    from theHarvester.discovery import securitytrailssearch
                    try:
                        securitytrails_search = securitytrailssearch.SearchSecuritytrail(
                            word)
                        store(securitytrails_search,
                              engineitem,
                              store_host=True,
                              store_ip=True)
                    except Exception as e:
                        if isinstance(e, MissingKey):
                            print(e)
                        else:
                            pass

                elif engineitem == 'suip':
                    print(
                        '\033[94m[*] Searching Suip. This module can take 10+ mins to run but it is worth it.\033[0m'
                    )
                    from theHarvester.discovery import suip
                    try:
                        suip_search = suip.SearchSuip(word)
                        store(suip_search, engineitem, store_host=True)
                    except Exception as e:
                        print(e)

                # elif engineitem == 'spyse':
                #     print('\033[94m[*] Searching Spyse. \033[0m')
                #     from theHarvester.discovery import spyse
                #     try:
                #         spysesearch_search = spyse.SearchSpyse(word)
                #         spysesearch_search.process()
                #         hosts = filter(spysesearch_search.get_hostnames())
                #         all_hosts.extend(list(hosts))
                #         # ips = filter(spysesearch_search.get_ips())
                #         # all_ip.extend(list(ips))
                #         all_hosts.extend(hosts)
                #         db = stash.stash_manager()
                #         db.store_all(word, all_hosts, 'host', 'spyse')
                #         # db.store_all(word, all_ip, 'ip', 'spyse')
                #     except Exception as e:
                #         print(e)

                elif engineitem == 'threatcrowd':
                    print('\033[94m[*] Searching Threatcrowd. \033[0m')
                    from theHarvester.discovery import threatcrowd
                    try:
                        threatcrowd_search = threatcrowd.SearchThreatcrowd(
                            word)
                        store(threatcrowd_search, engineitem, store_host=True)
                    except Exception as e:
                        print(e)

                elif engineitem == 'trello':
                    print('\033[94m[*] Searching Trello. \033[0m')
                    from theHarvester.discovery import trello
                    # Import locally or won't work.
                    trello_search = trello.SearchTrello(word)
                    store(trello_search, engineitem, store_results=True)

                elif engineitem == 'twitter':
                    print(
                        '\033[94m[*] Searching Twitter usernames using Google. \033[0m'
                    )
                    from theHarvester.discovery import twittersearch
                    twitter_search = twittersearch.SearchTwitter(word, limit)
                    store(twitter_search, engineitem, store_people=True)

                elif engineitem == 'virustotal':
                    print('\033[94m[*] Searching VirusTotal. \033[0m')
                    from theHarvester.discovery import virustotal
                    virustotal_search = virustotal.SearchVirustotal(word)
                    store(virustotal_search, engineitem, store_host=True)

                elif engineitem == 'yahoo':
                    print('\033[94m[*] Searching Yahoo. \033[0m')
                    from theHarvester.discovery import yahoosearch
                    yahoo_search = yahoosearch.SearchYahoo(word, limit)
                    store(yahoo_search,
                          engineitem,
                          store_host=True,
                          store_emails=True)
        else:
            print('\033[93m[!] Invalid source.\n\n \033[0m')
            sys.exit(1)

    # Sanity check to see if all_emails and all_hosts are defined.
    try:
        all_emails
    except NameError:
        print(
            '\n\n\033[93m[!] No emails found because all_emails is not defined.\n\n \033[0m'
        )
        sys.exit(1)
    try:
        all_hosts
    except NameError:
        print(
            '\n\n\033[93m[!] No hosts found because all_hosts is not defined.\n\n \033[0m'
        )
        sys.exit(1)

    # Results
    if len(all_ip) == 0:
        print('\n[*] No IPs found.')
    else:
        print('\n[*] IPs found: ' + str(len(all_ip)))
        print('-------------------')
        # use netaddr as the list may contain ipv4 and ipv6 addresses
        ip_list = sorted([netaddr.IPAddress(ip.strip()) for ip in set(all_ip)])
        print('\n'.join(map(str, ip_list)))

    if len(all_emails) == 0:
        print('\n[*] No emails found.')
    else:
        print('\n[*] Emails found: ' + str(len(all_emails)))
        print('----------------------')
        print(('\n'.join(sorted(list(set(all_emails))))))

    if len(all_hosts) == 0:
        print('\n[*] No hosts found.\n\n')
    else:
        print('\n[*] Hosts found: ' + str(len(all_hosts)))
        print('---------------------')
        all_hosts = sorted(list(set(all_hosts)))
        full_host = hostchecker.Checker(all_hosts)
        full, ips = asyncio.run(full_host.check())
        db = stash.StashManager()
        for host in full:
            host = str(host)
            print(host)
        host_ip = [
            netaddr_ip.format()
            for netaddr_ip in sorted([netaddr.IPAddress(ip) for ip in ips])
        ]
        db.store_all(word, host_ip, 'ip', 'DNS-resolver')
    length_urls = len(all_urls)
    if length_urls == 0:
        if len(engines) >= 1 and 'trello' in engines:
            print('\n[*] No Trello URLs found.')
    else:
        print('\n[*] Trello URLs found: ' + str(length_urls))
        print('--------------------')
        for url in sorted(all_urls):
            print(url)

    # DNS brute force
    # dnsres = []
    if dnsbrute is True:
        print('\n[*] Starting DNS brute force.')
        a = dnssearch.DnsForce(word, dnsserver, verbose=True)
        a.process()
        # print('\n[*] Hosts found after DNS brute force:')
        # for y in res:
        # print('-------------------------------------')
        #    print(y)
        #   dnsres.append(y.split(':')[0])
        #    if y not in full:
        #        full.append(y)
        # db = stash.stash_manager()
        # db.store_all(word, dnsres, 'host', 'dns_bruteforce')

    # Port scanning
    if ports_scanning is True:
        print('\n\n[*] Scanning ports (active).\n')
        for x in full:
            host = x.split(':')[1]
            domain = x.split(':')[0]
            if host != 'empty':
                print(('[*] Scanning ' + host))
                ports = [21, 22, 80, 443, 8080]
                try:
                    scan = port_scanner.PortScan(host, ports)
                    openports = scan.process()
                    if len(openports) > 0:
                        print(('\t[*] Detected open ports: ' +
                               ','.join(str(e) for e in openports)))
                    takeover_check = True
                    if takeover_check and len(openports) > 0:
                        search_take = takeover.TakeOver(domain)
                        search_take.process()
                except Exception as e:
                    print(e)

    # DNS reverse lookup
    dnsrev = []
    if dnslookup is True:
        print('\n[*] Starting active queries.')
        analyzed_ranges = []
        for entry in host_ip:
            print(entry)
            ip = entry.split(':')[0]
            ip_range = ip.split('.')
            ip_range[3] = '0/24'
            ip_range = '.'.join(ip_range)
            if ip_range not in analyzed_ranges:
                print('[*] Performing reverse lookup in ' + ip_range)
                a = dnssearch.DnsReverse(ip_range, True)
                a.list()
                res = a.process()
                analyzed_ranges.append(ip_range)
            else:
                continue
            for entries in res:
                if entries.count(word):
                    dnsrev.append(entries)
                    if entries not in full:
                        full.append(entries)
        print('[*] Hosts found after reverse lookup (in target domain):')
        print('--------------------------------------------------------')
        for xh in dnsrev:
            print(xh)

    # DNS TLD expansion
    dnstldres = []
    if dnstld is True:
        print('[*] Starting DNS TLD expansion.')
        a = dnssearch.DnsTld(word, dnsserver, verbose=True)
        res = a.process()
        print('\n[*] Hosts found after DNS TLD expansion:')
        print('----------------------------------------')
        for y in res:
            print(y)
            dnstldres.append(y)
            if y not in full:
                full.append(y)

    # Virtual hosts search
    if virtual == 'basic':
        print('\n[*] Virtual hosts:')
        print('------------------')
        for ip in host_ip:
            basic_search = bingsearch.SearchBing(ip, limit, start)
            basic_search.process_vhost()
            results = basic_search.get_allhostnames()
            for result in results:
                # Strip any residual HTML tag fragments from the hostname.
                result = re.sub(r'[[\<\/?]*[\w]*>]*', '', result)
                result = re.sub('<', '', result)
                result = re.sub('>', '', result)
                print(ip + '\t' + result)
                vhost.append(ip + ':' + result)
                full.append(ip + ':' + result)
        vhost = sorted(set(vhost))

    # Shodan
    shodanres = []
    if shodan is True:
        import texttable
        tab = texttable.Texttable()
        header = [
            'IP address', 'Hostname', 'Org', 'Services:Ports', 'Technologies'
        ]
        tab.header(header)
        tab.set_cols_align(['c', 'c', 'c', 'c', 'c'])
        tab.set_cols_valign(['m', 'm', 'm', 'm', 'm'])
        tab.set_chars(['-', '|', '+', '#'])
        tab.set_cols_width([15, 20, 15, 15, 18])
        print('\033[94m[*] Searching Shodan. \033[0m')
        try:
            # Instantiate the client once; rebinding the name `shodan` inside the
            # loop would shadow the boolean CLI flag of the same name.
            shodan_search = shodansearch.SearchShodan()
            for ip in host_ip:
                print('\tSearching for ' + ip)
                rowdata = shodan_search.search_ip(ip)
                time.sleep(2)
                tab.add_row(rowdata)
            printedtable = tab.draw()
            print(printedtable)
        except Exception as e:
            print(f'\033[93m[!] An error occurred with Shodan: {e} \033[0m')

    # Here we need to add explosion mode.
    # We have to take out the TLDs to do this.
    if args.dns_tld is not False:
        counter = 0
        # Use a distinct loop variable: reusing `word` here would clobber the
        # target domain, which the reporting section below still needs.
        for vhost_entry in vhost:
            search = googlesearch.SearchGoogle(vhost_entry, limit, counter)
            search.process(google_dorking)
            emails = search.get_emails()
            hosts = search.get_hostnames()
            print(emails)
            print(hosts)

    # Reporting
    if filename != "":
        try:
            print('\n[*] Reporting started.')
            db = stash.StashManager()
            scanboarddata = db.getscanboarddata()
            latestscanresults = db.getlatestscanresults(word)
            previousscanresults = db.getlatestscanresults(word,
                                                          previousday=True)
            latestscanchartdata = db.latestscanchartdata(word)
            scanhistorydomain = db.getscanhistorydomain(word)
            pluginscanstatistics = db.getpluginscanstatistics()
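            # Assemble the report incrementally: latest/previous scan results,
            # history graphs, plugin statistics, then the dashboard summary.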
            generator = statichtmlgenerator.HtmlGenerator(word)
            HTMLcode = generator.beginhtml()
            HTMLcode += generator.generatelatestscanresults(latestscanresults)
            HTMLcode += generator.generatepreviousscanresults(
                previousscanresults)
            graph = reportgraph.GraphGenerator(word)
            HTMLcode += graph.drawlatestscangraph(word, latestscanchartdata)
            HTMLcode += graph.drawscattergraphscanhistory(
                word, scanhistorydomain)
            HTMLcode += generator.generatepluginscanstatistics(
                pluginscanstatistics)
            HTMLcode += generator.generatedashboardcode(scanboarddata)
            HTMLcode += '<p><span style="color: #000000;">Report generated on ' + str(
                datetime.datetime.now()) + '</span></p>'
            HTMLcode += '''
            </body>
            </html>
            '''
            with open(filename, 'w') as html_file:
                html_file.write(HTMLcode)
            print('[*] Reporting finished.')
            print('[*] Saving files.')
        except Exception as e:
            print(e)
            print(
                '\n\033[93m[!] An error occurred while creating the output file.\n\n \033[0m'
            )
            sys.exit(1)

        try:
            filename = filename.split('.')[0] + '.xml'
            with open(filename, 'w') as file:
                file.write('<?xml version="1.0" encoding="UTF-8"?><theHarvester>')
                for x in all_emails:
                    file.write('<email>' + x + '</email>')
                for x in full:
                    parts = x.split(':')
                    if len(parts) == 2:
                        file.write('<host>' + '<ip>' + parts[1] + '</ip><hostname>' +
                                   parts[0] + '</hostname>' + '</host>')
                    else:
                        # Entry has no 'hostname:ip' pair; write it verbatim.
                        file.write('<host>' + x + '</host>')
                for x in vhost:
                    parts = x.split(':')
                    if len(parts) == 2:
                        file.write('<vhost>' + '<ip>' + parts[1] + '</ip><hostname>' +
                                   parts[0] + '</hostname>' + '</vhost>')
                    else:
                        file.write('<vhost>' + x + '</vhost>')
                if shodanres != []:
                    shodanalysis = []
                    for x in shodanres:
                        res = x.split('SAPO')
                        file.write('<shodan>')
                        file.write('<host>' + res[0] + '</host>')
                        file.write('<port>' + res[2] + '</port>')
                        file.write('<banner><!--' + res[1] + '--></banner>')
                        reg_server = re.compile('Server:.*')
                        temp = reg_server.findall(res[1])
                        if temp:
                            shodanalysis.append(res[0] + ':' + temp[0])
                        file.write('</shodan>')
                    if shodanalysis:
                        shodanalysis = sorted(set(shodanalysis))
                        file.write('<servers>')
                        for x in shodanalysis:
                            file.write('<server>' + x + '</server>')
                        file.write('</servers>')
                file.write('</theHarvester>')
            print('[*] Files saved.')
        except Exception as er:
            print(
                f'\033[93m[!] An error occurred while saving the XML file: {er} \033[0m'
            )
        print('\n\n')
        sys.exit(0)
Example No. 8
0
    def store(search_engine: Any,
              source: str,
              process_param: Any = None,
              store_host: bool = False,
              store_emails: bool = False,
              store_ip: bool = False,
              store_people: bool = False,
              store_data: bool = False,
              store_links: bool = False,
              store_results: bool = False) -> None:
        """
        Persist details into the database.
        The details to be stored are controlled by the parameters passed to the
        method; a brief usage sketch follows this function.

        :param search_engine: search engine to fetch details from
        :param source: source against which the details (corresponding to the search engine) need to be persisted
        :param process_param: any parameters to be passed to the search engine
                              eg: Google needs google_dorking
        :param store_host: whether to store hosts
        :param store_emails: whether to store emails
        :param store_ip: whether to store IP address
        :param store_people: whether to store user details
        :param store_data: whether to fetch host from method get_data() and persist
        :param store_links: whether to store links
        :param store_results: whether to fetch details from get_results() and persist
        """
        # Run the engine, passing the extra parameter when one is supplied
        # (e.g. Google needs the google_dorking flag).
        if process_param is None:
            search_engine.process()
        else:
            search_engine.process(process_param)
        db_stash = stash.StashManager()
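        # Note: `filter` below appears to be theHarvester's own single-argument
        # de-duplication helper, not Python's builtin filter().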

        if store_host:
            host_names = filter(search_engine.get_hostnames())
            all_hosts.extend(host_names)
            db_stash.store_all(word, all_hosts, 'host', source)
        if store_emails:
            email_list = filter(search_engine.get_emails())
            all_emails.extend(email_list)
            db_stash.store_all(word, email_list, 'email', source)
        if store_ip:
            ips_list = search_engine.get_ips()
            all_ip.extend(ips_list)
            db_stash.store_all(word, all_ip, 'ip', source)
        if store_data:
            data = filter(search_engine.get_data())
            all_hosts.extend(data)
            db_stash.store_all(word, all_hosts, 'host', source)
        if store_results:
            email_list, host_names, urls = search_engine.get_results()
            all_emails.extend(email_list)
            host_names = filter(host_names)
            all_urls.extend(filter(urls))
            all_hosts.extend(host_names)
            db_stash.store_all(word, all_hosts, 'host', source)
            db_stash.store_all(word, all_emails, 'email', source)
        if store_people:
            people_list = search_engine.get_people()
            db_stash.store_all(word, people_list, 'people', source)
            if len(people_list) == 0:
                print('\n[*] No users found.\n\n')
            else:
                print('\n[*] Users found: ' + str(len(people_list)))
                print('---------------------')
                for usr in sorted(list(set(people_list))):
                    print(usr)
        if store_links:
            links = search_engine.get_links()
            db_stash.store_all(word, links, 'name', source)
            if len(links) == 0:
                print('\n[*] No links found.\n\n')
            else:
                print(f'\n[*] Links found: {len(links)}')
                print('---------------------')
                for link in sorted(list(set(links))):
                    print(link)
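
A minimal sketch of how this store helper might be invoked from the engine-selection loop. The SearchBing constructor arguments mirror the virtual-host search earlier in this example; the 'bing' source label and the particular flag combination are illustrative assumptions, not taken from this excerpt.

    # Hypothetical call site (assumes word, limit, and start are in scope,
    # as in the surrounding start() coroutine):
    bing_search = bingsearch.SearchBing(word, limit, start)
    store(bing_search, 'bing', store_host=True, store_emails=True)

Because each persistence step is gated by a keyword flag, an engine opts in to exactly the result types it yields, e.g. store_links=True only for engines that implement get_links().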