示例#1
0
def processQueue():
    socket.setdefaulttimeout(35)
    socks.socket.setdefaulttimeout(35)
    dbConnection = psycopg2.connect(dbConnectionString)
    if not TorHandler.isQueueEmpty(dbConnection):
        currentNode = TorHandler.getNextNodeFromQueue(dbConnection)
        print "New node from queue:", currentNode.fingerprint, currentNode.nickname, currentNode.address
        testTorNode(dbConnection, currentNode)
        time.sleep(5)
    else:
        msg = "Queue empty, filling queue"
        logMessage(msg, "queue-fill")
        print msg
        socket.socket = socks._orgsocket
        TorHandler.fillQueue(dbConnection)
        logMessage("Queue filled", "queue-fill-success")
示例#2
0
        # NOTE(review): fragment — the `try` matching the excepts at the
        # outer level below begins above this view; do not restructure
        # without seeing the whole enclosing function.
        # Attempt an FTP login against the node, tolerating up to two
        # timeout-style failures before giving up.
        if currentNode.hasFTP and timeouts < 2:
            # test FTP
            try:
                print "Node supports FTP, using honey connection"
                # "192.168.1.3" is presumably the honeypot credential
                # target — TODO confirm what testFTP's second arg means.
                testFTP(currentNode, "192.168.1.3")
                currentNode.madeFTPLogin = True
            except (socket.error, socks.Socks5Error, socks.GeneralProxyError, socks.ProxyError), e:
                print "problems with allocating FTP socket", e.message, traceback.format_exc()
                # Count timeouts; any other socket/proxy error is fatal
                # for this node and re-raised to the outer handler.
                if "TTL expired" in e.message or "timed out" in e.message:
                    timeouts = timeouts + 1
                else:
                    raise        
    # Outer handler: a timeout-style failure means the node is
    # unreachable, so it is dropped from the queue.
    except (socket.error, socks.Socks5Error, socks.GeneralProxyError, socks.ProxyError), e:
        if "TTL expired" in e.message or "timed out" in e.message:
            print "node seems offline or broken, removing from queue..."
            TorHandler.removeNodeFromQueue(dbConnection, currentNode)
    # Anything unexpected is logged with fingerprint + stack trace
    # rather than crashing the queue processor.
    except Exception, e:
        logMsg = "unknown exception:\nfingerprint: " + currentNode.fingerprint + "\nexception: " + e.message + "\nstacktrace:\n" + traceback.format_exc()
        logMessage(logMsg, "exception")
    finally:
        # Close the connection to the Tor network
        # Persist whatever state was gathered for this node, even on error.
        TorHandler.saveChangesToDB(dbConnection, currentNode)
        print "terminating tor process (if any)"
        try:
            # terminate(), short grace period, then kill() as a fallback;
            # raises if no process exists, which is silently tolerated.
            tor_process.terminate()
            time.sleep(1)
            tor_process.kill()
        except Exception, e:
            print "there is no tor process (or spoon)"
        print "done with", currentNode.fingerprint, currentNode.nickname, "\n"
示例#3
0
def main():
    """Crawl .onion domains starting from ``args.url``.

    Connects to a local CouchDB server (creating the ``sites`` database
    on first run), launches a Tor process and controller, then scrapes
    domains breadth-first with a small thread pool until the work queue
    is exhausted. Exits via ``sys.exit`` if the database or the Tor
    control port is unreachable.
    """
    # TODO: Add command line switch to set db server? Or config file?
    dbserv = couchdb.Server('http://localhost:5984/')

    # CouchDB connection and db creation
    try:
        db = dbserv['sites']
    except socket.error as e:
        print("[E] Could not connect to the database!")
        if DEBUG:
            print("Error message: {0}".format(e))
        sys.exit(
            "Please make sure that the database has been started, and try again"
        )
    except couchdb.http.ResourceNotFound:
        # First run: the 'sites' database does not exist yet.
        if DEBUG:
            print("[I] Building initial database...")
        db = dbserv.create('sites')

    # start the Tor process
    handler = TorHandler.TorHandler()
    if not handler.start_tor():
        print(
            "[E] There was an error launching Tor. It may already be running.")

    if not handler.start_controller():
        print("[E] Could not connect to control port!")
        sys.exit("Please kill all running Tor instances and try again")

    # Unbounded FIFO of domains still to visit, seeded with the start URL.
    domains = queue.Queue(0)
    url = check_http(args.url)
    domains.put(url)

    # Prints endpoint information if debugging is enabled
    if DEBUG:
        print(handler.check_endpoint())

    print("\nScraping for .ONION domains:\n")

    # Sets up DB entry for initial site being scraped
    # (removed an unused `prev_site` local that was assigned here)
    if url not in db:
        current_time = datetime.datetime.now()
        urldoc = DB_Structure(_id=check_http(url),
                              url=check_http(url),
                              ref='None',
                              Discovered=current_time,
                              LastAccessed=None,
                              title='')
        urldoc.store(db)

    # Main scraping loop
    # Gathers domains into the database, and continues to scrape through
    # each subsequent domain. Domains are drained in batches of up to 5,
    # one worker per domain; scrape_site pushes newly discovered domains
    # back onto `domains`.
    with concurrent.futures.ThreadPoolExecutor(max_workers=5) as e:
        while domains.qsize() > 0:
            batch_size = min(domains.qsize(), 5)
            scrape_array = [domains.get() for _ in range(batch_size)]

            scraper = [
                e.submit(scrape_site, x, domains, db, handler)
                for x in scrape_array
            ]

            # Hackish way to make the threads wait until the queue is
            # populated again, or until all threads are done.
            if domains.qsize() == 0:
                for s in concurrent.futures.as_completed(scraper):
                    # Fix: `.result` was previously referenced without
                    # being called, so worker exceptions were silently
                    # discarded. Surface them without aborting the crawl.
                    try:
                        s.result()
                    except Exception as exc:
                        print("[E] Worker failed: {0}".format(exc))

    print("\nScraping Complete.")

    if not handler.kill_tor():
        print("[E] Error killing the Tor process! It may still be running.")
    else:
        print("\nTor Instance Killed.")