Example #1
    def test_lookup_domain_bad_servername(self):
        """
        + valid domain name with invalid servername
        """

        servername = ['127.0.0.1']
        domain = 'www.google.com'

        result = dnslib.lookup_domain(domain, servername)
        assert result is not None

        #- test response1 is none
        assert 'response1' in result
        assert result['response1'] is None
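
Why does an unreachable nameserver leave response1 empty? The query simply never gets an answer and times out. The same failure mode can be reproduced with dnspython (a different library than the dnslib under test; this sketch assumes dnspython >= 2.0 is installed and that nothing answers DNS on 127.0.0.1):

import dns.exception
import dns.resolver

# Point a resolver at 127.0.0.1, where (normally) no DNS server listens.
resolver = dns.resolver.Resolver(configure=False)
resolver.nameservers = ['127.0.0.1']
resolver.lifetime = 2  # fail fast instead of waiting out the default timeout

try:
    resolver.resolve('www.google.com', 'A')
except dns.exception.Timeout:
    # no answer ever arrives -- this is the case the test maps to
    # result['response1'] being None
    print('query timed out')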
Example #2
    def test_lookup_domain_bad_domain_name(self):
        """
         2. test handling of bad case
            .1 test single domain w/ domain name
                * test result is not none (even in bad case, the return value should exist)
                    + no domain name given
        """
        #+ invalid domain name
        domain = 'www.gosdafeefwqmqwnpqpjdvzgle.s.adf.wefpqwfm.ewqfqpwqrqwn.com'
        result = dnslib.lookup_domain(domain)

        #- test result exists and contains response1
        assert result is not None
        assert 'response1' in result

        #- test response1 exists but resolves to no IPs
        assert result['response1'] is not None
        assert result['response1-ips'] == []
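
Together with Example #1, this pins down how the library distinguishes transport failure from resolution failure: an unreachable nameserver produces no response object at all, while a name that does not resolve still produces a response, just with an empty IP list. As illustrative dictionaries (the values below are placeholders, not dnslib's actual response objects):

# Placeholder sketches of the two bad-case results the tests assert on.
unreachable_server = {
    'domain': 'www.google.com',
    'response1': None,  # the query timed out; no answer at all
}
unresolvable_name = {
    'domain': 'no-such-host.invalid',
    'response1': '<DNS response object>',  # the server answered (e.g. NXDOMAIN)
    'response1-ips': [],                   # ...but there are no A records
}

assert unreachable_server['response1'] is None
assert unresolvable_name['response1'] is not None
assert unresolvable_name['response1-ips'] == []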
Example #3
    def test_lookup_domain_good(self, ipregex, domain='www.google.com'):
        """
        1. test good case response
            .1 test single domain w/ domain name
        :param ipregex: valid IP regular expression fixture
        :param domain: domain to be tested
        """

        result = dnslib.lookup_domain(domain)

        #* test result is not None
        assert result is not None

        #* test query domain matches the given domain
        assert result['domain'] == domain, 'Requested domain name should match given domain name'

        #* test nameserver is not none
        assert result['nameserver'] is not None

        #* test first response is not none
        assert result['response1'] is not None

        #+ test the IPs of the first response are present
        assert result['response1-ips'] is not None

        #- test each IP is in a valid format and matches the expected pattern
        for ip in result['response1-ips']:
            assert re.match(ipregex, ip), 'ip is invalid'

        #* test second response is in the result (whatever is in it)
        assert 'response2' in result, 'second response should be in the result (whatever is in it)'

        #+ if there is a second response, validate its IPs
        if result['response2'] is not None:
            assert 'response2-ips' in result
            #- test each returned IP is in a valid format and matches the expected pattern
            for ip in result['response2-ips']:
                assert re.match(ipregex, ip), 'ip is invalid'
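
The ipregex argument is a pytest fixture (the docstring calls it a "valid IP regular expression fixture"). The project's actual fixture is not shown in these snippets; a plausible IPv4-only stand-in, defined in a conftest.py, could look like this:

import pytest

@pytest.fixture
def ipregex():
    """Pattern for a dotted-quad IPv4 address with each octet in 0-255."""
    octet = r'(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)'
    return r'^{0}(?:\.{0}){{3}}$'.format(octet)

Returning the pattern as a plain string keeps the re.match(ipregex, ip) call sites in the test unchanged.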
Example #4
    def run_file(self, input_file):
        file_name, file_contents = input_file

        # Initialize the results for this input file.
        # This can be anything from file name to version
        # to any useful information.
        result = {'file_name': file_name}

        http_results = {}
        tls_results = {}
        dns_results = {}
        traceroute_results = {}
        url_metadata_results = {}
        file_metadata = {}
        file_comments = []

        # each pcap file is stored in a separate file
        # designated by a number. the indexes are stored
        # in the json file and the pcap files are stored
        # with their indexes as file names.
        pcap_results = {}
        pcap_indexes = {}
        url_index = 0
        index_row = None
        comments = ""

        # we may want to make this threaded and concurrent
        csvreader = csv.reader(file_contents, delimiter=',', quotechar='"')
        for row in csvreader:
            """
            First few lines are expected to be comments in key: value
            format. The first line after that could be our column header
            row, starting with "url", and the rest are data rows.
            This is a sample input file we're trying to parse:

            # comment: Global List,,,,,
            # date: 03-17-2015,,,,,
            # version: 1,,,,,
            # description: This is the global list. Last updated in 2012.,,,,
            url,country,category,description,rationale,provider
            http://8thstreetlatinas.com,glo,P**N,,,PRIV
            http://abpr2.railfan.net,glo,MISC,Pictures of trains,,PRIV

            """

            # skip blank lines, which csv.reader yields as empty rows
            if not row:
                continue

            # parse file comments, if it looks like "key : value",
            # parse it as a key-value pair. otherwise, just
            # store it as a raw comment.
            if row[0].startswith('#'):
                row = row[0][1:].strip()
                if len(row.split(':')) > 1:
                    key, value = row.split(':', 1)
                    key = key.strip()
                    value = value.strip()
                    file_metadata[key] = value
                else:
                    file_comments.append(row)
                continue

            # detect the header row and store it
            # it is usually the first row and starts with "url,"
            if row[0].strip().lower() == "url":
                index_row = row
                continue

            url = row[0].strip()
            if not url:
                continue

            meta = row[1:]
            url_index = url_index + 1
            http_ssl = False
            ssl_port = 443
            http_path = '/'

            # parse the URL to extract netlocation, HTTP path, domain name,
            # and HTTP method (SSL or plain)
            try:
                urlparse_object = urlparse.urlparse(url)
                http_netloc = urlparse_object.netloc

                # if netloc is not urlparse-able, add // to the start
                # of URL
                if http_netloc == '':
                    urlparse_object = urlparse.urlparse('//%s' % (url))
                    http_netloc = urlparse_object.netloc

                domain_name = http_netloc.split(':')[0]

                http_path = urlparse_object.path
                if http_path == '':
                    http_path = '/'

                # we assume the scheme is either empty, "http", or "https";
                # other schemes (e.g. "ftp") are out of the scope of this
                # measurement
                if urlparse_object.scheme == "https":
                    http_ssl = True
                    if len(http_netloc.split(':')) == 2:
                        ssl_port = http_netloc.split(':')[1]

            except Exception as exp:
                logging.warning("%s: failed to parse URL: %s" % (url, exp))
                http_netloc = url
                http_ssl = False
                ssl_port = 443
                http_path = '/'
                domain_name = url

            # start tcpdump
            td = Tcpdump()
            tcpdump_started = False

            try:
                if self.record_pcaps:
                    td.start()
                    tcpdump_started = True
                    logging.info("%s: tcpdump started..." % (url))
                    # wait for tcpdump to initialize
                    time.sleep(1)
            except Exception as exp:
                logging.warning("%s: tcpdump failed: %s" % (url, exp))

            # HTTP GET
            logging.info("%s: HTTP" % (url))
            try:
                http_results[url] = http.get_request(http_netloc,
                                                     http_path,
                                                     ssl=http_ssl)
            except Exception as exp:
                logging.warning("%s: HTTP test failed: %s" % (url, exp))
                http_results[url] = {"exception": str(exp)}

            # TLS certificate
            # this will only work if the URL starts with https://
            if http_ssl:
                try:
                    tls_result = {}
                    logging.info("%s: TLS certificate" % (domain_name))
                    fingerprint, cert = tls.get_fingerprint(
                        domain_name, ssl_port)
                    tls_result['port'] = ssl_port
                    tls_result['fingerprint'] = fingerprint
                    tls_result['cert'] = cert

                    tls_results[domain_name] = tls_result
                except Exception as exp:
                    logging.warning(
                        "%s: TLS certfiticate download failed: %s" %
                        (domain_name, exp))
                    tls_results[domain_name] = {"exception": str(exp)}

            # DNS Lookup
            logging.info("%s: DNS" % (domain_name))
            try:
                dns_results[domain_name] = dnslib.lookup_domain(domain_name)
            except Exception as exp:
                logging.warning("%s: DNS lookup failed: %s" %
                                (domain_name, exp))
                dns_results[domain_name] = {"exception": str(exp)}

            # Traceroute
            for method in self.traceroute_methods:
                try:
                    logging.info("%s: Traceroute (%s)" %
                                 (domain_name, method.upper()))
                    traceroute_results[domain_name] = traceroute.traceroute(
                        domain_name, method=method)
                except Exception as exp:
                    logging.warning("%s: Traceroute (%s) failed: %s" %
                                    (domain_name, method.upper(), exp))
                    traceroute_results[domain_name] = {"exception": str(exp)}

            # end tcpdump
            if tcpdump_started:
                logging.info("%s: waiting for tcpdump..." % (url))
                # 2 seconds should be enough.
                time.sleep(2)
                td.stop()
                logging.info("%s: tcpdump stopped." % (url))
                pcap_indexes[url] = '%s-%s.pcap' % (file_name,
                                                    format(url_index, '04'))
                pcap_results[pcap_indexes[url]] = td.pcap()

            # Meta-data
            url_metadata_results[url] = meta

        result["http"] = http_results
        result["tls"] = tls_results
        result["dns"] = dns_results
        result["traceroute"] = traceroute_results

        # if we have an index row, we should turn URL metadata
        # into dictionaries
        if index_row is not None:
            indexed_url_metadata = {}
            for url, meta in url_metadata_results.items():
                try:
                    indexed_meta = {}
                    for i in range(1, len(index_row)):
                        indexed_meta[index_row[i]] = meta[i - 1]
                    indexed_url_metadata[url] = indexed_meta
                except IndexError:
                    # metadata row shorter than the header row; keep the
                    # partially indexed metadata collected so far
                    indexed_url_metadata[url] = indexed_meta
                    continue
            url_metadata_results = indexed_url_metadata

        result["url_metadata"] = url_metadata_results
        result["file_metadata"] = file_metadata
        result["file_comments"] = file_comments
        if self.record_pcaps:
            result['pcap_indexes'] = pcap_indexes
            self.external_results.update(pcap_results)

        return result
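
A sketch of how run_file might be driven, using the sample format from the inline docstring. The Experiment class name here is an assumption standing in for whatever class defines run_file in the real code base; the driver only relies on the attributes the method actually reads (record_pcaps and traceroute_methods, plus external_results when pcaps are recorded):

# Hypothetical driver for run_file; Experiment is a placeholder name.
input_lines = [
    '# comment: tiny sample list,,,,,',
    'url,country,category,description,rationale,provider',
    'https://www.google.com,glo,SRCH,search engine,,PRIV',
]

exp = Experiment()
exp.record_pcaps = False      # skip tcpdump, so no root privileges needed
exp.traceroute_methods = []   # skip traceroute for a quick pass

report = exp.run_file(('sample.csv', input_lines))
print(report['file_metadata'])   # {'comment': 'tiny sample list'}
print(report['url_metadata'])    # per-URL metadata keyed by the header row

Note that the HTTP, TLS, and DNS probes still perform real network I/O; only the pcap capture and traceroute steps are disabled by the attributes above.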