Пример #1
0
    def test_DomainToIPs(self):
        """Check the domain-to-IP mapping built from ``unittest4C.log``.

        Every domain returned by ``dataprun.GenerateDomain2IP`` must be one
        of the expected domains and map to exactly the expected IP list, and
        the number of returned domains must equal the number expected.
        """
        expected = {
            "google.com": ["0.0.0.0"],
            "tmall.com": ["0.0.0.2"],
            "youtube.com": ["0.0.0.0"],
            "baidu.com": ["0.0.0.2"],
        }

        records, domains, _ips = dataprun.GenerateWL(["unittest4C.log"])
        domain_to_ips = dataprun.GenerateDomain2IP(records, domains)

        for domain in domain_to_ips:
            self.assertIn(domain, expected)
            self.assertEqual(domain_to_ips[domain], expected[domain])

        # Combined with the membership checks above, equal sizes mean the
        # two mappings cover the same set of domains.
        self.assertEqual(len(expected), len(domain_to_ips))
Пример #2
0
    def test_MultipleAnswers(self):
        """Check the domain and IP dictionaries built from ``unittest4MC.log``.

        Each key returned by ``dataprun.GenerateWL`` must appear in the
        expected lists, and the dictionary sizes must match those lists.
        """
        expected_domains = [
            "google.com", "tmall.com", "youtube.com", "baidu.com"
        ]
        expected_ips = [
            "0.0.0.0", "0.0.0.2", "1.0.0.0", "1.0.0.2", "192.168.75.0",
            "192.168.75.2", "192.168.75.1", "192.168.75.3"
        ]

        _records, domains, ips = dataprun.GenerateWL(["unittest4MC.log"])

        for name in domains:
            self.assertIn(name, expected_domains)

        for address in ips:
            self.assertIn(address, expected_ips)

        self.assertEqual(len(domains), len(expected_domains))
        self.assertEqual(len(ips), len(expected_ips))
Пример #3
0
def main():
    """Create the domain-to-IP CSR matrix for the hindom project.

    Usage:
        python3 domain2IP_matrix.py --dns_files /data/dns/2021-03-29_dns.05:00:00-06:00:00.log ...

    Requires: dataprun.py

    The DNS log paths given with ``--dns_files`` are passed to
    ``dataprun.GenerateWL``; its result feeds ``dataprun.GenerateDomain2IP``
    and finally ``getDomainResolveIpCSR``.

    Exits through ``parser.error`` (usage message, exit status 2) when
    ``dataprun.GenerateWL`` returns ``None`` instead of its usual 3-tuple.
    """

    # Process command line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('--dns_files',
                        type=str,
                        required=True,
                        nargs='+',
                        help='Expects log file from /data/dns directory')
    args = parser.parse_args()

    generated = dataprun.GenerateWL(args.dns_files)
    if generated is None:
        # GenerateWL can return None (the project's unit tests assert this
        # for an invalid log). Unpacking None would raise an opaque
        # TypeError, so report the problem as a command-line error instead.
        parser.error(
            'dataprun.GenerateWL returned no data for the given --dns_files')
    _records, domain2index, ip2index = generated

    domain2ip = dataprun.GenerateDomain2IP(_records, domain2index)

    # Create sparse matrix of domain to IP relations
    getDomainResolveIpCSR(domain2ip, domain2index, ip2index)
Пример #4
0
    def test_InvalidInput(self):
        """``GenerateWL`` is expected to return ``None`` for ``unittest4I.log``."""
        result = dataprun.GenerateWL(["unittest4I.log"])
        self.assertIsNone(result)
Пример #5
0
    def test_PrunEffect(self):
        """Check the result sizes when ``GenerateWL`` runs with ``ka=0, kd=0``."""
        # With these settings the test expects no domain to remain while
        # two IP entries are still reported.
        _records, domains, ips = dataprun.GenerateWL(["unittest4C.log"],
                                                     ka=0,
                                                     kd=0)
        self.assertEqual(len(domains), 0)
        self.assertEqual(len(ips), 2)
Пример #6
0
def drdMatrix(filename):
    """
    Build the domain-registrar-domain matrix; meant to be called from 'hin.py'.

    - 'filename' is handed unchanged to dataprun.GenerateWL; per the original
      usage note it may be a single file or a list of files.
    - The whitelisted domains are filtered by top-level domain, looked up
      through whoisLookup(), and the result is passed to csrMatrix().

    - returns the value produced by csrMatrix(): a CSR sparse matrix relating
      two domains that have the same registrar
    """

    # Start of the timed section
    began_at = time.time()

    # Top-level domains the whois package is said to support.
    # NOTE(review): entries such as 'in_', 'is_is', 'ac_uk', 'co_jp' and
    # 'ru_rf' contain underscores and can never equal the last dot-separated
    # label of a domain, so e.g. plain '.in' / '.is' domains are filtered
    # out below - confirm whether that is intended.
    supported_tlds = {
        'com', 'uk', 'ac_uk', 'ar', 'at', 'pl', 'be', 'biz', 'br', 'ca', 'cc',
        'cl', 'club', 'cn', 'co', 'jp', 'co_jp', 'cz', 'de', 'store',
        'download', 'edu', 'education', 'eu', 'fi', 'fr', 'id', 'in_', 'info',
        'io', 'ir', 'is_is', 'it', 'kr', 'kz', 'lt', 'ru', 'lv', 'me', 'mobi',
        'mx', 'name', 'net', 'ninja', 'se', 'nu', 'nyc', 'nz', 'online', 'org',
        'pharmacy', 'press', 'pw', 'rest', 'ru_rf', 'security', 'sh', 'site',
        'space', 'tech', 'tel', 'theatre', 'tickets', 'tv', 'us', 'uz',
        'video', 'website', 'wiki', 'xyz'
    }

    # Ask dataprun for the whitelisted domain names and their indexes.
    # NOTE(review): the result is unpacked directly; if GenerateWL returns
    # None this raises TypeError - TODO confirm callers pass valid logs.
    print("Calling Dataprun package for whitelisted domain names......\n")
    _records, domain_to_index, _ip_to_index = dataprun.GenerateWL(filename)

    # Keep only the domains whose last label is one of the supported TLDs.
    lookup_candidates = {
        name: index
        for name, index in domain_to_index.items()
        if name.rsplit(".", 1)[-1] in supported_tlds
    }

    # Resolve a registrar for every remaining domain name.
    print(
        "\nStarting whois lookup for finding registrar of each domain name...\n"
    )
    lookup_result = whoisLookup(lookup_candidates)
    registrar_by_index, count_FailedLookups = lookup_result

    # The keys of the lookup result are the domain-name indexes.
    resolved_indexes = list(registrar_by_index)

    print(
        f"\nNumber of unseccessful registrar lookups = {count_FailedLookups}\n"
    )

    # Build the domain-registrar-domain matrix from the lookup result.
    print("Generating domain_registrar_domain matrix....\n")
    matrix = csrMatrix(resolved_indexes, registrar_by_index)

    print("CSR Sparse matrix generation is complete.\n")

    # End of the timed section
    total_time = time.time() - began_at

    print(f"\nTotal time to run : {total_time} seconds\n")

    return matrix