Exemplo n.º 1
0
 def get_urls(self, timeout=30):
     """Visit every Trello URL found in the results and re-parse for hostnames.

     URLs are extracted from ``self.totalresults`` with a parser keyed on
     "trello.com". Each distinct URL is fetched and its response text is
     appended to ``self.totalresults``; the enlarged text is then parsed
     again using ``self.word``.

     Args:
         timeout: Seconds to wait on each HTTP request before giving up.
             Without a timeout a single unresponsive URL would block the
             whole search indefinitely.

     Returns:
         A ``(hostnames, trello_urls)`` tuple on success, or ``None`` if any
         exception was raised (the error is printed, not re-raised).
     """
     print('\tSearching Trello URLs.')
     try:
         rawres = myparser.Parser(self.totalresults, "trello.com")
         trello_urls = rawres.urls()
         visited = set()
         for url in trello_urls:
             # Fetch each distinct URL only once.
             if url in visited:
                 continue
             visited.add(url)
             response = requests.get(
                 url=url,
                 headers={'User-Agent': googleUA},
                 timeout=timeout,
             )
             # Appended page by page so text fetched before a failure is kept.
             self.totalresults += response.text
         rawres = myparser.Parser(self.totalresults, self.word)
         return rawres.hostnames(), trello_urls
     except Exception as e:
         # Best-effort: report the failure and signal it with None.
         print(f'Error occurred: {e}')
         return None
Exemplo n.º 2
0
    def get_people(self):
        """Return the set of cleaned-up Twitter handles found in the results.

        Raw candidates come from ``people_twitter()`` on a parser built from
        ``self.totalresults`` and ``self.word``. Candidates of two characters
        or fewer (after stripping whitespace) are ignored. Each remaining
        candidate is normalised:

        * only the text before the first space is kept (fixes values that
          look like ``@user other_output``);
        * one trailing period is removed;
        * if ``...`` is still present, everything from the first period on is
          dropped;
        * a dash in the first position, then in the second position, is
          removed.

        Handles that end up empty are skipped, and a bare ``'@'`` is never
        returned.

        Returns:
            set of str: the normalised handles.
        """
        rawres = myparser.Parser(self.totalresults, self.word)
        handles = set()
        for raw in rawres.people_twitter():
            handle = str(raw).strip()
            if len(handle) <= 2:
                continue
            if ' ' in handle:
                handle = handle.split(' ')[0]
            # strip off period at the end if exists
            if handle.endswith('.'):
                handle = handle[:-1]
            # strip periods if contains three of them
            if '...' in handle:
                handle = handle[:handle.index('.')]
            if handle.startswith('-'):
                handle = handle[1:]
            # Slice instead of index: the steps above may have left fewer
            # than two characters, and handle[1] would raise IndexError.
            if handle[1:2] == '-':
                handle = handle[0] + handle[2:]
            if handle:
                handles.add(handle)
        handles.discard('@')

        return handles
Exemplo n.º 3
0
 def get_emails(self):
     """Return the unique e-mail addresses found in the results, as a list.

     Addresses come from ``emails()`` on a parser built from
     ``self.total_results`` and ``self.word``. When an address starts with a
     digit and contains a dash within its first ten characters, the leading
     run of digits and dashes is removed before the address is stored.

     Returns:
         list of str: de-duplicated addresses; the order is whatever the
         intermediate set yields.
     """
     rawres = myparser.Parser(self.total_results, self.word)
     emails = set()
     for email in rawres.emails():
         email = str(email)
         # strip out leading numbers and dashes glued onto the address
         if '-' in email and email[0].isdigit() and email.index('-') <= 9:
             # Guard on emptiness: a value made only of digits and dashes
             # would otherwise raise IndexError on email[0].
             while email and (email[0] == '-' or email[0].isdigit()):
                 email = email[1:]
             if not email:
                 # Nothing left after stripping; not an address.
                 continue
         emails.add(email)
     return list(emails)
Exemplo n.º 4
0
 def get_files(self):
     """Return ``fileurls(self.files)`` from a parser over the collected results.

     The parser is built from ``self.totalresults`` and ``self.word``.
     """
     return myparser.Parser(self.totalresults, self.word).fileurls(self.files)
Exemplo n.º 5
0
 def get_hostnames(self):
     """Return ``hostnames()`` from a parser over the collected results.

     The parser is built from ``self.totalresults`` and ``self.word``.
     """
     return myparser.Parser(self.totalresults, self.word).hostnames()
Exemplo n.º 6
0
 def get_emails(self):
     """Return ``emails()`` from a parser over the collected results.

     The parser is built from ``self.totalresults`` and ``self.word``.
     """
     return myparser.Parser(self.totalresults, self.word).emails()
Exemplo n.º 7
0
 def get_set(self):
     """Return ``set()`` from a parser over the collected results."""
     # NOTE(review): the second argument is the name `list`; unless an
     # enclosing scope rebinds it, this is the builtin type rather than a
     # search word -- confirm this is intended.
     return myparser.Parser(self.totalresults, list).set()
Exemplo n.º 8
0
 def get_profiles(self):
     """Return ``profiles()`` from a parser over the collected results.

     The parser is built from ``self.totalresults`` and ``self.word``.
     """
     return myparser.Parser(self.totalresults, self.word).profiles()
Exemplo n.º 9
0
 def get_people(self):
     """Return ``people_twitter()`` from a parser over the collected results.

     The parser is built from ``self.totalresults`` and ``self.word``.
     """
     return myparser.Parser(self.totalresults, self.word).people_twitter()
Exemplo n.º 10
0
 def test_emails(self):
     """Check that the parser's ``emails()`` returns something for the fixture."""
     target = 'domain.com'
     blob = '@domain.com***a@domain***banotherdomain.com***[email protected]***[email protected]***'
     found = sorted(myparser.Parser(blob, target).emails())
     # NOTE(review): `assert x, msg` only checks that `found` is truthy; the
     # list below is the failure message and is never compared with `found`.
     # An equality check was probably intended -- confirm the expected
     # addresses (they appear masked here) before tightening this.
     assert found, ['*****@*****.**', '*****@*****.**']
Exemplo n.º 11
0
 def get_people(self):
     """Return ``people_linkedin()`` from a parser over the collected results.

     The parser is built from ``self.totalresults`` and ``self.word``.
     """
     return myparser.Parser(self.totalresults, self.word).people_linkedin()