Пример #1
0
    def get_people(self):
        """Return the set of cleaned Twitter handles found in the results.

        Every candidate yielded by ``Parser.people_twitter()`` is converted to
        a string and stripped.  Candidates of two characters or fewer are
        ignored.  The remaining ones are cut at their first space, lose a
        trailing period, are truncated at the first period when they contain
        ``...``, and have a dash in the first or second position removed.

        Returns:
            set: the cleaned handles; empty strings and a bare ``@`` are
            never included.
        """
        rawres = myparser.Parser(self.totalresults, self.word)
        handles = set()
        for raw_handle in rawres.people_twitter():
            handle = str(raw_handle).strip()
            if len(handle) <= 2:
                continue
            # Fix invalid handles that look like "@user other_output":
            # keep only the text before the first space.
            handle = handle.split(' ')[0]
            # Strip off a period at the end if one exists.
            if handle.endswith('.'):
                handle = handle[:-1]
            # An ellipsis marks truncated output; keep what precedes it.
            if '...' in handle:
                handle = handle[:handle.index('.')]
            # The steps above can shrink the handle to zero or one character,
            # so use startswith/slicing rather than indexing to avoid an
            # IndexError on such leftovers.
            if handle.startswith('-'):
                handle = handle[1:]
            if handle[1:2] == '-':
                handle = handle[0] + handle[2:]
            if handle:
                handles.add(handle)
        # A lone "@" carries no user name.
        handles.discard('@')

        return handles
Пример #2
0
 async def get_people(self):
     """Return the LinkedIn people entries that pass the sanity filters.

     The entries come from awaiting ``Parser.people_linkedin()``.  An entry
     is dropped when it is empty, starts with a period, contains ``...``,
     or consists of exactly one whitespace-separated token.

     Returns:
         list: the entries that passed every filter, in their original order.
     """
     rawres = myparser.Parser(self.totalresults, self.word)
     people = await rawres.people_linkedin()
     return [
         person for person in people
         # Check for emptiness first: indexing person[0] on an empty string
         # would raise IndexError and abort the whole listing.
         if person
         and not person.startswith('.')
         and '...' not in person
         and len(person.split()) != 1
     ]
Пример #3
0
 def get_urls(self):
     """Fetch every unique Trello URL found in the results and parse hostnames.

     URLs are extracted from ``self.totalresults`` with a parser keyed on
     ``'trello.com'``.  Each distinct URL is requested once and the decoded
     body is appended to ``self.totalresults``, which is then parsed again
     with ``self.word``.

     Returns:
         tuple: ``(hostnames, trello_urls)`` on success.  If any exception is
         raised the error is printed and ``None`` is returned implicitly.
     """
     try:
         rawres = myparser.Parser(self.totalresults, 'trello.com')
         trello_urls = rawres.urls()
         visited = set()
         for url in trello_urls:
             # Make sure each URL is only visited once.
             if url in visited:
                 continue
             visited.add(url)
             request = grequests.get(url=url,
                                     headers={'User-Agent': googleUA})
             response = grequests.map([request])[0]
             # grequests.map puts None in the result list for a request that
             # failed; skip it instead of letting one bad URL abort the crawl.
             if response is None:
                 continue
             # Append rather than assign, so that earlier pages are not
             # discarded when the next one arrives.
             self.totalresults += response.content.decode('UTF-8')
         rawres = myparser.Parser(self.totalresults, self.word)
         return rawres.hostnames(), trello_urls
     except Exception as e:
         print(f'Error occurred: {e}')
Пример #4
0
 def get_people(self):
     """Return the set of Twitter handles extracted from the results.

     Each candidate from ``Parser.people_twitter()`` is matched against a
     pattern anchored at the start of the string: an optional ``@`` followed
     by one to fifteen word characters.  Only the matched prefix is kept,
     which discards trailing text such as ``@user other_output``.
     """
     parser = myparser.Parser(self.totalresults, self.word)
     candidates = parser.people_twitter()
     matches = (re.search(r'^@?(\w){1,15}', candidate)
                for candidate in candidates)
     return {match.group(0) for match in matches if match}
Пример #5
0
    def get_urls(self):
        """Download all Trello URLs found in the results and parse hostnames.

        Stores the unique URLs in ``self.trello_urls``, replaces
        ``self.totalresults`` with the concatenated page bodies, and stores
        the hostnames parsed from them in ``self.hostnames``.  Any exception
        is printed and swallowed.
        """
        try:
            url_parser = myparser.Parser(self.totalresults, 'trello.com')
            self.trello_urls = set(url_parser.urls())
            # totalresults previously held the search-engine results; from
            # here on it accumulates the Trello page bodies instead.
            self.totalresults = ''
            # do not change the headers
            user_agent = random.choice(['curl/7.37.0', 'Wget/1.19.4'])
            request_headers = {'User-Agent': user_agent}
            pending = (
                grequests.get(url, headers=request_headers, timeout=4)
                for url in self.trello_urls
            )
            for page in grequests.imap(pending, size=8):
                self.totalresults += page.content.decode('UTF-8')

            host_parser = myparser.Parser(self.totalresults, self.word)
            self.hostnames = host_parser.hostnames()
        except Exception as err:
            print(f'Error occurred: {err}')
Пример #6
0
    async def get_urls(self):
        """Fetch all Trello URLs found in the results and parse hostnames.

        Stores the unique URLs in ``self.trello_urls``, replaces
        ``self.totalresults`` with the concatenation of what
        ``AsyncFetcher.fetch_all`` returns for them, and stores the parsed
        hostnames in ``self.hostnames``.  Any exception is printed and
        swallowed.
        """
        try:
            url_parser = myparser.Parser(self.totalresults, 'trello.com')
            self.trello_urls = set(await url_parser.urls())
            # totalresults previously held the search-engine results; from
            # here on it accumulates the Trello responses instead.
            self.totalresults = ''
            # do not change the headers
            user_agent = random.choice(['curl/7.37.0', 'Wget/1.19.4'])
            pages = await AsyncFetcher.fetch_all(
                self.trello_urls,
                headers={'User-Agent': user_agent},
                proxy=self.proxy,
            )
            for page in pages:
                self.totalresults += page

            host_parser = myparser.Parser(self.totalresults, self.word)
            self.hostnames = await host_parser.hostnames()
        except Exception as err:
            print(f'Error occurred: {err}')
Пример #7
0
 async def get_hostnames(self):
     """Return the parsed hostnames with a leading numeric prefix removed.

     A hostname whose first character is a digit is cut just after the last
     ASCII digit it contains; this removes an IP address fused onto the front
     of the name.  Other hostnames are returned unchanged.

     Returns:
         list: one string per hostname, in parser order.
     """
     rawres = myparser.Parser(self.results, self.word)
     cleaned = []
     for host in await rawres.hostnames():
         host = str(host)
         # Slice rather than index so an empty string does not raise.
         if host[:1].isdigit():
             # Locate the last digit of the string.  ``.*`` (not ``.+``) lets
             # the match succeed when that digit is the very first character,
             # which previously produced ``None`` and an AttributeError.
             last_digit = re.match(r'.*([0-9])[^0-9]*$', host)
             if last_digit:
                 # Shift the hostname to remove the IP in the string.
                 host = host[last_digit.start(1) + 1:]
         cleaned.append(host)
     return cleaned
Пример #8
0
 async def get_emails(self):
     """Return the unique e-mail addresses parsed from ``self.total_results``.

     An address that starts with a digit and has a dash within its first ten
     characters has its leading run of digits and dashes stripped.

     Returns:
         list: the de-duplicated addresses, in arbitrary order.
     """
     rawres = myparser.Parser(self.total_results, self.word)
     emails = set()
     for email in await rawres.emails():
         email = str(email)
         if '-' in email and email[0].isdigit() and email.index('-') <= 9:
             # Strip out the leading numbers and dashes.  The emptiness check
             # prevents an IndexError if the whole string is consumed.
             while email and (email[0] == '-' or email[0].isdigit()):
                 email = email[1:]
             if not email:
                 # Nothing but digits and dashes: not an address.
                 continue
         emails.add(email)
     return list(emails)
Пример #9
0
 async def get_hostnames(self):
     """Return the awaited result of ``Parser.hostnames()``.

     The parser is built from ``self.total_results`` and ``self.word``.
     """
     parser = myparser.Parser(self.total_results, self.word)
     hostnames = await parser.hostnames()
     return hostnames
Пример #10
0
 def get_files(self):
     """Return the result of ``Parser.fileurls(self.files)``.

     The parser is built from ``self.total_results`` and ``self.word``.
     """
     parser = myparser.Parser(self.total_results, self.word)
     file_urls = parser.fileurls(self.files)
     return file_urls
Пример #11
0
 async def get_profiles(self):
     """Return the result of ``Parser.profiles()``.

     The parser is built from ``self.totalresults`` and ``self.word``.
     """
     parser = myparser.Parser(self.totalresults, self.word)
     # NOTE(review): the result is returned without ``await``.  If
     # ``profiles()`` is a coroutine function, callers receive a coroutine
     # object instead of the profiles - confirm against myparser.
     profiles = parser.profiles()
     return profiles
Пример #12
0
 async def get_emails(self):
     """Return the awaited result of ``Parser.emails()``.

     The parser is built from ``self.totalresults`` and ``self.word``.
     """
     return await myparser.Parser(self.totalresults, self.word).emails()
Пример #13
0
 async def get_links(self):
     """Return the LinkedIn links after passing them through ``splitter``.

     The links are awaited from ``Parser.links_linkedin()``; the parser is
     built from ``self.totalresults`` and ``self.word``.  ``splitter`` is
     awaited on those links and its result is returned.
     """
     parser = myparser.Parser(self.totalresults, self.word)
     linkedin_links = await parser.links_linkedin()
     return await splitter(linkedin_links)
Пример #14
0
 def test_emails(self):
     """Exercise ``Parser.emails()`` on a sample results string for 'domain.com'."""
     word = 'domain.com'
     # NOTE(review): the '[email protected]' fragments look like addresses
     # redacted by whatever tool captured this snippet - restore the original
     # fixture before relying on this test.
     results = '@domain.com***a@domain***banotherdomain.com***[email protected]***[email protected]***'
     parse = myparser.Parser(results, word)
     emails = sorted(parse.emails())
     # NOTE(review): ``assert expr, msg`` uses the list only as the failure
     # message, so this passes whenever ``emails`` is non-empty.  Presumably
     # ``assert emails == [...]`` was intended; the expected values below
     # also appear redacted and must be restored before tightening the check.
     assert emails, ['*****@*****.**', '*****@*****.**']
Пример #15
0
 def get_emails(self):
     """Return the result of ``Parser.emails()``.

     The parser is built from ``self.total_results`` and ``self.word``.
     """
     return myparser.Parser(self.total_results, self.word).emails()
Пример #16
0
 def get_hostnames(self):
     """Return the result of ``Parser.hostnames()``.

     The parser is built from ``self.results`` and ``self.word``.
     """
     parser = myparser.Parser(self.results, self.word)
     return parser.hostnames()
Пример #17
0
 def get_links(self):
     """Return the LinkedIn links after passing them through ``splitter``.

     The links come from ``Parser.links_linkedin()``; the parser is built
     from ``self.totalresults`` and ``self.word``.
     """
     parser = myparser.Parser(self.totalresults, self.word)
     linkedin_links = parser.links_linkedin()
     return splitter(linkedin_links)
Пример #18
0
 async def get_hostnames(self) -> list:
     """Return the awaited result of ``Parser.hostnames()``.

     The parser is built from ``self.total_results`` and ``self.word``.
     """
     return await myparser.Parser(self.total_results, self.word).hostnames()
Пример #19
0
 async def get_emails(self) -> set:
     """Return the awaited result of ``Parser.emails()``.

     The parser is built from ``self.total_results`` and ``self.word``.
     """
     return await myparser.Parser(self.total_results, self.word).emails()
Пример #20
0
 async def get_hostnames(self, proxy=False):
     """Record the proxy setting and return the parsed hostnames.

     Args:
         proxy: value stored on ``self.proxy`` before parsing (default
             ``False``).

     Returns:
         The awaited result of ``Parser.hostnames()``; the parser is built
         from ``self.total_results`` and ``self.word``.
     """
     self.proxy = proxy
     parser = myparser.Parser(self.total_results, self.word)
     hostnames = await parser.hostnames()
     return hostnames
Пример #21
0
 async def get_people(self):
     """Return the awaited result of ``Parser.people_linkedin()``.

     The parser is built from ``self.totalresults`` and ``self.word``.
     """
     parser = myparser.Parser(self.totalresults, self.word)
     people = await parser.people_linkedin()
     return people
Пример #22
0
 async def parse_emails(self, content):
     """Return the awaited result of ``Parser.emails()`` for ``content``.

     Args:
         content: the text passed to ``myparser.Parser`` together with
             ``self.word``.
     """
     return await myparser.Parser(content, self.word).emails()
Пример #23
0
 def get_hostnames(self):
     """Return the result of ``Parser.hostnames()``.

     The parser is built from ``self.results`` and ``self.word``.
     """
     return myparser.Parser(self.results, self.word).hostnames()
Пример #24
0
 def get_allhostnames(self):
     """Return the result of ``Parser.hostnames_all()``.

     The parser is built from ``self.total_results`` and ``self.word``.
     """
     parser = myparser.Parser(self.total_results, self.word)
     all_hostnames = parser.hostnames_all()
     return all_hostnames
Пример #25
0
 async def parse_hostnames(self, content):
     """Return the awaited result of ``Parser.hostnames()`` for ``content``.

     Args:
         content: the text passed to ``myparser.Parser`` together with
             ``self.word``.
     """
     parser = myparser.Parser(content, self.word)
     hostnames = await parser.hostnames()
     return hostnames