def get_urls(self):
    """Visit every unique Trello URL found so far and extract hostnames.

    Trello URLs are parsed out of ``self.totalresults``. Each distinct URL is
    requested once and the response text is appended to
    ``self.totalresults`` before the hostnames for ``self.word`` are parsed.

    Returns:
        A ``(hostnames, trello_urls)`` tuple built from ``Parser.hostnames()``
        and ``Parser.urls()``. ``None`` is returned when an unexpected error
        occurs; the error is printed rather than raised.
    """
    print('\tSearching Trello URLs.')
    try:
        rawres = myparser.Parser(self.totalresults, "trello.com")
        trello_urls = rawres.urls()
        pages = []
        # dict.fromkeys de-duplicates while keeping first-seen order, so each
        # URL is requested only once.
        for url in dict.fromkeys(trello_urls):
            try:
                response = requests.get(url=url, headers={'User-Agent': googleUA})
            except requests.RequestException as e:
                # A single unreachable page must not throw away the pages
                # that were fetched successfully.
                print(f'Error occurred while fetching {url}: {e}')
                continue
            pages.append(response.text)
        # Append once instead of growing the string inside the loop.
        self.totalresults += ''.join(pages)
        rawres = myparser.Parser(self.totalresults, self.word)
        return rawres.hostnames(), trello_urls
    except Exception as e:
        print(f'Error occurred: {e}')
        return None
def get_people(self):
    """Return the set of cleaned-up Twitter handles found in the results.

    Raw handles come from ``Parser.people_twitter()``. Handles of two
    characters or fewer (after stripping whitespace) are ignored. The
    remaining ones are normalised:

    * only the first space-separated token is kept
      (fixes ``"@user other_output"``),
    * one trailing period is removed,
    * if the handle contains ``"..."`` it is cut at its first period,
    * a dash in the first or second position is removed.

    Returns:
        set of handle strings; the bare ``'@'`` and empty strings are never
        included.
    """
    rawres = myparser.Parser(self.totalresults, self.word)
    handles = set()
    for raw_handle in rawres.people_twitter():
        handle = str(raw_handle).strip()
        if len(handle) <= 2:
            continue
        # Keep only the first token; a no-op when there is no space.
        handle = handle.split(' ')[0]
        # Strip off a period at the end if one exists.
        if handle.endswith('.'):
            handle = handle[:-1]
        # Cut at the first period when the handle contains an ellipsis.
        if '...' in handle:
            handle = handle[:handle.index('.')]
        # startswith/slicing are used instead of handle[0] / handle[1]
        # because the trimming above can leave fewer than two characters,
        # and direct indexing would then raise IndexError.
        if handle.startswith('-'):
            handle = handle[1:]
        if handle[1:2] == '-':
            handle = handle[0] + handle[2:]
        if handle:
            handles.add(handle)
    # A lone '@' is not a handle.
    handles.discard('@')
    return handles
def get_emails(self):
    """Return the unique e-mail addresses parsed from the collected results.

    Addresses come from ``Parser.emails()``. When an address starts with a
    digit and has a dash within its first ten characters, the leading run of
    digits and dashes is stripped before the address is stored.

    Returns:
        list of unique address strings (built from a set, so the order is
        not defined). Empty strings are never included.
    """
    rawres = myparser.Parser(self.total_results, self.word)
    emails = set()
    for raw_email in rawres.emails():
        email = str(raw_email)
        if '-' in email and email[0].isdigit() and email.index('-') <= 9:
            # The emptiness check stops the loop cleanly if the whole string
            # consists of digits and dashes, instead of raising IndexError.
            while email and (email[0] == '-' or email[0].isdigit()):
                email = email[1:]
        if email:
            emails.add(email)
    return list(emails)
def get_files(self):
    """Return the file URLs the parser finds for ``self.files``.

    A parser is built over ``self.totalresults`` and ``self.word``, and its
    ``fileurls`` result is returned unchanged.
    """
    return myparser.Parser(self.totalresults, self.word).fileurls(self.files)
def get_hostnames(self):
    """Return whatever ``Parser.hostnames()`` extracts from the results.

    The parser is built over ``self.totalresults`` and ``self.word``.
    """
    return myparser.Parser(self.totalresults, self.word).hostnames()
def get_emails(self):
    """Return whatever ``Parser.emails()`` extracts from the results.

    The parser is built over ``self.totalresults`` and ``self.word``.
    """
    return myparser.Parser(self.totalresults, self.word).emails()
def get_set(self):
    """Return the result of ``Parser.set()`` over the collected results."""
    # NOTE(review): the second argument is the builtin ``list`` type, not a
    # search word; this looks unintended but is kept as-is. Confirm what
    # Parser expects here.
    return myparser.Parser(self.totalresults, list).set()
def get_profiles(self):
    """Return whatever ``Parser.profiles()`` extracts from the results.

    The parser is built over ``self.totalresults`` and ``self.word``.
    """
    return myparser.Parser(self.totalresults, self.word).profiles()
def get_people(self):
    """Return whatever ``Parser.people_twitter()`` extracts from the results.

    The parser is built over ``self.totalresults`` and ``self.word``.
    """
    return myparser.Parser(self.totalresults, self.word).people_twitter()
def test_emails(self):
    """Check that ``Parser.emails()`` returns exactly the expected addresses."""
    word = 'domain.com'
    results = '@domain.com***a@domain***banotherdomain.com***[email protected]***[email protected]***'
    parse = myparser.Parser(results, word)
    emails = sorted(parse.emails())
    # Use ``==``: ``assert emails, [...]`` treats the list as the failure
    # message and only checks that ``emails`` is non-empty.
    # NOTE(review): the fixture string and the expected addresses look
    # masked/redacted; restore the real values. TODO confirm.
    assert emails == ['*****@*****.**', '*****@*****.**']
def get_people(self):
    """Return whatever ``Parser.people_linkedin()`` extracts from the results.

    The parser is built over ``self.totalresults`` and ``self.word``.
    """
    return myparser.Parser(self.totalresults, self.word).people_linkedin()