def sourceforge(self, username):
    """Check SourceForge for the given username and record any contact found.

    Scrapes the public profile page, displays the details in a table, and
    inserts the contact into the database.
    """
    self.verbose('Checking SourceForge...')
    url = f"http://sourceforge.net/u/{username}/profile/"
    resp = self.request('GET', url)
    page = resp.text
    name_match = re.search(r'<title>(.+) / Profile', page)
    if not name_match:
        self.output('Sourceforge username not found.')
        return
    self.alert(f"Sourceforge username found - ({url})")
    # extract the optional profile fields from the page
    join_match = re.search(r'<dt>Joined:</dt><dd>\s*(\d\d\d\d-\d\d-\d\d) ', page)
    location_match = re.search(r'<dt>Location:</dt><dd>\s*(\w.*)', page)
    gender_match = re.search(r'<dt>Gender:</dt><dd>\s*(\w.*)', page)
    projects = re.findall(r'class="project-info">\s*<a href="/p/.+/">(.+)</a>', page)
    # fall back to None for any field that did not match
    name = name_match.group(1)
    joined = join_match.group(1) if join_match else None
    location = location_match.group(1) if location_match else None
    gender = gender_match.group(1) if gender_match else None
    # build and display a table of the results
    tdata = [
        ['Resource', 'Sourceforge'],
        ['Name', name],
        ['Profile URL', url],
        ['Joined', joined],
        ['Location', location],
        ['Gender', gender],
    ]
    for project in projects:
        tdata.append(['Projects', project])
    self.table(tdata, title='Sourceforge')
    # add the pertinent information to the database
    if not name:
        name = username
    fname, mname, lname = parse_name(name)
    self.insert_contacts(first_name=fname, middle_name=mname, last_name=lname, title='Sourceforge Contributor')
def codeplex(self, username):
    """Check CodePlex for the given username and record any contact found.

    Scrapes the public profile page, displays the details in a table, and
    inserts the contact into the database.
    """
    self.verbose('Checking CodePlex...')
    url = f"http://www.codeplex.com/site/users/view/{username}"
    resp = self.request('GET', url)
    page = resp.text
    name_match = re.search(r'<h1 class="user_name" style="display: inline">(.+)</h1>', page)
    if not name_match:
        self.output('CodePlex username not found.')
        return
    self.alert(f"CodePlex username found - ({url})")
    # extract the optional profile fields
    join_match = re.search(r'Member Since<span class="user_float">([A-Z].+[0-9])</span>', page)
    last_match = re.search(r'Last Visit<span class="user_float">([A-Z].+[0-9])</span>', page)
    coord_match = re.search(r'(?s)<p class="OverflowHidden">(.*?)</p>', page)
    name = name_match.group(1)
    # default the dates so strptime below always has something to parse
    joined = join_match.group(1) if join_match else 'January 1, 1900'
    last_visit = last_match.group(1) if last_match else 'January 1, 1900'
    coordinator = coord_match.group(1) if coord_match else ''
    # build and display a table of the results
    tdata = [
        ['Resource', 'CodePlex'],
        ['Name', name],
        ['Profile URL', url],
        ['Joined', time.strftime('%Y-%m-%d', time.strptime(joined, '%B %d, %Y'))],
        ['Date Last', time.strftime('%Y-%m-%d', time.strptime(last_visit, '%B %d, %Y'))],
    ]
    # projects the user coordinates are listed inside the coordinator block
    for repo_url, repo in re.findall(r'<a href="(http://.+)/" title=".+">(.+)<br /></a>', coordinator):
        tdata.append(['Project', f"{repo} ({repo_url})"])
    self.table(tdata, title='CodePlex')
    # add the pertinent information to the database
    if not name:
        name = username
    fname, mname, lname = parse_name(name)
    self.insert_contacts(first_name=fname, middle_name=mname, last_name=lname, title='CodePlex Contributor')
def bitbucket(self, username):
    """Check Bitbucket for the given username and record any contact found.

    Queries the Bitbucket 2.0 users API; on a hit, displays the profile
    details in a table and inserts the contact into the database.
    """
    self.verbose('Checking Bitbucket...')
    url = f"https://bitbucket.org/api/2.0/users/{username}"
    resp = self.request('GET', url)
    data = resp.json()
    if 'username' not in data:
        self.output('Bitbucket username not found.')
        return
    self.alert(f"Bitbucket username found - ({url})")
    # extract data from the optional fields; use .get() so a missing field
    # yields None instead of raising KeyError (mirrors the github() handling
    # of optional fields)
    bbName = data.get('display_name')
    # 'created_on' is an ISO 8601 timestamp; keep only the date portion
    bbJoin = (data.get('created_on') or '').split('T')[0] or None
    # build and display a table of the results
    tdata = []
    tdata.append(['Resource', 'Bitbucket'])
    tdata.append(['User Name', data['username']])
    tdata.append(['Display Name', bbName])
    tdata.append(['Location', data.get('location')])
    tdata.append(['Joined', bbJoin])
    tdata.append(['Personal URL', data.get('website')])
    tdata.append(['Bitbucket URL', data.get('links', {}).get('html', {}).get('href')])
    #tdata.append(['Avatar URL', data['user']['avatar']]) # This works but is SOOOO long it messes up the table
    self.table(tdata, title='Bitbucket')
    # add the pertinent information to the database
    if not bbName:
        bbName = username
    fname, mname, lname = parse_name(bbName)
    self.insert_contacts(first_name=fname, middle_name=mname, last_name=lname, title='Bitbucket Contributor')
def module_run(self, domains):
    """Query the BuiltWith API for each domain and harvest contacts, hosts,
    and (optionally) the technology stack per host.

    Uses the free API tier; 'show_all' controls whether per-host technology
    details are displayed.
    """
    key = self.keys.get('builtwith_api')
    # Maybe the commercial version?
    #url = 'http://api.builtwith.com/v5/api.json'
    # Free Version
    url = 'https://api.builtwith.com/free1/api.json'
    title = 'BuiltWith contact'
    for domain in domains:
        self.heading(domain, level=0)
        payload = {'KEY': key, 'LOOKUP': domain}
        resp = self.request('GET', url, params=payload)
        if 'error' in resp.json():
            self.error(resp.json()['error'])
            continue
        for result in resp.json()['Results']:
            # extract and add emails to contacts (API may return null)
            emails = result['Meta']['Emails'] or []
            for email in emails:
                self.insert_contacts(first_name=None, last_name=None, title=title, email=email)
            # extract and add names to contacts (API may return null)
            names = result['Meta']['Names'] or []
            for name in names:
                fname, mname, lname = parse_name(name['Name'])
                self.insert_contacts(first_name=fname, middle_name=mname, last_name=lname, title=title)
            # extract and consolidate hosts and associated technology data
            data = {}
            for path in result['Result']['Paths']:
                # distinct names so the outer loop's 'domain' is not clobbered
                path_domain = path['Domain']
                subdomain = path['SubDomain']
                host = subdomain if path_domain in subdomain else '.'.join(
                    filter(len, [subdomain, path_domain]))
                data.setdefault(host, []).extend(path['Technologies'])
            for host in data:
                # add host to hosts
                # *** might domain integrity issues here ***
                parent_domain = '.'.join(host.split('.')[-2:])
                if parent_domain != host:
                    self.insert_hosts(host)
            # process hosts and technology data
            if self.options['show_all']:
                for host in data:
                    self.heading(host, level=0)
                    # display technologies
                    if data[host]:
                        self.output(self.ruler * 50)
                        for item in data[host]:
                            for tag in item:
                                self.output(
                                    f"{tag}: {textwrap.fill(self.to_unicode_str(item[tag]), 100, initial_indent='', subsequent_indent=self.spacer*2)}"
                                )
                        self.output(self.ruler * 50)
def module_run(self, domains):
    """Search the PGP key server for each domain and harvest contacts whose
    key email address falls under that domain."""
    url = 'http://pgp.key-server.io/pks/lookup'
    for domain in domains:
        self.heading(domain, level=0)
        resp = self.request('GET', url, params={'search': domain})
        # split the response into the relevant lines
        candidates = [chunk.strip() for chunk in re.split('[\n<>]', resp.text) if domain in chunk]
        # parse and de-duplicate (name, email) pairs
        parsed = set()
        for candidate in candidates:
            # remove parenthesized items
            candidate = re.sub(r'\s*\(.*\)\s*', '', candidate)
            # parse out name and email address
            match = re.search(r'^(.*)<(.*)>$', candidate)
            if match:
                # clean up and keep the parsed elements
                parsed.add(tuple(part.strip() for part in match.group(1, 2)))
        results = list(parsed)
        if not results:
            self.output('No results found.')
            continue
        for raw_name, email in results:
            fname, mname, lname = parse_name(raw_name.strip())
            # only keep addresses actually under the queried domain
            if email.lower().endswith(domain.lower()):
                self.insert_contacts(
                    first_name=fname,
                    middle_name=mname,
                    last_name=lname,
                    email=email,
                    title='PGP key association',
                )
def gitorious(self, username):
    """Check Gitorious for the given username and record any contact found.

    Scrapes the public profile page (decoding the obfuscated email address),
    displays the details in a table, and inserts the contact into the
    database.
    """
    self.verbose('Checking Gitorious...')
    url = f"https://gitorious.org/~{username}"
    resp = self.request('GET', url)
    page = resp.text
    if not re.search(rf'href="/~{username}" class="avatar"', page):
        self.output('Gitorious username not found.')
        return
    self.alert(f"Gitorious username found - ({url})")
    # extract data
    name_match = re.search(r'<strong>([^<]*)</strong>\s+</li>\s+<li class="email">', page)
    # Gitorious URL encodes the user's email to obscure it...lulz. No problem.
    raw_email_match = re.search(r"eval\(decodeURIComponent\('(.+)'", page)
    email_match = None
    if raw_email_match:
        email_match = re.search(r'mailto:([^\\]+)', unquote_plus(raw_email_match.group(1)))
    join_match = re.search(r'Member for (.+)', page)
    personal_url_match = re.search(r'rel="me" href="(.+)">', page)
    projects = re.findall(
        r'<tr class="project">\s+<td>\s+<a href="/([^"]*)">([^<]*)</a>\s+</td>\s+</tr>', page)
    # establish non-match values
    name = name_match.group(1) if name_match else None
    email = email_match.group(1) if email_match else None
    membership = join_match.group(1) if join_match else None
    personal_url = personal_url_match.group(1) if personal_url_match else None
    # build and display a table of the results
    tdata = [
        ['Resource', 'Gitorious'],
        ['Name', name],
        ['Profile URL', url],
        ['Membership', membership],
        ['Email', email],
        ['Personal URL', personal_url],
    ]
    for proj_path, proj_name in projects:
        tdata.append(['Project', f"{proj_name} (https://gitorious.org/{proj_path})"])
    self.table(tdata, title='Gitorious')
    # add the pertinent information to the database
    if not name:
        name = username
    fname, mname, lname = parse_name(name)
    self.insert_contacts(first_name=fname, middle_name=mname, last_name=lname, title='Gitorious Contributor', email=email)
def module_run(self, repos):
    """Walk the commit history of each Github repo and harvest contributor
    profiles (linked accounts) and contacts (raw name/email pairs).

    The 'committer' and 'author' options gate which roles are collected.
    """
    for repo in repos:
        endpoint = f"/repos/{quote_plus(repo[0])}/{quote_plus(repo[1])}/commits"
        commits = self.query_github_api(
            endpoint=endpoint,
            payload={},
            options={'max_pages': int(self.options['maxpages']) or None},
        )
        for commit in commits:
            for key in ('committer', 'author'):
                if not self.options[key]:
                    continue
                # the top-level field links to a Github account (may be null)
                account = commit.get(key)
                if account:
                    self.insert_profiles(username=account['login'], url=account['html_url'], resource='Github', category='coding')
                # the nested 'commit' field carries the raw name/email pair
                details = commit['commit'].get(key)
                if details:
                    fname, mname, lname = parse_name(details['name'])
                    self.insert_contacts(first_name=fname, middle_name=mname, last_name=lname, email=details['email'], title='Github Contributor')
def github(self, username):
    """Check Github for the given username and record any contact found.

    Queries the Github users API; on a hit, displays the profile details in
    a table and inserts the contact into the database.
    """
    self.verbose('Checking Github...')
    url = f"https://api.github.com/users/{username}"
    resp = self.request('GET', url)
    data = resp.json()
    if 'login' not in data:
        self.output('Github username not found.')
        return
    self.alert(f"Github username found - ({url})")
    # extract data from the optional fields (None when absent)
    gitName = data.get('name')
    gitCompany = data.get('company')
    gitBlog = data.get('blog')
    gitLoc = data.get('location')
    gitEmail = data.get('email')
    gitBio = data.get('bio')
    # timestamps are ISO 8601; keep only the date portion
    gitJoin = data['created_at'].split('T')
    gitUpdate = data['updated_at'].split('T')
    # build and display a table of the results
    tdata = []
    tdata.append(['Resource', 'Github'])
    tdata.append(['User Name', data['login']])
    # only show the real name row when the profile provides one; a proper
    # 'if' statement replaces the old conditional-expression statement
    if gitName:
        tdata.append(['Real Name', gitName])
    tdata.append(['Profile URL', data['html_url']])
    tdata.append(['Avatar URL', data['avatar_url']])
    tdata.append(['Location', gitLoc])
    tdata.append(['Company', gitCompany])
    tdata.append(['Blog URL', gitBlog])
    tdata.append(['Email', gitEmail])
    tdata.append(['Bio', gitBio])
    tdata.append(['Followers', data['followers']])
    tdata.append(['ID', data['id']])
    tdata.append(['Joined', gitJoin[0]])
    tdata.append(['Updated', gitUpdate[0]])
    self.table(tdata, title='Github')
    # add the pertinent information to the database
    if not gitName:
        gitName = username
    fname, mname, lname = parse_name(gitName)
    self.insert_contacts(first_name=fname, middle_name=mname, last_name=lname, title='Github Contributor')
def get_contact_info(self, url):
    """Resolve a LinkedIn profile URL to a contact via the Bing API and
    insert it into the database.

    The Bing link title usually looks like "Name - Title - Company |
    LinkedIn"; the pipe and dash separators are used to recover the parts.
    """
    search_result = self.search_bing_api(url, 1)
    # Search by url. If the url doesn't match, it has potential to be a different person
    if not (search_result and search_result[0]["url"] == url):
        return
    search_result = search_result[0]
    # "Name" is a misnomer, it actually refers to the link title
    link_title = search_result["name"]
    # Split the title on the pipe to get rid of "linkedIn" portion at the end
    name_and_title = link_title.split("|")[0]
    # Split whats left on the Dashes, which is usually name - title - company
    # some european LinkedIn sites use em-dash
    EM_DASH = b'\xe2\x80\x93'.decode('utf-8')
    delimiter_expression = '- | ' + EM_DASH
    name_title_company_list = re.split(delimiter_expression, name_and_title)
    # Parse out name
    fullname = name_title_company_list[0]
    fname, mname, lname = parse_name(fullname)
    # guard: a title with no dash yields a single element; only the name can
    # be parsed in that case (previously this path raised IndexError below)
    if len(name_title_company_list) < 2:
        self.insert_contacts(first_name=fname, middle_name=mname, last_name=lname)
        return
    # Sometimes "LinkedIn" is left at the end anyway, and we don't want to confuse that for the company
    if "linkedin" not in name_title_company_list[-1].lower():
        company = name_title_company_list[-1]
    else:
        company = False
    # Try to parse out a title and company if it's there
    if "linkedin" not in name_title_company_list[1].lower():
        if not company:
            title = name_title_company_list[1]
        else:
            title = f"{name_title_company_list[1]} at {company}"
        self.insert_contacts(first_name=fname, middle_name=mname, last_name=lname, title=title)
    else:
        self.insert_contacts(first_name=fname, middle_name=mname, last_name=lname)
def module_run(self, domains):
    """Search the IANA Private Enterprise Numbers registry for contacts
    whose registered email address falls under each given domain."""
    url = 'https://www.iana.org/assignments/enterprise-numbers/enterprise-numbers'
    resp = self.request('GET', url)
    if resp.status_code != 200:
        self.alert('When retrieving IANA PEN Registry, got HTTP status code ' + str(resp.status_code) + '!')
    registry = resp.text
    for domain in domains:
        escaped = re.escape(domain)
        # registry entries: number / organization / contact name / email,
        # with '&' standing in for '@' in the address
        pattern = r'(\d+)\s*\n\s{2}(.*)\s*\n\s{4}(.*)\s*\n\s{6}(.*)&' + escaped + r'\s*\n'
        matches = list(re.finditer(pattern, registry, re.IGNORECASE))
        for match in matches:
            fname, mname, lname = parse_name(match.group(3))
            self.insert_contacts(
                first_name=fname,
                middle_name=mname,
                last_name=lname,
                email=match.group(4) + '@' + domain
            )
        if not matches:
            self.alert('No matches found for domain \'' + domain + '\'')
def module_run(self, companies):
    """Search the IANA Private Enterprise Numbers registry for contacts
    registered under each given company name."""
    url = 'https://www.iana.org/assignments/enterprise-numbers/enterprise-numbers'
    resp = self.request('GET', url)
    if resp.status_code != 200:
        self.alert(
            'When retrieving IANA PEN Registry, got HTTP status code ' + str(resp.status_code) + '!')
    registry = resp.text
    for company in companies:
        escaped = re.escape(company)
        # registry entries: number / organization / contact name / email,
        # with '&' standing in for '@' in the address
        pattern = r'(\d+)\s*\n\s{2}.*' + escaped + r'.*\s*\n\s{4}(.*)\s*\n\s{6}(.*)\s*\n'
        matches = list(re.finditer(pattern, registry, re.IGNORECASE))
        for match in matches:
            fname, mname, lname = parse_name(match.group(2))
            self.insert_contacts(first_name=fname, middle_name=mname, last_name=lname, email=match.group(3).replace('&', '@'))
        if not matches:
            self.alert('No matches found for company \'' + company + '\'')
def module_run(self, entities):
    """Enrich each email address via the FullContact person API and store
    the resulting contacts and social profiles.

    'entities' is consumed as a work queue: addresses that FullContact
    reports as still processing (HTTP 202) are pushed back for retry.
    """
    api_key = self.keys.get('fullcontact_api')
    base_url = 'https://api.fullcontact.com/v3/person.enrich'
    while entities:
        entity = entities.pop(0)
        payload = {'email': entity}
        headers = {'Authorization': 'Bearer ' + api_key}
        resp = self.request('POST', base_url, json=payload, headers=headers)
        if resp.status_code == 200:
            # parse contact information
            name = resp.json().get('fullName')
            if name:
                first_name, middle_name, last_name = parse_name(name)
                self.alert(name)
                # collect the queried address plus any extra addresses
                # returned by the API
                emails = [entity]
                new_emails = resp.json()['details'].get('emails') or []
                for email in new_emails:
                    emails.append(email['value'])
                    self.alert(email['value'])
                # combine title and organization when both are present
                title = resp.json().get('title')
                organization = resp.json().get('organization')
                if title and organization:
                    title = f"{title} at {organization}"
                elif organization:
                    title = f"Employee at {organization}"
                if title:
                    self.alert(title)
                # parse location
                region = resp.json().get('location')
                if region:
                    self.alert(region)
                # insert contacts
                for email in emails:
                    self.insert_contacts(first_name=first_name, middle_name=middle_name, last_name=last_name, title=title, email=email, region=region)
            # parse and insert profiles
            for resource in ['twitter', 'linkedin', 'facebook']:
                url = resp.json().get(resource)
                if url:
                    # NOTE(review): assumes the username is the last path
                    # segment of the profile URL — confirm for all resources
                    username = url.split('/')[-1]
                    self.alert(url)
                    self.insert_profiles(username=username, url=url, resource=resource, category='social')
        elif resp.status_code == 202:
            # add emails queued by fullcontact back to the list
            entities.append(entity)
            self.output(f"{entity} queued and added back to the list.")
        else:
            self.output(f"{entity} - {resp.json()['message']}")
        # 600 requests per minute api rate limit
        sleep(.1)
def parse_fullname(self, name):
    """Strip trailing ' - ...', ' | ...' and ',...' qualifiers from a raw
    name string and split the remainder into name parts.

    Returns a (fullname, first, middle, last) tuple.
    """
    fullname = name
    # take only the text before the first occurrence of each separator
    for separator in (" -", " |", ","):
        fullname = fullname.split(separator)[0]
    fname, mname, lname = parse_name(fullname)
    return fullname, fname, mname, lname