def run(data, domain, filename, keyword, validation, api_key):
    cookie = data[0]
    company_id = data[1]
    email_format = data[2]
    profiles = get_company_profile(cookie, company_id, keyword)
    if profiles is None:
        logger.red('Unable to extract data from LinkedIn')
        quit()
    profiles_data = json.loads(profiles)
    results = profiles_data['elements'][0]['total']
    per_page = 40
    pages = int(results / per_page)
    if results < per_page:
        pages = 1
    logger.blue('Identified %s page(s)' % logger.BLUE(pages))
    logger.blue('Identified %s result(s)' % logger.BLUE(results))
    if pages == 0:
        logger.red('Could not identify pages')
        quit()
    if results > 1000:
        logger.red('This method of enumeration can only extract 1000 users')
    # sleep(3)
    users = user_data(results, pages, cookie, company_id, domain, email_format, validation, api_key)
    job_role_count = word_occurrence.count(users)
    logger.write_out(users, domain, job_role_count, filename, validation)
    return users
def get_nullsessions(target):
    logger.blue('Testing null sessions on {}'.format(logger.BLUE(target)))
    rpc_command_lsaquery = 'rpcclient -U "" -N {} -c "lsaquery"'.format(target)
    result = run(rpc_command_lsaquery, stdout=PIPE, stderr=PIPE,
                 universal_newlines=False, shell=True)
    if len(result.stdout) > 0 and len(result.stderr) == 0:
        command_output = result.stdout
    elif len(result.stderr) > 0 and len(result.stdout) == 0:
        command_output = result.stderr
    else:
        # Fall back to whatever was produced so command_output is always defined.
        command_output = result.stdout + result.stderr
    decoded = command_output.decode('utf-8')
    has_error = error_handle(decoded)
    try:
        output = decoded.rstrip().replace('\n', ' ')
        logger.verbose('Output from rpcclient: ' + logger.YELLOW(str(output)))
    except:
        logger.verbose('Failed to get output from rpcclient')
    if has_error != False:
        logger.red_indent(
            'Failed to authenticate with null sessions to {}'.format(logger.RED(target)))
        return False
    else:
        logger.green_indent(
            'Successfully authenticated with null sessions to {}'.format(logger.GREEN(target)))
        return True
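# Illustrative usage sketch (not part of the tool's flow): the target IP and the
# empty credentials below are placeholders, and this assumes rpcclient from the
# Samba suite is installed and on PATH.
#
#   if get_nullsessions('10.0.0.5'):
#       name = get_name('10.0.0.5')
#       get_shares('10.0.0.5', 'WORKGROUP', name, '', '')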
def icmp_scan(target):
    # Takes a single target, checks whether it responds to ICMP, and returns the
    # target if it is up, otherwise None.
    timeout = 2
    logger.verbose('ICMP Timeout set to: ' + str(timeout))
    logger.blue('Pinging: {}'.format(logger.BLUE(target)))
    resp = sr1(IP(dst=str(target)) / ICMP(), timeout=timeout, verbose=0)
    try:
        icmp_type = str(resp.getlayer(ICMP).code)
        resp_parse = icmp_response_parse(icmp_type)
        logger.verbose('Got ICMP Type: [{}] {}'.format(
            logger.YELLOW(icmp_type), logger.YELLOW(resp_parse)))
    except:
        logger.verbose('Could not get ICMP Type code for: ' + logger.YELLOW(target))
    if resp is None:
        logger.verbose('Got no response from: ' + logger.YELLOW(target))
        logger.red_indent('{}: Down'.format(logger.RED(target)))
        result = None
    elif (int(resp.getlayer(ICMP).type) == 3 and
          int(resp.getlayer(ICMP).code) in [1, 2, 3, 9, 10, 13]):
        logger.red_indent('{}: Down'.format(logger.RED(target)))
        result = None
    else:
        logger.green_indent('{}: Up'.format(logger.GREEN(target)))
        result = target
    return result
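# For reference, the ICMP type 3 (Destination Unreachable) codes treated as
# "Down" above are: 1 host unreachable, 2 protocol unreachable, 3 port
# unreachable, 9 network administratively prohibited, 10 host administratively
# prohibited, 13 communication administratively prohibited. Such replies mean
# something answered, but the target is filtered or unreachable for enumeration.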
def run(data):
    cookie = data.cookie
    company_id = data.company_id
    email_format = data.email_format
    keyword = data.keyword
    domain = data.domain
    validation = data.validation
    api_key = data.api_key
    filename = data.filename
    valid_emails_only = data.valid_emails_only
    logger.debug(str(vars(data)))
    profiles = linkedin_scraper.company_profile(cookie, company_id, keyword)
    if profiles is None:
        logger.red('Unable to extract data from LinkedIn')
        quit()
    company_profile_json = json.loads(profiles)
    try:
        total_employees = company_profile_json['elements'][0]['total']
    except:
        logger.red('Failed to extract users, try generalising the keywords')
        logger.red('If this problem persists, create an issue on GitHub!')
        quit()
    per_page = 40  # Each response contains 40 profiles per page.
    # Divide the number of employees by 40; this gives the number of pages.
    pages = int(total_employees / per_page)
    logger.debug('Per page: %s' % per_page)
    if total_employees < per_page:
        logger.debug('Setting pages to 1')
        pages = 1
    logger.blue('Identified %s page(s)' % logger.BLUE(pages))
    logger.blue('Identified %s result(s)' % logger.BLUE(total_employees))
    if pages == 0:
        logger.red('Could not identify pages')
        quit()
    if total_employees > 1000:
        logger.red('This method of enumeration can only extract 1000 users')
        sleep(3)
    users = linkedin_scraper.get_users(data, pages, total_employees, keyword)
    job_role_count = role_occurrence.count(users, total_employees)
    if valid_emails_only:
        logger.valid_emails_only(users, filename)
    else:
        logger.dump(users, validation)
    logger.write_out(users, data, job_role_count, filename)
    return users
def get_shares(target, domain_name, remote_name, username, password):
    my_name = 'WIN-2003'
    logger.verbose('Client name configured to: ' + logger.YELLOW(my_name))
    logger.blue('Looking up shares on {}'.format(logger.BLUE(target)))
    server_ip = target
    if remote_name is not None:
        logger.verbose('Connection status: [{} | {} | {}]'.format(logger.YELLOW(
            server_ip), logger.YELLOW(remote_name), logger.YELLOW(domain_name)))
    else:
        try:
            logger.verbose('Connection status: [{} | {} | {}]'.format(logger.YELLOW(
                server_ip), logger.YELLOW('Could not resolve name'), logger.YELLOW(domain_name)))
        except:
            pass
    open_shares = []
    if remote_name is None:
        logger.red_indent("Could not get remote host's name, skipping...")
        return None
    else:
        conn = SMBConnection(username, password, my_name, remote_name,
                             domain=domain_name, use_ntlm_v2=True, is_direct_tcp=True)
        logger.verbose('SMB configuration:')
        logger.verbose('\tConnecting with: {}'.format(logger.YELLOW(username)))
        for k, v in vars(conn).items():
            attribute = str(k)
            value = str(v)
            if '<class' not in value and 'bound method' not in value and 'object' not in value and "b''" not in value:
                logger.verbose('\t' + attribute + ': ' + value)
        try:
            conn.connect(server_ip, 445)
            logger.green('Successfully connected to {} on {}'.format(
                logger.GREEN('smb'), logger.GREEN(server_ip)))
            try:
                shares = conn.listShares(timeout=15)
                for share in range(len(shares)):
                    share_name = str(shares[share].name)
                    logger.green_indent_list(logger.GREEN(share_name))
                    open_shares.append(share_name)
            except Exception as e:
                logger.red_indent('Got error: {}'.format(logger.RED(e)))
        except:
            logger.red_indent(
                'Failed to obtain shares from {}'.format(logger.RED(server_ip)))
        return open_shares
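# Note: get_shares() relies on pysmb's SMBConnection over direct TCP (port 445);
# listShares() is only attempted once connect() succeeds, and failures are
# logged rather than retried, so a single unreachable host does not stop a scan.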
def get_name(target, timeout=5):
    logger.blue('Getting NetBIOS Name for {}'.format(logger.BLUE(target)))
    logger.verbose('Timeout for NetBIOS resolution: ' + str(timeout))
    bios = NetBIOS()
    try:
        tmpnetbios_name = bios.queryIPForName(target, timeout=timeout)
        netbios_name = str(tmpnetbios_name[0])
    except:
        netbios_name = None
    bios.close()
    if netbios_name is None:
        logger.red_indent('Failed to get NetBIOS Name')
        return None
    else:
        logger.green_indent('Got NetBIOS Name: {}'.format(
            logger.GREEN(netbios_name)))
        return str(netbios_name)
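# NetBIOS name resolution here uses pysmb's NetBIOS class, which queries the
# NetBIOS name service (UDP/137) directly; if the host does not answer within
# the timeout, None is returned and the share lookup in get_shares() is skipped.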
def user_data(results, pages, cookie, company_id, domain, email_format):
    # Every page returns a dictionary of data; each dictionary is added to this list.
    users_per_page = []
    for page in range(0, pages + 1):
        if page + 1 == 25:
            break
        if results < 40:
            # This method pulls 40 results per page. If the number of available
            # results is less than 40, set results_per_page to that number.
            results_per_page = results
            results_to_fetch = results
        else:
            # However, if the number of available results is higher than the per
            # page limit, set the per page limit to the maximum (40).
            results_per_page = 40
            # Every time this is hit, the start point in the API is incremented.
            # First it gets 0 - 40, then 40 - 80 and so on. This can be figured
            # out dynamically by multiplying the page number by results_per_page (40).
            results_to_fetch = results_per_page * page
        # In order to stop this loop from requesting more than is available,
        # and then breaking it, this if statement limits that:
        if results_to_fetch >= results:
            break
        url = "https://www.linkedin.com/voyager/api/search/cluster?count=40&guides=List(v->PEOPLE,facetCurrentCompany->%s)&origin=OTHER&q=guided&start=%s" % (
            company_id, results_to_fetch)
        logger.blue('Pulling from page %s' % logger.BLUE(page))
        data = http.connect(url, cookie)
        result = data.text.encode('UTF-8')
        try:
            result = json.loads(result)
        except Exception as e:
            logger.red(e)
            quit()
        users = extract_data(result, domain, email_format)
        users_per_page.append(users)
    return users_per_page
def port_scan(target, ports):
    src_port = RandShort()
    FIN = 0x01
    SYN = 0x02
    RST = 0x04
    PSH = 0x08
    ACK = 0x10
    SYNACK = 0x12
    RSTACK = 0x14
    URG = 0x20
    ECE = 0x40
    CWR = 0x80
    logger.blue('Checking TCP ports: {}'.format(logger.BLUE(target)))
    for port in ports:
        send_syn = sr1(IP(dst=target) / TCP(sport=src_port, dport=port, flags=SYN),
                       verbose=0, timeout=2)
        if send_syn is None:
            logger.verbose('Received no TCP response from: ' + logger.YELLOW(target))
            logger.red_indent('{}:{} [{}]'.format(logger.RED(
                target), logger.RED(str(port)), logger.RED('CLOSED')))
        elif send_syn.haslayer(TCP):
            if send_syn.getlayer(TCP).flags == SYNACK:
                send_ack = sr(IP(dst=target) / TCP(sport=src_port, dport=port, flags=RST),
                              verbose=0, timeout=2)
                logger.verbose('Received SYNACK from {}, responding with RST'.format(
                    logger.YELLOW(target)))
                logger.green_indent('{}:{} [{}]'.format(logger.GREEN(
                    target), logger.GREEN(str(port)), logger.GREEN('OPEN')))
                logger.verbose('Found alive host: ' + logger.YELLOW(target))
                return target
            elif send_syn.getlayer(TCP).flags == RSTACK:
                logger.verbose('Received RSTACK from: ' + logger.YELLOW(target))
                logger.red_indent('{}:{} [{}]'.format(logger.RED(
                    target), logger.RED(str(port)), logger.RED('CLOSED')))
            elif send_syn.getlayer(TCP).flags == RST:
                logger.verbose('Received RST from: ' + logger.YELLOW(target))
                logger.red_indent('{}:{} [{}]'.format(logger.RED(
                    target), logger.RED(str(port)), logger.RED('CLOSED')))
    return None
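# The logic above is a half-open (SYN) scan: a SYN/ACK reply marks the port as
# open and a RST is sent back instead of completing the handshake, a RST or
# RST/ACK reply marks it closed, and no reply at all is also reported as closed.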
else:
    filename = None
if args.keyword is None:
    keyword = None
else:
    keyword = args.keyword
if args.format:
    email_schemes = naming_scheme.email_schemes
    email_format = args.format.lower()
    if email_format not in email_schemes:
        logger.red('Unknown email scheme specified, please see the available below:')
        for i in email_schemes:
            logger.blue(i)
        quit()
else:
    email_format = 'firstname.surname'
if args.company_id is None:
    logger.red('Please specify a company id with the %s flag' % logger.RED('-i'))
    quit()
if args.domain is None:
    logger.red('Please specify a domain with the %s flag' % logger.RED('-d'))
    quit()
if args.validate:
    if args.validate.lower() == 'o365':
        validation = 'o365'
if args.keyword is None:
    keyword = None
else:
    keyword = args.keyword
if args.format:
    email_schemes = [
        'firstname.surname', 'firstnamesurname', 'f.surname', 'fsurname',
        'surname.firstname', 'surnamefirstname', 's.firstname', 'sfirstname'
    ]
    email_format = args.format.lower()
    if email_format not in email_schemes:
        logger.red('Unknown email scheme specified, please see the available below:')
        for i in email_schemes:
            logger.blue(i)
        quit()
else:
    email_format = 'firstname.surname'
if args.company_id is None:
    logger.red('Please specify a company id with the %s flag' % logger.RED('-i'))
    quit()
if args.domain is None:
    logger.red('Please specify a domain with the %s flag' % logger.RED('-d'))
    quit()
if args.validate:
    if args.validate.lower() == 'o365':
        logger.blue('Validating users via %s' % logger.BLUE('Office365'))
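# As an illustration (the employee name and domain below are made up), for
# "John Smith" at example.com the schemes above would be expected to produce:
#   firstname.surname -> john.smith@example.com
#   f.surname         -> j.smith@example.com
#   surnamefirstname  -> smithjohn@example.com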
def main():
    pool = ThreadPool(processes=args.threads)
    logger.VERBOSE = args.verbose
    logger.LIVE = args.live
    start_time = strftime("%H:%M:%S", gmtime())
    filetypes = ['txt', 'csv', 'html', 'all']
    if args.format:
        if args.format.lower() not in filetypes:
            logger.red('Did not understand the format supplied: [{}]'.format(logger.RED(args.format)))
            quit()
    if args.ports:
        p = []
        ports = args.ports
        if "-" in ports:
            try:
                start = int(ports.split('-')[0])
                end = int(ports.split('-')[1])
                for port in range(start, end + 1):
                    p.append(port)
            except:
                print('Failed to split on "-"')
                quit()
        elif "," in args.ports:
            ports = [int(n) for n in args.ports.split(",")]
            p = ports
        elif len(args.ports) > 0 and "-" not in args.ports and "," not in args.ports:
            try:
                p.append(int(args.ports))
            except ValueError:
                print('Please specify a port number')
                quit()
    else:
        p = [53, 88, 139, 445, 464]
    if args.ports:
        logger.verbose('Ports configuration: ' + str(p))
    target = args.target  # to be replaced with argparse
    hosts = get_targets(target)  # all possible hosts
    scan_type = args.enumerate
    logger.blue('Target: [{}]'.format(logger.BLUE(target)))
    logger.blue('Found {} target(s)'.format(logger.BLUE(str(len(hosts)))))
    if scan_type is None:
        logger.blue('Scan type: [{}]'.format(logger.BLUE('default')))
    else:
        logger.blue('Scan type: [{}]'.format(logger.BLUE(scan_type)))
    if args.ports:
        logger.blue('Ports given: [{}]'.format(logger.BLUE(args.ports)))
        logger.blue('Port count: [{}]'.format(logger.BLUE(str(len(p)))))
    username, password = cred_split(args.credentials)
    if username and password:
        logger.blue('Username: [{}]'.format(logger.BLUE(username)))
        logger.blue('Password: [{}]'.format(logger.BLUE(password)))
    if args.domain:
        domain = args.domain
    else:
        domain = 'WORKGROUP'
    logger.blue('Domain: [{}]'.format(logger.BLUE(domain)))
    logger.header('SCANNING')
    logger.blue('Start time: ' + logger.BLUE(start_time))
    if args.mode is not None:
        if args.mode.upper() == 'ICMP':
            logger.verbose('Discovery mode set to ICMP')
            # alive_hosts = icmp_scan(hosts)  # all hosts that respond to icmp
            alive_hosts = pool.map(icmp_scan, hosts)
        elif args.mode.upper() == 'PORTS':
            logger.verbose('Discovery mode set to ports')
            # alive_hosts = port_scan(hosts, p)
            # Scan each host against the configured port list.
            alive_hosts = pool.map(partial(port_scan, ports=p), hosts)
        elif args.mode.upper() == 'SKIP':
            logger.verbose('Discovery mode set to skip, scanning all {} hosts'.format(logger.YELLOW(str(len(hosts)))))
            alive_hosts = hosts
        else:
            logger.red('Unknown option for -m! Only skip, ports and icmp can be used!')
            quit()
    else:
        logger.verbose('No discovery mode set, skipping')
        alive_hosts = hosts  # no discovery: treat every host as alive
    # Before enumeration, tidy up the discovery results. The discovery functions
    # can return nested lists (e.g. [[]]) and None entries, so flatten the list,
    # drop empties and remove duplicates.
    alive_hosts = [''.join(x) for x in alive_hosts if x]  # flatten into one list
    alive_hosts = list(filter(None, alive_hosts))  # remove empties
    alive_hosts = list(set(alive_hosts))  # remove duplicates
    # create an empty list that will store all the Host objects
    enumerated_hosts = []
    # for every host, do some enum; this could probably be done with multiprocessing
    if args.enumerate is not None:
        if args.enumerate.lower() == 'null':
            pass
        elif args.enumerate.lower() == 'shares':
            pass
        else:
            logger.red('Unknown option for -e! Only null and shares can be used!')
            quit()
    enumerated_hosts = pool.map(hosts_enumeration, alive_hosts)
    end_time = strftime("%H:%M:%S", gmtime())
    logger.blue('End time: ' + logger.BLUE(end_time))
    logger.header('RESULTS')
    results_parse(results_cache, scan_type)
    if args.output:
        outfile_name = args.output
        if args.format:
            outfo = args.format.lower()
            if outfo == 'txt':
                clean_output(outfile_name)
                output(results_cache, outfile_name, scan_type)
            elif outfo == 'csv':
                clean_output(outfile_name)
                csv_output(results_cache, outfile_name, scan_type)
            elif outfo == 'html':
                clean_output(outfile_name)
                html_output(results_cache, outfile_name, scan_type)
            elif outfo == 'all':
                try:
                    outfile_name = outfile_name.split('.')[0]
                except:
                    outfile_name = outfile_name
                clean_output(outfile_name)
                output(results_cache, outfile_name + '.txt', scan_type)
                csv_output(results_cache, outfile_name + '.csv', scan_type)
                html_output(results_cache, outfile_name + '.html', scan_type)
        else:
            clean_output(outfile_name)
            output(results_cache, outfile_name, scan_type)
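# Port argument forms handled above: a single port ("445"), an inclusive range
# ("1-1024"), or a comma-separated list ("88,135,139,445"); when no ports are
# supplied, the default list [53, 88, 139, 445, 464] is used.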
def get_users(data, pages, total_employees, keyword):
    # Grab the user data per page
    cookie = data.cookie
    company_id = data.company_id
    email_format = data.email_format
    keyword = data.keyword
    domain = data.domain
    validation = data.validation
    api_key = data.api_key
    # Every page returns a dictionary of data; each dictionary is added to this list.
    people_on_this_page = 0
    logger.debug(str(vars(data)))
    userdata_per_page = []
    for page in range(0, pages + 1):
        if page + 1 == 25:
            logger.debug('Breaking, pages exceed 25')
            break
        if total_employees < 40:
            logger.debug('Locking users per page to match total_employees')
            # This method pulls 40 employees per page. If the number of available
            # employees is less than 40, set total_employees_per_page to that number.
            total_employees_per_page = total_employees
            total_employees_to_fetch = total_employees
        else:
            logger.debug('Locking users per page to 40')
            # However, if the number of available employees is higher than the per
            # page limit, set the per page limit to the maximum (40).
            total_employees_per_page = 40
            # Every time this is hit, the start point in the API is incremented.
            # First it gets 0 - 40, then 40 - 80 and so on. This can be figured out
            # dynamically by multiplying the page number by total_employees_per_page (40).
            total_employees_to_fetch = total_employees_per_page * page
        # In order to stop this loop from requesting more than is available,
        # and then breaking it, this if statement limits that:
        if total_employees_to_fetch >= total_employees:
            break
        # Loop over pages
        if keyword is None:
            # Experimental if statement, this request should work at this point(?)
            logger.debug('No keyword set [getting user per page]')
            url = "https://www.linkedin.com/voyager/api/search/cluster?count=40&guides=List(v->PEOPLE,facetCurrentCompany->%s)&origin=OTHER&q=guided&start=%s" % (company_id, total_employees_to_fetch)
        else:
            # In theory, this will only grab users per page matching the keyword
            logger.debug('Using keyword %s' % logger.MAGENTA(keyword))
            url = "https://www.linkedin.com/voyager/api/search/cluster?count=40&guides=List(v->PEOPLE,facetCurrentCompany->%s)&keywords=%s&origin=OTHER&q=guided&start=%s" % (company_id, keyword, total_employees_to_fetch)
        logger.debug('Requesting %s from get_users()' % url)
        logger.blue('Pulling from page %s' % logger.BLUE(page + 1))
        api_response = http.connect(url, cookie)
        result = api_response.text.encode('UTF-8')
        try:
            result = json.loads(result)  # contains data for ~40 people
        except Exception as e:
            print(e)
            quit()
        people_on_this_page = people_on_this_page + len(result['elements'][0]['elements'])
        if people_on_this_page > 0:
            logger.green('Successfully pulled %s users' % logger.GREEN(str(people_on_this_page)))
        userdata_per_page.append(result)
    # This part could do with threading
    logger.debug('Sending list of json objects to parse_users()')
    users = parse_users(data, userdata_per_page, total_employees)
    return users
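# Worked example of the paging maths above (numbers are illustrative): with
# total_employees = 80, run() computes pages = int(80 / 40) = 2, so this loop
# requests start offsets 0 and 40; on the next pass the computed offset (80)
# is >= total_employees, and the guard breaks out before another request is made.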
# The most important part...
banner.banner()
parser = argparse.ArgumentParser(description="Yet another LinkedIn scraper.")
parser.add_argument("-c", "--cookie", required=True, metavar="",
                    help="Cookie to authenticate to LinkedIn with [li_at]")
parser.add_argument("-i", "--company-id", metavar="", help="Company ID number")
parser.add_argument("-k", "--keyword", metavar="", help="Keyword for searches")
parser.add_argument("-d", "--domain", metavar="", help="Company domain name")
parser.add_argument("-o", "--output", metavar="",
                    help="File to output to: Writes CSV, JSON and HTML.")
parser.add_argument("-f", "--format", metavar="", help="Format for email addresses")
args = parser.parse_args()
try:
    # -c accepts either a file containing the cookie or the raw cookie value.
    with open(args.cookie, 'r') as f:
        cookie = f.readline()
    logger.blue('Got cookie: [%s]' % logger.BLUE(cookie))
except:
    cookie = args.cookie
    logger.blue('Got cookie: [%s]' % logger.BLUE(cookie))
company_id = args.company_id
domain = args.domain
if args.output:
    filename = args.output
else:
    filename = None
if args.keyword is None:
    keyword = None
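# Example invocation (illustrative only; the script name, cookie value and
# company ID below are placeholders):
#   python3 scraper.py -c cookie.txt -i 12345 -d example.com -k engineer \
#       -f firstname.surname -o results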