def main():
    """Validate the command line, open a connection, log in and crawl.

    Exits via ``sys.exit`` with an error message unless exactly two
    arguments follow the script name. Otherwise a socket is created for
    the module-level ``host``/``port``, ``login`` is called on it, and the
    resulting response and cookie are handed to ``crawl``.
    """
    # Usage: python webcrawler.py <arg1> <arg2>
    # (presumably the login user name and password -- confirm against the
    # module-level setup; concrete example credentials were removed here).
    #
    # Check the argument count *before* opening a connection so that a bad
    # invocation does not open a socket only to exit immediately.
    if len(sys.argv) != 3:
        sys.exit('Please pass correct number of arguments')

    # creating socket connection
    crawlerSocket = socks.create_sock(host, port)
    response, cokie = login(crawlerSocket)
    crawl(cokie, crawlerSocket, response)
def crawl(cokie, sockObject, response):
    """Crawl outward from the landing page until five flags are collected.

    The links found in ``response`` are crawled first over ``sockObject``.
    After that the shared ``url_unvisited`` list (which ``profileLinkCrawl``
    appends newly discovered links to) is drained front-to-back until
    ``flag_list`` holds five entries or there is nothing left to visit.
    Every entry of ``flag_list`` is printed at the end.

    Args:
        cokie: Cookie value forwarded unchanged to ``profileLinkCrawl``.
        sockObject: Connection used for the landing-page links.
        response: Raw response whose body supplies the initial links.
    """
    [stat, body, header_data] = webs.xtractInfo(response)

    # Phase 1: visit every link on the landing page over the caller's socket.
    for seed in webs.xtractLinks(body):
        if seed in url_visited:
            continue
        profileLinkCrawl(seed, cokie, sockObject)
        url_visited.append(seed)

    # Phase 2: drain the frontier. Entries are popped instead of iterating
    # the list while it is being appended to and removed from, which used to
    # skip links. The extra ``url_unvisited`` condition stops the loop once
    # every reachable link has been visited, instead of spinning forever when
    # fewer than five flags exist.
    while len(flag_list) < 5 and url_unvisited:
        target = url_unvisited.pop(0)
        if target in url_visited:
            # Stale entry: already crawled, just drop it from the frontier.
            continue
        profileLinkCrawl(target, cokie, sockObject)
        url_visited.append(target)
        # A fresh connection is opened after each frontier request, as
        # before. NOTE(review): the previous connection is not closed here
        # because the object returned by socks.create_sock is not visible
        # from this block -- confirm and close it if it is a plain socket.
        sockObject = socks.create_sock(host, port)

    for flag in flag_list:
        print(flag)
def profileLinkCrawl(profileLink, cokie, sockObject):
    """Fetch one page, record any secret flag, and queue its new links.

    The request is sent over ``sockObject``. While the parsed status is
    ``'500'`` the request is rebuilt and re-sent over a newly created
    socket. The final body is then scanned: a secret flag, if present, is
    appended to ``flag_list``, and every link that is neither queued in
    ``url_unvisited`` nor already in ``url_visited`` is appended to
    ``url_unvisited``.

    Args:
        profileLink: Path/URL of the page to request.
        cokie: Cookie value passed to ``webs.reqGenCrawl``.
        sockObject: Connection for the first attempt. Rebinding it inside
            this function does not affect the caller's reference.

    Returns:
        ``[status, body, header_data]`` of the last response received.

    Raises:
        SystemExit: If extracting links or the flag from the body fails.
    """
    requestN = webs.reqGenCrawl(host, profileLink, userName, password, cokie)
    responseN = socks.handle_req(requestN, sockObject)
    [status, body, header_data] = webs.xtractInfo(responseN)

    # Retry on a new connection for as long as the server answers 500.
    # ``status`` is updated on every attempt so the value returned below
    # always belongs to the body returned with it (previously a successful
    # retry still reported the stale '500').
    while status == '500':
        sockObject = socks.create_sock(host, port)
        requestN = webs.reqGenCrawl(host, profileLink, userName, password, cokie)
        responseN = socks.handle_req(requestN, sockObject)
        [status, body, header_data] = webs.xtractInfo(responseN)

    try:
        urlList = webs.xtractLinks(body)
        if "<h2 class='secret_flag' style=" in body:
            flagCheck = webs.findSecretFlag(body)
            flag_list.append(flagCheck)
        for profileLink2 in urlList:
            # Test the *discovered* link against both lists; the old check
            # tested the current page against url_visited, which let
            # already-visited links back into the queue.
            if profileLink2 not in url_unvisited and profileLink2 not in url_visited:
                url_unvisited.append(profileLink2)
    except Exception:
        sys.exit("Problem in profileLinkCrawl")
    return [status, body, header_data]
# Copy the first `length` command-line tokens for the input validators.
parms = [sys.argv[idx] for idx in range(0, length)]

# Supported invocations, selected by argument count:
#   3: python client.py [hostname] [NEU ID]
#   4: python client.py -s [hostname] [NEU ID]
#   5: python client.py -p <port> [hostname] [NEU ID]
#   6: python client.py -p <port> -s [hostname] [NEU ID]
# e.g. python client.py -p 27993 login.ccs.neu.edu <NEU ID>
if length == 3:
    host, port, ID = inputchk.input_3(parms)
elif length == 4:
    host, port, ID = inputchk.input_4(parms)
elif length == 5:
    host, port, ID = inputchk.input_2(parms)
elif length == 6:
    host, port, ID = inputchk.input_1(parms)
else:
    sys.exit("Wrong number of arguments passed")

# The two forms that carry -s (4 and 6 arguments) get the SSL socket;
# the other two get the plain one.
if length in (4, 6):
    sock = socks.create_ssl_sock(host, port)
else:
    sock = socks.create_sock(host, port)