import ast
import subprocess

from flask import abort, jsonify, request

# `fc` refers to the project's crawler/analysis helper module (findDomain,
# getCorrectURL, crawl, analyseMain); its import is not shown in this section.


def get_crawl(path):
    # Crawl up to 100 URLs starting from the given path and analyse them.
    path = "http://" + path
    domain = fc.findDomain(path)
    urls = fc.crawl(path, domain, 100)
    print("Length of URLS: %d" % len(urls))
    out = fc.analyseMain(urls)
    return jsonify(output=out)

def check_vulnerabilities():
    path = request.form.get("url")
    page = request.form.get("pages")
    domain = fc.findDomain(path)
    path = fc.getCorrectURL(path)
    urls = fc.crawl(path, domain, int(page))

    # Write the crawled URLs to a local file, one per line, as input for the MapReduce job.
    with open("./urllist.txt", "w") as data:
        for url in urls:
            data.write("%s\n" % url)

    # Cleans the HDFS paths and loads the URL list into HDFS.
    subprocess.call("./mrstartup.sh", shell=True)

    # Starts the MapReduce job for the URLs in the URL list.
    subprocess.call(
        "./mrlauncher.sh --input=/user/smullassery/syssec/urllist.txt "
        "--output=/user/smullassery/syssec/output",
        shell=True,
    )

    # Reads the result of the MapReduce job from HDFS; universal_newlines=True
    # returns the output as text rather than bytes.
    result = subprocess.Popen(
        ["hdfs", "dfs", "-cat", "/user/smullassery/syssec/output/*"],
        stdout=subprocess.PIPE,
        universal_newlines=True,
    )
    result_string, error = result.communicate()
    results = result_string.split('\t\n')
    print("Map Reduce job completed")

    # Each non-empty chunk is a Python literal (a list of findings); merge them all.
    final_result = []
    for temp in results:
        if temp:
            list1 = ast.literal_eval(temp)
            if list1:
                final_result.extend(list1)
    out = final_result
    return jsonify(output=out)

def create_crawl():
    path = request.form.get("url")
    page = request.form.get("pages")
    path = fc.getCorrectURL(path)
    if path is None:
        abort(404)
    domain = fc.findDomain(path)
    urls = fc.crawl(path, domain, int(page))
    # Never return more URLs than the caller asked for.
    if len(urls) > int(page):
        urls = urls[:int(page)]
    out = fc.analyseMain(urls)
    if len(out) == 0:
        # No analysable results: report a server-side failure.
        response = jsonify(output=out)
        response.status_code = 500
        return response
    return jsonify(output=out)
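

# A minimal sketch, not part of the original module: one possible way these view
# functions could be registered on a Flask application. The URL rules, HTTP
# methods, and the use of add_url_rule below are assumptions for illustration;
# the project's actual route decorators/registrations are not shown in this section.
if __name__ == "__main__":
    from flask import Flask

    app = Flask(__name__)
    app.add_url_rule("/crawl/<path:path>", view_func=get_crawl, methods=["GET"])
    app.add_url_rule("/vulnerabilities", view_func=check_vulnerabilities, methods=["POST"])
    app.add_url_rule("/create", view_func=create_crawl, methods=["POST"])
    app.run(debug=True)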