class test_tweet(unittest.TestCase):
    """Tests for tweet scraping and the tweet-list printing helper."""

    def setUp(self):
        # Fresh two-post scrape before every test; siblings read these attributes.
        scrape = Scraping("realdonaldtrump", 2)
        scrape.scrape_data()
        self.aScrape = scrape
        self.shortlist = scrape.get_posts()

    def test_tweet(self):
        """Testing that scraping actually returns a 'Tweet' type"""
        self.assertIsInstance(self.shortlist[0], tweet)

    def test_print_all(self):
        """Appending a non-tweet (a Scraping object) to the tweet list must
        make print_tweetlist raise AttributeError — demonstrating the downside
        of Python's dynamic duck typing."""
        dumbScrape = Scraping("realdonaldtrump", 1)
        self.shortlist.append(dumbScrape)
        with self.assertRaises(AttributeError):
            tweet.print_tweetlist(self.shortlist)
def test_scrape_data(self):
    """After scrape_data(), find_tweets must hold at least one entry."""
    scrape = Scraping("realdonaldtrump", 2)
    scrape.scrape_data()
    # Debug aid kept from the original implementation.
    print(type(scrape.find_tweets[0]))
    self.assertGreater(len(scrape.find_tweets), 0)
def test_getpostList_Json(self):
    """Round-trip test: scrape posts, write them to a JSON project file, then
    read them back as Python post objects and check the reported path.

    It is a little dumb, but you have to provide console input to run this
    test (Organizer.getpostList_Json prompts the user).
    """
    newScrape = Scraping("BarackObama", 3)
    newScrape.scrape_data()
    payload = newScrape.get_posts()
    Organizer.create_project("BarackObama")
    Organizer.write_file_json(payload, "BarackObama")
    supposedpath = os.getcwd() + "/Jsondata_files/BarackObama"
    # Fix: the original bound the result to `list`, shadowing the builtin.
    posts, path = Organizer.getpostList_Json()
    self.assertIsInstance(posts[0], tweet)
    self.assertEqual(path, supposedpath)
def test_get_posts(self):
    """get_posts() after a successful scrape must return a non-empty list."""
    scrape = Scraping("realdonaldtrump", 2)
    scrape.scrape_data()
    self.assertTrue(scrape.get_posts())
def task_scrape(self, url):
    """Scrape the Yahoo Japan front page and log its <title> text.

    NOTE(review): the `url` parameter is ignored — the target URL is
    hard-coded below; confirm whether it should be used instead.

    Raises:
        Exception: when no title could be extracted, or on any scrape error
        (re-raised so the task framework's on_failure handling fires).
    """
    # Closure handed to the scraper: pull the <title> tag's text.
    def getTitle_cl(cPyQuery):
        return cPyQuery('title').text()

    try:
        cScraping = Scraping()
        title = cScraping.scrape('http://www.yahoo.co.jp', getTitle_cl)
        # Fix: `title == None` → identity check; empty string also counts as failure.
        if title is None or title == '':
            raise Exception("task_scrape: empty title")
        logger.info("task_scrape %s\t\t%s\n" % (now_str, title))
    except Exception:
        # Bare re-raise preserves the original traceback (was `raise exc`).
        raise  # on_failure
def test_Scraping(self):
    """Testing the 'Scraping' constructor"""
    print("testing Scraping construct")
    scrape = Scraping("realdonaldtrump", 2)
    # Constructor must have fetched the profile page successfully.
    self.assertEqual(scrape.r.url, "https://twitter.com/realdonaldtrump")
    self.assertEqual(scrape.r.status_code, 200)
def test_print_all(self):
    """Appending a non-tweet (a Scraping object) to the tweet list must make
    print_tweetlist raise AttributeError — demonstrating the downside of
    Python's dynamic duck typing."""
    dumbScrape = Scraping("realdonaldtrump", 1)
    self.shortlist.append(dumbScrape)
    with self.assertRaises(AttributeError):
        tweet.print_tweetlist(self.shortlist)
def _search(self, url):
    """Scrape `url` for the element identified by BASEID; return it as an int."""
    return int(Scraping.scraping(url, self.BASEID))
from Scraping import Scraping

if __name__ == "__main__":
    # Target site for the demo scrape.
    url = 'http://t3chfest.uc3m.es'
    scraper = Scraping()
    scraper.scrapingImagesPdf(url)
    scraper.scrapingBeautifulSoup(url)
def _search_google(self, key):
    """Search Google for `key`, scrape the result-count element, and return
    the digits found in it as a single int.

    Improvement: digit extraction now uses the stdlib `re` module instead of
    an `nltk.RegexpTokenizer` — identical output (each ASCII digit, in order),
    one fewer third-party dependency.
    """
    import re  # local import keeps the block self-contained

    result_text = Scraping.scraping(self.BASEURL + key, self.BASEID)
    return int("".join(re.findall(r"[0-9]", result_text)))
# ログディレクトリ if config.has_option('SwitchCryptoCoinMining', 'LogDirectory'): LogDirectory = config['SwitchCryptoCoinMining']['LogDirectory'] else: LogDirectory = '' # スクレイピング対象のURL ScrapingURL = config['SwitchCryptoCoinMining']['URL'] # 初期値 NowCoin = "" proc = "" while True: # スクレイピング scrape = Scraping(url=ScrapingURL, log_dir=LogDirectory) crypto = scrape.scraping_crypto() for i in range(len(crypto) - 1): # コイン名,%を表示 print(str(i) + ',' + crypto[i][0] + ',' + crypto[i][1]) # MiningPoolhub # FTC if crypto[i][0] == 'Feathercoin(FTC)NeoScrypt': if not NowCoin == 'FTC': if NowCoin == 'Zcash(ZEC)' or NowCoin == 'ZCL': os.system("taskkill /f /im bminer.exe") else: # proc.terminate() os.system("taskkill /f /im ccminer-x64.exe") proc = StartMining.FTC()
# Banner, then command-line parsing for the pentesting tool.
print(author())
parser = argparse.ArgumentParser(description='Pentesting-tool')
# Main arguments
parser.add_argument("-target", dest="target", help="target IP / domain", required=None)
parser.add_argument("-ports", dest="ports", help="Please, specify the target port(s) separated by comma[80,8080 by default]", default = "80,8080")
parser.add_argument("-proxy", dest="proxy", help="Proxy[IP:PORT]", required=None)
parsed_args = parser.parse_args()
# Instantiate one helper object per scanning/analysis capability.
# These names are referenced by code later in the file — do not rename.
shodanSearch = ShodanSearch()
dnsResolver = UtilDNS()
sshConnection = SSHConnection()
checker = Checker()
scraping = Scraping()
scanningNMAP = ScanningNMAP()
infoLocation = InfoLocation()
httpScan = HTTPScan()
checkOpenSslVulnerable = CheckOpenSslVulnerable()
checkFtpVulnerable = CheckFTPVulnerable()
extractMails = ExtractMails()
checkVuln_SQL_XSS_LFI = CheckVuln_SQL_XSS_LFI()
#scannerScapy = ScannerScapy()
#default port list
# Mutable state filled in by the interactive menu below (outside this fragment).
ip = ""
hostname = ""
option = ""
def initializeData(TrieObject):
    """Build the document index from a fixed set of locally hosted HTML pages.

    For each page: point Document at it, fetch its words via Scraping,
    run Document.initialize(), and record per-document data.

    NOTE(review): the original bound ``T = TrieObject`` but returned
    ``Document.T`` — the parameter is effectively unused; behavior preserved.

    Returns:
        Document.T: the populated trie.
    """
    T = TrieObject
    # Improvement: the original repeated this 4-statement sequence verbatim
    # for every page; collapsed into one data-driven loop.
    base = "http://127.0.0.1//CS600/"
    pages = [
        "html_intro.html",
        "html_editors.html",
        "html_basic.html",
        "html_elements.html",
        "html_attributes.html",
        "html_headings.html",
        "html_paragraphs.html",
        "html_styles.html",
        "html_formatting.html",
        "html_quotation_elements.html",
    ]
    for page in pages:
        # Loading the data from localhost, extracting words, then indexing.
        Document.doc = base + page
        Document.words = Scraping.getDataFromWebPage(Document.doc)
        Document.initialize()
        Document.docData[Document.doc] = Scraping.DocumentData()
    return Document.T
# NOTE(review): this fragment begins MID-CALL — the opening
# `parser.add_argument(` line lies before the visible region.
dest="ports", help= "Please, specify the target port(s) separated by comma[80,8080 by default]", default="80,8080")
parser.add_argument("-proxy", dest="proxy", help="Proxy[IP:PORT]", required=None)
parsed_args = parser.parse_args()
# Instantiate one helper object per scanning/analysis capability.
# These names are referenced by code later in the file — do not rename.
# shodanSearch = ShodanSearch()
dnsResolver = UtilDNS()
sshConnection = SSHConnection()
checker = Checker()
scraping = Scraping()
scanningNMAP = ScanningNMAP()
infoLocation = InfoLocation()
httpScan = HTTPScan()
checkOpenSslVulnerable = CheckOpenSslVulnerable()
checkFtpVulnerable = CheckFTPVulnerable()
extractMails = ExtractMails()
checkVuln_SQL_XSS_LFI = CheckVuln_SQL_XSS_LFI()
scannerScapy = ScannerScapy()
#default port list
# Mutable state filled in by the interactive menu below (outside this fragment).
ip = ""
hostname = "-"
option = ""
from Scraping import Scraping

if __name__ == "__main__":
    # Target site for the demo scrape.
    url = 'http://www.google.es'
    scraper = Scraping()
    scraper.scrapingImages(url)
    scraper.scrapingPDF(url)
    scraper.scrapingLinks(url)
    # scraping.scrapingBeautifulSoup(url)
# Pull the remaining CLI parameters out of the parsed `params` object.
password = params.password1
mensaje = params.mensaje1
busquedas = params.buscarsh
inic = params.inicioe
fin = params.finale
apish = params.apis
# Dispatch on the chosen sub-command.
if opcion == 'metadatos':
    printMeta(datos)
elif opcion == 'cifrado':
    datos = params.argumento
    if datos == 'encriptar':
        Encriptar(Frase)
    elif datos == 'des':
        print(Desencriptar(Frase))
elif opcion == 'scraping':
    # Scrape images, PDFs and links from the target given in `datos`.
    scraping = Scraping()
    scraping.scrapingImages(datos)
    scraping.scrapingPDF(datos)
    scraping.scrapingLinks(datos)
elif opcion == 'mail':
    sendmail(asunto, sender, reciver, password, mensaje)
    print("Correo Enviado")
elif opcion == 'shodan':
    busqueda(busquedas, apish)
elif opcion == 'nmap':
    # Port-scan `datos` from port `inic` to `fin`; results land in a txt file.
    ip = str(datos)
    inicios = int(inic)
    finales = int(fin)
    Escaneo(inicios, finales, ip)
    # NOTE(review): fragment is truncated here — the closing of this
    # print call lies beyond the visible region.
    print( "El escaneo se encuentra en txt en la misma carpeta que este programa"
# Project-local imports: Libro (book model) and Scraping (scraper helper).
from Libro import Libro
from Scraping import Scraping

# Module-level scraper instance — presumably shared by code later in the file.
scrap = Scraping()
def setUp(self):
    """Scrape two posts from the test account before each test.

    Sibling tests read self.aScrape and self.shortlist, so those attribute
    names are part of the fixture contract.
    """
    scrape = Scraping("realdonaldtrump", 2)
    scrape.scrape_data()
    self.aScrape = scrape
    self.shortlist = scrape.get_posts()
from Scraping import Scraping

if __name__ == "__main__":
    # Target site for the demo scrape.
    url = 'http://python.ie/pycon-2016/schedule/'
    scraper = Scraping()
    # scraping.scrapingImagesPdf(url)
    scraper.scrapingBeautifulSoup(url)