def test_Extractor():
    """Smoke-test the Extractor OHLC endpoint (coinbase-pro, btcusd).

    Builds an `after` timestamp and a periods string for 1- and 3-minute
    candles, calls ext.ohlc(), and prints either an error message (the
    extractor signals failures with the sentinel ints -1/-2/-3) or a
    summary of the returned candles.
    """
    # TEST CLASS EXTRACTOR - OHLC METHOD
    obj = TSC()
    val = obj.convert(4, 9, 2014, 1, 0)  # 'after' timestamp for the query
    obj = PM()
    dic = obj.ret_diz()          # label -> period code
    dic_rev = obj.ret_rev_diz()  # period code -> label
    list_of_types_candles = [dic['1-m'], dic['3-m']]
    per = obj.make(list_of_types_candles)
    ext = EXTR()
    x = ext.ohlc("coinbase-pro", "btcusd", after=val, periods=per)
    # Fix: use isinstance() for type checks instead of `type(x) == int`.
    if isinstance(x, int) and x == -1:
        print("Bad Request \n")
    elif isinstance(x, int) and x == -2:
        print("You have finished your CPU allowance, retry next hour \n")
    elif isinstance(x, int) and x == -3:
        print("Unexpected Error, retry please \n")
    else:
        candles_types_returned = list(x['result'].keys())
        print("Candles Types Returned: ({}, {}) ".format(
            dic_rev[candles_types_returned[1]],
            dic_rev[candles_types_returned[0]]))
        candle_1_minute = x['result'][dic['1-m']]
        candle_3_minutes = x['result'][dic['3-m']]
        print("Size 1-Minute Candle: ({}, {})".format(len(candle_1_minute), len(candle_1_minute[0])))
        print("Size 3-Minutes Candle: ({}, {})".format(
            len(candle_3_minutes), len(candle_3_minutes[0])))
        print("Example 1-Minute Candle: {} ".format(candle_1_minute[0]))
        print("Example 3-Minutes Candle: {} ".format(candle_3_minutes[0]))
def __init__(self, password, image_folder_path):
    """Set up decoding state.

    Args:
        password: pass-phrase used later to derive the decryption key.
        image_folder_path: folder holding the carrier images.
    """
    self.image_folder_path = image_folder_path
    self.mode = None  # cipher mode, not selected yet
    self._gcm = None  # GCM cipher object, created later from the header
    self._password = password
    self._data_length = 0
    # Extractor pulls the raw hidden bytes back out of the images.
    self.extractor = Extractor(image_folder_path)
def __init__(self):
    """Wire up the processing pipeline.

    Each stage is constructed from the previous one:
    Cropper -> Extractor -> Classifier -> Connections -> Visualization.
    """
    # Init basic objects
    self.cropper = Cropper()
    self.extractor = Extractor(self.cropper)
    self.classifier = Classifier(self.extractor.images)
    self.connections = Connections(self.extractor, self.classifier)
    self.visualization = Visualization(self.connections)
def c_07():
    '''Get and print the number of products (Spanish catalogue).'''
    try:
        extractor = Extractor(language='es')
        extractor.connect()
        n = extractor.get_num_of_products()
        print(n)
    except Exception as e:
        # Bug fix: the message previously reported "c_06()"; name the right case.
        print('c_07(), error: {}'.format(e))
class VideoExtractor:
    """Samples frames from a video file or stream into numbered JPEG images."""

    def __init__(self):
        # Extractor manages the pool of available stream URLs.
        self.extractor = Extractor()

    def get_random_stream_url(self):
        """Return a random stream URL that is currently available."""
        return self.extractor.get_random_avail_stream_file()

    def reload_alive_stream_url(self):
        """Re-check which stream URLs are still alive."""
        self.extractor.check_all_avail_stream_file()

    def extract_to(self, input_path, output_path, sampling_rate=0.5, max_capture_frame=0, clear_input_folder_if_exists=False):
        """Capture frames from input_path into output_path as JPEGs.

        sampling_rate: frames kept per second (0.5 -> one frame every 2 s).
        max_capture_frame: stop after this many frames (0 = unlimited).
        clear_input_folder_if_exists: wipe and recreate output_path first.

        NOTE(review): output_path is only (re)created inside the clearing
        branch -- if the folder does not exist and the flag is False,
        cv2.imwrite will silently fail; confirm callers pre-create it.
        """
        if clear_input_folder_if_exists and os.path.isdir(output_path):
            print "Clearing output folder {}".format(output_path)
            shutil.rmtree(output_path)
            os.mkdir(output_path)
        skip_sec = 1 / sampling_rate  # seconds of video between kept frames
        video = cv2.VideoCapture(input_path)
        success, image = video.read()
        count = 0
        fps = 0
        # OpenCV < 3 exposes the FPS property under the legacy cv2.cv namespace.
        (major_ver, minor_ver, subminor_ver) = cv2.__version__.split('.')
        if int(major_ver) < 3:
            fps = video.get(cv2.cv.CV_CAP_PROP_FPS)
            print "Frames per second using video.get(cv2.cv.CV_CAP_PROP_FPS): {0}".format(
                fps)
        else:
            fps = video.get(cv2.CAP_PROP_FPS)
            print "Frames per second using video.get(cv2.CAP_PROP_FPS) : {0}".format(
                fps)
        frame_rate = 1.0 / fps  # seconds of video represented by one frame
        current_time = 0
        count_reading = 0
        print "Start capturing at url {}".format(input_path)
        print "FPS: {} ".format(fps)
        print "Output: {}".format(output_path)
        print "Sampling rate: {}".format(sampling_rate)
        while success:
            success, image = video.read()
            count_reading += 1
            current_time += frame_rate
            if current_time >= skip_sec:
                # Enough video time has elapsed: keep this frame.
                current_time = 0
                number_name = "{0:05d}".format(count)
                output_url = output_path + "frame_" + number_name + ".jpg"
                cv2.imwrite(output_url, image)
                print "Writing {}".format(output_url)
                count += 1
                if 0 < max_capture_frame <= count:
                    break
        print "Task completed, total {} files".format(count)
def test_GetDueTodayTask(self):
    """Extractor must recognise 'due today' queries and pull the task type."""
    extractor = Extractor()
    result1 = extractor.extract("Apa saja deadline hari ini?", Context.getDueTodayTask)
    result2 = extractor.extract("Deadline tubes hari ini apa saja, ya?", Context.getDueTodayTask)
    result3 = extractor.extract("yang deadline pada hari ini", Context.getDueTodayTask)
    result4 = extractor.extract(
        "Bot, minta daftar deadline dong pada hari ini. Makasih :)", Context.getDueTodayTask)
    result5 = extractor.extract(
        "Untuk tucil, deadline pada hari ini apa saja?", Context.getDueTodayTask)
    result6 = extractor.extract(
        "Tubes yang deadline pada hari ini apa saja?", Context.getDueTodayTask)
    # Idiom fix: compare against None with `is not` (PEP 8), not `!=`.
    assert result1 is not None
    assert result1.jenisTask == ""
    assert result2 is not None
    assert result2.jenisTask == "tubes"
    assert result3 is not None
    assert result3.jenisTask == ""
    assert result4 is not None
    assert result4.jenisTask == ""
    assert result5 is not None
    assert result5.jenisTask == "tucil"
    assert result6 is not None
    assert result6.jenisTask == "tubes"
def __init__(self, trainDataPath=None, extractorOptions=[True, False, True], loadPath=None):
    """Load a pickled classifier or train a new Gaussian-process classifier.

    Args:
        trainDataPath: TSV file with (samples, labels) used for training.
        extractorOptions: feature-extractor flags. NOTE(review): explicitly
            passing the default value [True, False, True] is rejected by the
            elif guard below -- confirm that is intended.
        loadPath: path to a pickle of (classifier, extractor); takes priority.

    Raises:
        Exception: when neither loadPath nor (trainDataPath + non-default
            extractorOptions) is supplied.
    """
    if not loadPath == None:
        f = open(loadPath, "rb")
        self.gpc, self.ext = pickle.load(f)
        f.close()
    elif not (trainDataPath == None or extractorOptions == [True, False, True]):
        self.ext = Extractor(extractorOptions)
        # Bug fix: `ext` was an undefined name here -- the freshly created
        # extractor lives on self.ext.
        data = self.ext.readTsv(trainDataPath)
        features = self.ext.features(data[0])
        labels = data[1]
        self.gpc = GaussianProcessClassifier().fit(features, labels)
    else:
        raise Exception("Either path to saved classifier or (dataset+extractor options) should be given")
def __init__(self, localDownloadQueue="PendingDownloadQueue"):
    """Prepare the download pipeline and immediately process pending files.

    Args:
        localDownloadQueue: name of the queue holding files awaiting download.
    """
    Base.__init__(self)
    self.download_queue = localDownloadQueue
    self.ftp_sync = FileSyncer()
    self.move_file_into_processing()
    # Instantiated purely for their side effects on the sync directory.
    Extractor(self.local_directory_to_sync)
    Cleaner(self.local_directory_to_sync)
def decodeBloc(self, titre, bloc):
    """Decode one titled bloc and store it as the global bloc, an entity, or the game.

    The title decides the kind: 'Global' wins over 'Entity', which wins over 'Game'.
    """
    global_name = Extractor.extractBloc('Global', titre)
    entity_name = Extractor.extractBloc('Entity', titre)
    game_name = Extractor.extractBloc('Game', titre)
    if global_name:
        decoded = Global(self.workspace)
        decoded.decode(bloc)
        self.globalBloc = decoded
        return
    if entity_name:
        decoded = Entity(self.workspace, entity_name)
        decoded.decode(bloc)
        self.entities.append(decoded)
        return
    if game_name:
        decoded = Game(self.workspace)
        decoded.decode(bloc)
        self.game = decoded
async def main(args):
    """Crawl pages from a file or the clipboard and download matching images.

    Args:
        args: parsed CLI namespace with .file (optional URL-list file) and
            .destination (optional output folder).
    """
    extractor = Extractor('patterns.json')  # provides .input / .resource patterns
    if args.file:
        source = URLGenerator.source(args.file)
    else:
        source = ClipboardReader.source
    visited = set()  # URLs already fetched; a repeat ends the crawl
    if args.destination:
        folder = args.destination
    else:
        folder = 'images/'
    if not os.path.isdir(folder):
        # The output folder must already exist; bail out silently otherwise.
        return
    count = 1
    async for target in source():
        if not extractor.input.match(target):
            continue
        content, url = await Downloader.downloadContent(target)
        if url in visited:
            break  # looped back to a page we have seen: stop crawling
        visited.add(url)
        for img in extractor.resource.finditer(content):
            if not img.group():
                continue
            link = img.group()
            name = link.split('/')[-1] + '.png'
            await Downloader.downloadFile(link, os.path.join(folder, name))
            # One-line terminal progress animation.
            print('\033[K\033[A')
            print('Downloading ' + '.' * count, end='\r')
            count = (count + 1) % 5 + 1
    await waitTilFinish()
def test_attributes2(self):
    """Test the setting of attributes.

    An Extractor must create a JsonRecorder that shares the extractor's name.
    """
    e = Extractor('testing')
    self.assertEqual(e.name, 'testing')
    self.assertTrue(e.JsonRecorder and isinstance(e.JsonRecorder, JsonRecorder))
    # The existence and type correctness of the json recorder.
    self.assertEqual(e.JsonRecorder.name, 'testing')
    # extractor.JsonRecorder has the same name as the extractor.
def OnExtract(self, events):
    """wx handler: run TextRank keyword/summary extraction on the source text.

    languageType selection 1 means Chinese; anything else is English.
    Results are written to the abstract and keyword panes.
    """
    text = self.sourcePage.GetValue().strip()
    keyword_result = ''
    result = ''
    if text != '':
        if self.languageType.GetSelection() == 1:
            # Chinese pipeline: keywords + key phrases + sentence summary.
            sentences_percent = self.sentencesPercent.GetValue()
            similarity_function = self.similarityFunction.GetValue()
            print similarity_function
            extractor = Extractor(
                stop_words_file='./TextRank/trainer/stopword_zh.data')
            keyword, keyphrase = extractor.keyword_train(text=text)
            abstract = extractor.sentence_train(
                text,
                sentences_percent=sentences_percent,
                sim_func=similarity_function)
            keyword_result = '/'.join(keyword)
            keyword_result += '\n关键短语:\n' + '/'.join(keyphrase)
            result += '。'.join(abstract) + r'。'
            self.abstractPage.SetValue(result)
            # Set text style (disabled):
            #f = wx.Font(10, wx.ROMAN, wx.NORMAL, wx.NORMAL, True)  # create a font
            #self.abstractPage.SetStyle(0, len(result), wx.TextAttr('black',wx.NullColor,f))
            self.keywordPage.SetValue(keyword_result)
        else:
            # English pipeline: key phrases, plus a summary for full texts.
            art_type = self.articleType.GetSelection()
            extractor = EnExtractor(
                stop_words_file='./TextRank/trainer/stopword_en.data')
            if art_type == 1:
                # Abstract-only article: key phrases only, no summary.
                keyphrase = extractor.keyphrase_train(
                    text, article_type='Abstract')
                keyword_result = 'Keyphrases:\n' + '/'.join(keyphrase)
            else:
                sentences_percent = self.sentencesPercent.GetValue()
                similarity_function = self.similarityFunction.GetValue()
                keyphrase = extractor.keyphrase_train(
                    text, article_type='Fulltext')
                summary = extractor.summary_train(
                    text,
                    sentences_percent=sentences_percent,
                    sim_func=similarity_function)
                keyword_result = '/'.join(keyphrase)
                result += ' ' + ' '.join(summary)
                self.abstractPage.SetValue(result)
                # Set text style
                f = wx.Font(10, wx.ROMAN, wx.NORMAL, wx.NORMAL, True)  # create a font
                self.abstractPage.SetStyle(
                    0, len(result), wx.TextAttr('black', wx.NullColor, f))
            self.keywordPage.SetValue(keyword_result)
    else:
        #test
        #sentences_percent = self.sentencesPercent.GetValue()
        #print filter(lambda x:x.isdigit(), sentences_percent)
        print "No article"
def __init__(self, histogram):
    """Initialise analysis state and seed the estimator with the histogram.

    NOTE(review): attribute names do not match the classes they hold
    (filter=Propagator, plotter=Estimator, detector=Extractor) -- confirm
    the intent before relying on the names.
    """
    self.distribution = []
    self.generalization = []
    self.abstraction = []
    self.filter = Propagator()
    self.plotter = Estimator()
    self.detector = Extractor()
    # Give the estimator its working space up front.
    self.plotter.setspace(histogram)
def hello2():
    """Flask handler: read "user-input" from the form and answer via the chatbot.

    Identifies the message context; unknown contexts trigger spell-check
    suggestions, the help context returns a usage text, and every other
    context is extracted into a command that is executed. The exchange is
    appended to the module-level chat_data and the last five messages are
    re-rendered.
    """
    user_input = request.form["user-input"]
    print("\"{}\"".format(user_input))
    context_identifier = ContextIdentifier()
    context = context_identifier.getContext(user_input)
    bot_response = ""
    suggested_word = []
    if context == Context.unknown:
        print("Unknown?")
        suggested_word = SpellChecker().getWordSuggestion(user_input)
    elif context == Context.help:
        # Static usage text (Indonesian), one capability per line.
        bot_response = "Terdapat beberapa hal yang dapat dilakukan:\n"
        bot_response += "- Menambah tugas (coba \"Tolong ingatkan kalau ada kuis IF3110 Bab 2 pada 22/04/21\")\n"
        bot_response += "- Melihat semua tugas (coba \"bot tugas apa saja sejauh ini ya?\")\n"
        bot_response += "-. Melihat tugas pada periode tertentu (coba \"Apa saja deadline antara 03/04/2021 sampai 15/04/2021\")\n"
        bot_response += "- Melihat tugas beberapa hari/minggu ke depan (coba \"Ada tugas apa saja 2 hari ke depan\")\n"
        bot_response += "- Melihat tugas yang deadline-nya hari ini (coba \"Deadline tucil hari ini apa saja, ya?\")\n"
        bot_response += "- Menampilkan deadline dari suatu tugas tertentu (coba \"Deadline tucil IF2230 itu kapan?\")\n"
        bot_response += "- Memperbarui tugas (coba \"Deadline tucil IF2230 diundur menjadi 02/02/2021\")\n"
        bot_response += "- Menghapus/menyelesaikan tugas (coba \"bot ujian IF2230 sudah selesai ya jadi gausah disimpan lagi\")\n"
        bot_response += "Kata kunci:\n" + "\n".join(list(map(lambda x: "- " + x, ["kuis", "tubes", "tucil", "ujian"])))
    else:
        extractor = Extractor()
        print("\"{}\"".format(user_input))
        command = extractor.extract(user_input, context)
        if command == None:
            # Extraction failed: fall back to spelling suggestions.
            suggested_word = SpellChecker().getWordSuggestion(user_input)
        else:
            command.execute()
            bot_response = command.getResult()
    if bot_response == "":
        if len(suggested_word) > 0:
            bot_response = "Mungkin maksud kata kunci Anda: " + ", ".join(suggested_word)
        else:
            bot_response = "Saya tidak paham .-."
    chat_data.append((user_input, bot_response.split("\n")))
    # Render at most the last five exchanges.
    return render_template("index.html", message_data = chat_data[(-5 if len(chat_data) >= 5 else 0):])
def run(file_path):
    """Dissect, parse and extract every sample at file_path.

    file_path may be a single file or a directory; a directory is processed
    one entry at a time (non-recursively). Results are printed per sample.
    """
    dis = Dissector()
    parser = Parser()
    extrator = Extractor()

    def _process(path):
        # One sample's full pipeline: dissect -> parse -> extract -> report.
        sample = Sample(path)
        dis.extract_file(sample)
        parser.parse(sample)
        extrator.extract(sample)
        sample.print_info()

    # Deduplicated: both branches previously repeated the same pipeline.
    if os.path.isdir(file_path):
        for files in os.listdir(file_path):
            _process(os.path.join(file_path, files))
    else:
        _process(file_path)
class Decoder:
    """Recovers a GCM-encrypted payload hidden inside a folder of images."""

    # Supported cipher modes; MODE_LENGTH reserved for a mode byte in the payload.
    MODES = {'GCM': 8}
    MODE_LENGTH = 1

    def __init__(self, password, image_folder_path):
        self.image_folder_path = image_folder_path
        self.mode = None  # cipher mode, not selected yet
        self._gcm = None  # created in decode_header()
        self._password = password  # deleted right after key derivation
        self._data_length = 0
        self.extractor = Extractor(image_folder_path)

    def get_header_length(self):
        """Total header size: IV plus password salt."""
        return GCM.get_iv_length() + GCM.get_salt_length()

    def decode_header(self, raw_header):
        """Derive the decryption key from the header (salt + IV)."""
        # assume raw_header is bytearray of values
        salt_end = GCM.get_salt_length()
        gcm_end = salt_end + GCM.get_iv_length()
        password_salt = bytes(raw_header[:salt_end])
        iv = bytes(raw_header[salt_end:gcm_end])
        self._gcm = GCM(iv=iv, password_salt=password_salt)
        self._gcm.make_key(self._password)
        del self._password  # don't keep the pass-phrase in memory

    def get_tag(self, raw_data):
        """Pull the authentication tag from the tail of the payload."""
        tag = bytes(raw_data[-GCM.get_tag_length():])
        self._gcm.set_tag(tag)

    def decrypt_and_save_data(self, raw_data, destination_file):
        """Decrypt everything between header and tag, then write it out."""
        decrypted = self._gcm.decrypt(
            raw_data[self.get_header_length():-GCM.get_tag_length()])
        # with open("temp", "wb") as f:
        #     f.write(decrypted)
        self._gcm.decrypt_finalize()  # verifies the GCM authentication tag
        with open(destination_file, "wb") as f:
            f.write(decrypted)

    def decode_file(self, file_path):
        """End-to-end decode: extract bytes from images, write plaintext to file_path."""
        raw_data = self.extractor.load_images()
        # with open("decode-test", "wb") as f:
        #     f.write(raw_data)
        raw_header = raw_data[:self.get_header_length()]
        self.decode_header(raw_header)
        self.get_tag(raw_data)
        self.decrypt_and_save_data(raw_data, file_path)
def process_sample(file_path):
    """Run the full analysis pipeline on one sample file.

    Returns the Sample (only partially processed when dissection fails);
    returns None implicitly when the configuration cannot be loaded.
    """
    print file_path
    config = Config()
    load_sucess = config.load_config()
    if load_sucess:
        dis = Dissector()
        parser = Parser()
        extrator = Extractor()
        operator = Operator(config)
        r_generator = Report_Generator()
        sample = Sample(file_path)
        rlt = dis.extract_file(sample, config.get_output_dir())
        bin_time_list = list()  # NOTE(review): unused here -- confirm before removing
        if rlt:
            parser.parse(sample)
            extrator.extract(sample)
            # config.print_info()
            operator.operate(sample, config)
            r_generator.write_report(sample)
        return sample
def process_sample(file_path):
    """Dissect, parse, extract, operate on and report one sample file.

    Returns the Sample when configuration loads (even if dissection fails,
    in which case only the Sample itself is populated); None otherwise.
    """
    print file_path
    config = Config()
    load_sucess = config.load_config()
    if load_sucess:
        dis = Dissector()
        parser = Parser()
        extrator = Extractor()
        operator = Operator(config)
        r_generator = Report_Generator()
        sample = Sample(file_path)
        rlt = dis.extract_file(sample, config.get_output_dir())
        bin_time_list = list()  # NOTE(review): never used in this function
        if rlt:
            parser.parse(sample)
            extrator.extract(sample)
            # config.print_info()
            operator.operate(sample, config)
            r_generator.write_report(sample)
        return sample
def process_all(self):
    '''This method Extract all the rows SELECTed from the table (mySQL
    product_translation), Transform, and Load to Redis.
    This is used for Kill and Fill.'''
    try:
        extractor = Extractor()
        #transformer= Transformer( self.params )
        extractor.connect()
        num_of_rows = 20  # batch size per fetch
        extractor.execute()
        rows = extractor.get_next_batch(num_of_rows)
        transformer = Transformer()
        courier = CourierClicoh()
        loader = LoaderCsv()
        while len(rows) > 0:
            products = []
            for row in rows:
                print('id : {}'.format(row['id']))
                # Register the product with the courier, then flatten the
                # response into a CSV row.
                j = courier.add_product(row)
                product = transformer.get_csv_row(j, row)
                '''d = { 'id' : row[ 'id' ], 'sku' : row[ 'sku' ], 'clicoh_id' : "row[ 'clicoh_id' ]", 'clicoh_variant_id' : "row[ 'clicoh_variant_id' ]", }'''
                products.append(product)
            loader.write_rows(products)
            rows = extractor.get_next_batch(num_of_rows)
        extractor.close()
        print('\n ETL.procell_all() ... end')
    except Exception as e:
        print('ETL.process_all(), error: {}'.format(e))
        raise
def extraer_rep(argumentos, pipe):
    """Extract a GitLab repository and store it; report status over `pipe`.

    Sends an HTTP-like status code: 400 (not a gitlab.com URL), 200 (stored),
    404 (project not found), 401 (insufficient permissions); any other
    failure sends the exception object itself.
    """
    url = argumentos['url']
    url = url.split('/')
    if url[2] != 'gitlab.com':
        pipe.send(400)
        return 1
    url = url[3] + '/' + url[4]  # namespace/project path
    try:
        if 'token' in argumentos.keys():
            ext = Extractor(link=url, token=argumentos['token'])
        else:
            ext = Extractor(url)
        p = ext.extraer()
        Almacen.guardar(p)
        pipe.send(200)
    except Exception as e:
        ServidorLogica.log(str(e))
        # Error kind is distinguished by the (Spanish) exception message text.
        if str(e) == 'Proyecto no encontrado':
            pipe.send(404)
            return 1
        if str(e) == 'Permisos insuficientes':
            pipe.send(401)
            return 1
        # NOTE(review): sends the exception object, not an int code --
        # confirm the receiver handles non-int payloads.
        pipe.send(e)
def c_01():
    '''get batches of rows from mySQL database. '''
    try:
        # Use a distinct name so the exception variable below cannot shadow it.
        extractor = Extractor()
        extractor.connect()
        extractor.execute()
        while True:
            rows = extractor.get_next_batch(num_of_rows=2)
            if not rows:
                break
            print('\n ut_01.c_01(), looping BATCH of rows')
            for r in rows:
                print('\n {}'.format(r))
        extractor.close()
    except Exception as e:
        print('ut_01.c_01(), error: {}'.format(e))
    print('\n end of case 1.')
class ETL:
    """Pipeline: extract eBay categories, transform their XML, load to SQL/HTML."""

    def __init__(self, prop):
        # prop is the shared configuration handed to every stage.
        self.prop = prop
        self.extractor = Extractor(prop)
        self.transformer = Transformation(prop)
        self.loader = Loader(prop)

    def WSToSQL(self):
        """Pull every level-1 eBay category and load its subtree into SQL."""
        categoriesLevel1 = self.transformer.transformXML(
            self.extractor.extractLevel1FromEbayService())
        self.loader.preLoadSQL()
        size = len(categoriesLevel1)
        for (idx, category) in enumerate(categoriesLevel1):
            categoryID = category[0]  # first field of the transformed row
            # NOTE(review): under Python 3 this prints a float percentage
            # (e.g. "50.0%") because / is true division -- confirm intent.
            print("Building " + str(int(idx) * 100 / size) + "%")
            self.loader.loadSQL(
                self.transformer.transformXML(
                    self.extractor.extractFromEbayService(categoryID)))

    def SQLToHTML(self, categoryID):
        """Render one category tree from the database to HTML."""
        return self.loader.loadHTML(
            self.transformer.buildHTML(categoryID, self.extractor.extractTreeFromDB()),
            categoryID)
def OnExtract(self,events):
    """wx handler: TextRank keyword and summary extraction of the source text.

    Selection 1 on languageType selects the Chinese pipeline; otherwise the
    English one runs. Output goes to the abstract and keyword panes.
    """
    text = self.sourcePage.GetValue().strip()
    keyword_result=''
    result = ''
    if text != '':
        if self.languageType.GetSelection() == 1:
            # Chinese: keywords, key phrases and a sentence summary.
            sentences_percent = self.sentencesPercent.GetValue()
            similarity_function = self.similarityFunction.GetValue()
            print similarity_function
            extractor = Extractor(stop_words_file='./TextRank/trainer/stopword_zh.data')
            keyword,keyphrase = extractor.keyword_train(text=text)
            abstract = extractor.sentence_train(text, sentences_percent=sentences_percent,sim_func=similarity_function)
            keyword_result = '/'.join(keyword)
            keyword_result += '\n关键短语:\n' + '/'.join(keyphrase)
            result += '。'.join(abstract)+r'。'
            self.abstractPage.SetValue(result)
            # Set text style (disabled):
            #f = wx.Font(10, wx.ROMAN, wx.NORMAL, wx.NORMAL, True)  # create a font
            #self.abstractPage.SetStyle(0, len(result), wx.TextAttr('black',wx.NullColor,f))
            self.keywordPage.SetValue(keyword_result)
        else :
            # English: key phrases, plus a summary for full-text articles.
            art_type = self.articleType.GetSelection()
            extractor = EnExtractor(stop_words_file='./TextRank/trainer/stopword_en.data')
            if art_type == 1:
                # Abstract-only article: key phrases only.
                keyphrase = extractor.keyphrase_train(text,article_type='Abstract')
                keyword_result = 'Keyphrases:\n'+'/'.join(keyphrase)
            else:
                sentences_percent = self.sentencesPercent.GetValue()
                similarity_function = self.similarityFunction.GetValue()
                keyphrase = extractor.keyphrase_train(text,article_type='Fulltext')
                summary = extractor.summary_train(text,sentences_percent = sentences_percent,sim_func=similarity_function)
                keyword_result = '/'.join(keyphrase)
                result += ' '+' '.join(summary)
                self.abstractPage.SetValue(result)
                # Set text style
                f = wx.Font(10, wx.ROMAN, wx.NORMAL, wx.NORMAL, True)  # create a font
                self.abstractPage.SetStyle(0, len(result), wx.TextAttr('black',wx.NullColor,f))
            self.keywordPage.SetValue(keyword_result)
    else:
        #test
        #sentences_percent = self.sentencesPercent.GetValue()
        #print filter(lambda x:x.isdigit(), sentences_percent)
        print "No article"
def process_all(self):
    """ETL: compute per-product term frequencies, then TF-IDF in the loader.

    Streams products from the extractor in batches of 10, writes each
    product's term-frequency map through a loader pipeline, then lets the
    loader derive document frequencies and TF-IDF for the whole catalogue.
    """
    try:
        print('process_all ... begin')
        extractor = Extractor(self.language)
        transformer = Transformer()
        extractor.connect()
        num_of_products = extractor.get_num_of_products()
        extractor.execute()
        num_of_rows = 10  # batch size per fetch
        rows = extractor.get_next_batch(num_of_rows)
        pipeline = self.loader.create_pipeline()
        while len(rows) > 0:
            for product in rows:
                print('\n {}'.format(product))
                tf = transformer.get_tf(product)
                print('len tf: {}'.format(len(tf)))
                self.loader.insert_tf(product['id'], tf, pipeline)
            pipeline.execute()  # flush the batch before fetching the next one
            rows = extractor.get_next_batch(num_of_rows)
        extractor.close()
        self.loader.count_df()
        self.loader.count_tf_idf(num_of_products)
    except Exception as e:
        print('ETL.process_all(), error: {}'.format(e))
class Classifier:
    """Gaussian-process text classifier backed by a feature Extractor.

    Construct either from a pickle of (classifier, extractor) via loadPath,
    or by training on a TSV dataset via trainDataPath + extractorOptions.
    """

    def __init__(self, trainDataPath=None, extractorOptions=[True, False, True], loadPath=None):
        """Load a saved classifier or train a new one.

        Raises:
            Exception: when neither loadPath nor (trainDataPath + non-default
                extractorOptions) is supplied. NOTE(review): explicitly passing
                the default options list is rejected by the elif guard --
                confirm that is intended.
        """
        if not loadPath == None:
            f = open(loadPath, "rb")
            self.gpc, self.ext = pickle.load(f)
            f.close()
        elif not (trainDataPath == None or extractorOptions == [True, False, True]):
            self.ext = Extractor(extractorOptions)
            # Bug fix: `ext` was an undefined name here -- read the TSV through
            # the extractor just stored on self.ext.
            data = self.ext.readTsv(trainDataPath)
            features = self.ext.features(data[0])
            labels = data[1]
            self.gpc = GaussianProcessClassifier().fit(features, labels)
        else:
            raise Exception("Either path to saved classifier or (dataset+extractor options) should be given")

    def save(self):
        """Pickle (classifier, extractor) under ./data/<name>.pkl (name is prompted)."""
        fileName = input("What should the name of PICKLE file be for this classifier?\nex)trained\n")
        if fileName == "":
            fileName = "trained"
        f = open("./data/" + fileName + ".pkl", 'wb')
        pickle.dump((self.gpc, self.ext), f)
        f.close()

    def predict(self, x):
        """Predict labels for raw samples x (features are extracted here)."""
        return self.gpc.predict(self.ext.features(x))

    def evaluate(self, evalDataPath, targetLabels=["y"]):
        """Print precision/recall/accuracy and a confusion matrix for a TSV set.

        targetLabels defines the positive class.
        NOTE(review): precision/recall divide by zero when there are no
        positive predictions / labels -- confirm evaluation sets contain both.
        """
        data = self.ext.readTsv(evalDataPath)
        features = self.ext.features(data[0])
        predictions = self.gpc.predict(features)
        labels = data[1]
        tp = 0
        fp = 0
        fn = 0
        tn = 0
        for i in range(len(predictions)):
            if (predictions[i] in targetLabels and labels[i] in targetLabels):
                tp += 1
            elif (predictions[i] in targetLabels and not labels[i] in targetLabels):
                fp += 1
            elif (not predictions[i] in targetLabels and labels[i] in targetLabels):
                fn += 1
            else:
                tn += 1
        precision = tp / (tp + fp)
        recall = tp / (tp + fn)
        accuracy = (tp + tn) / len(predictions)
        print("Total accurancies: {0}\n".format(len(predictions)))
        print("precision: \t\t{0}\nrecall: \t\t{1}\naccuracy: \t\t{2}".format(precision, recall, accuracy))
        print("confusion matrix:\nP\\R\tY\tN\nY\t{0}\t{1}\nN\t{2}\t{3}".format(tp, fp, fn, tn))

    def showFeatures(self, dataPath=None, data=None):
        """Print feature vectors for a TSV file (dataPath) or in-memory data."""
        if not dataPath == None:
            data = self.ext.readTsv(dataPath)
            features = self.ext.features(data[0], True)
        elif not data == None:
            features = self.ext.features(data, True)
        else:
            raise Exception("no data available")
        for i in features:
            print(i)
print("confusion matrix:\nP\\R\tY\tN\nY\t{0}\t{1}\nN\t{2}\t{3}".format(tp,fp,fn,tn)) def showFeatures(self,dataPath=None,data=None): if not dataPath ==None: data=self.ext.readTsv(dataPath) features=self.ext.features(data[0],True) elif not data==None: features=self.ext.features(data,True) else: raise Exception("no data available") for i in features: print(i) if __name__=="__main__": ext=Extractor() if(input("Are you here to evaluate?\n>>> ").lower()=="y"): itemsList=[f for f in listdir("data/") if f[-4:]==".pkl"] for item in range(len(itemsList)): print("{0}: {1}".format(item,itemsList[item])) clfPath=int(input("Select your classifier PICKLE file\n>>> ")) clf=Classifier(loadPath="data/"+itemsList[clfPath]) dataList=[f for f in listdir("data/") if f[-4:]==".tsv"] for datum in range(len(dataList)): print("{0}: {1}".format(datum,dataList[datum])) dataPath=int(input("Select your evaluation data\n>>> ")) targetLabels=[l for l in input("target labels\n>>> ")] clf.evaluate("data/"+dataList[dataPath],targetLabels) _exit(0) opStr=input("Input your option string\nex)'fttt'\n>>> ") options=[]
options['baseUrl'] = 'http://supervisor/core/api/states/' except: logging.warning("Couldn't get token from Enviroment assuming this is dev") optionsFile = 'local.json' with open(optionsFile) as json_file: options.update(loadJson(json_file)) logging.info("Got {} for database".format(options['db_name'])) client = InfluxDBClient(host=options['db_ip'], port=options['db_port'], username=options['db_user'], password=options['db_pass']) extractor = Extractor(options['baseUrl'], options['sensorPrefix'], options[TOKENKEY], options['Timezone'], options['db_measurement_name']) Einf = Einfluxer(client, options['db_name']) message = "" try: data = extractor.GetMeasurements() except ValueError: message = "Got ValueError when fetching data from Home assistant, The sensor probably haven't fetched data yet." if message == "": if not data[0]['tags']['Metering date'] == Einf.GetLatestMeterDate(): message = "Inserted data for: {}".format( data[0]['tags']['Metering date']) try:
def __init__(self, prop):
    """Store the shared properties and build the ETL stage objects.

    Args:
        prop: configuration object handed to every stage.
    """
    self.prop = prop
    self.extractor = Extractor(prop)
    self.transformer = Transformation(prop)
    self.loader = Loader(prop)
from sklearn.metrics import confusion_matrix
import numpy as np


def evaluate(predictions, test_y):
    """Return the fraction of predictions matching test_y (plain accuracy)."""
    accuracy = sum(i == j for i, j in zip(predictions, test_y)) / len(test_y)
    return accuracy


n_realizations = 20  # number of independent train/test splits to average over
hit_rates = []

X, y = datasets.get_car_numbers_dataset()
# Alternative feature extractors, kept for experimentation:
# X = Extractor.lbp_extraction(X , 24, 8)
# X = Extractor.hu_extraction(X)
X = Extractor.glcm_extraction(X)

# create new a knn model
knn = KNeighborsClassifier()

for _ in range(n_realizations):
    # split dataset into train and test data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y)
    # create a dictionary of all values we want to test for n_neighbors
    param_grid = {'n_neighbors': np.arange(3, 20, 2)}
    # use gridsearch to test all values for n_neighbors
from InfoToTextFile import RecordEntry

# Source workbook and the aggregate mailing-list output.
excelFile = "FinalResult.xlsx"
mailingListFile = "Mailing/MailingList.csv"

# list of emails, to be written to a text file
mail_list = []

# now to action!
# create record writer instance
recWriter = RecordEntry()
# extract mail
extractor = Extractor(workBookPath=excelFile)
mail_list = extractor.extractAllEmailAddress()
# write to file
recWriter.writeRecords(text_file=mailingListFile, records=mail_list)

# get team records by name and write mail lists or full info
# writing mailing lists here
teamNames = [
    "Corporate", "Operations", "Publications", "Promotions", "Logistics"
]
stats = {}  # NOTE(review): populated further down, beyond this excerpt
for teamName in teamNames:
    # One per-team CSV next to the aggregate list.
    recordsFile = "Mailing/{}.csv".format(teamName)
    records = extractor.extractRecordByTeam(teamName=teamName)
def main(args: str) -> None:
    """Validate the CLI input, scrape the subjects and dump them to CSV."""
    url: str = __check_error_input(args)
    print("Extracting subjects...")
    subjects = WebScraping(url).extract_subjects()
    frame = Extractor(subjects).extract_data_frame()
    frame.to_csv("./Dati/subjects.csv", index=False)
    print("DONE!\n")
""" An example for calling the test function of the model to predict big5 traits and the predicted values are returned as a json string """ from Extractor import Extractor from langdetect import detect import json import time user_status = """ LOL! 'Take a deep breath\n2. think of someone u like\n3. press F10 5 times\n4. send this to 5 coments\n5. look at ur background\n9 minutes ago ', 'Rotate your facebook friends omg =). http://tinyurl.com/37wdxok', 'sarap!!!!!!!!!!!!!!!!!1', '???? Happy New Year In Advance!! ????? ?????????????????????????????????? ????????????????????????????????? ???????????????????????????? ????? ?????????????????????????????????? ?????????????????????????????????? ?????????????????????????????????? ??????????????????????????????????', 'Rotate your facebook friends omg =). http://tinyurl.com/2ug8r4c', "Apple finally gives away 100 Iphone 4's\n http://apps.facebook.com/qbquiz-ghflb", 'ano susuutin bukas ??????', 'Rotate your facebook friends omg =). http://tinyurl.com/3xngp4s', 'Rotate your facebook friends omg =). http://tinyurl.com/3354776', ':)?', 'Rotate your facebook friends omg =). http://tinyurl.com/35ssbf9', 'AnOng sTanza Kana Roseangeli Salvador', 'Rotate your facebook friends omg =). http://tinyurl.com/2vvpw55', 'Rotate your facebook friends omg =). http://tinyurl.com/2wmv4au', 'Rotate your facebook friends omg =). http://tinyurl.com/32f7z4z', '-', 'Pls like 4 ST PAUL!!!!!!!!!!?', 'HAPPY NEW YEAR!!!!!', 'EXAM NA BUKAS!!!!!!!!!? ?', 'POINTER TO REVIEW IN READING 4\n-synonyms and antonyms\n-words with silent letters\n -simile and methaphor\n-idiomatic expressions\n-consonant digraphs\ngaling kay ms nabong yan', 'Rotate your facebook friends omg =). http://tinyurl.com/346gw6w', 'POINTER TO REVIEW IN READING 4-synonyms and antonyms-words with silent letters -simile and methaphor-idiomatic expressions-consonant digraphsgaling kay ms nabong yan', 'IS WATCHING SHOUTOUT', 'Rotate your facebook friends omg =). 
http://tinyurl.com/37ne983', "Apple finally gives away 100 Iphone 4's http://apps.facebook.com/rxytnkgo", 'Rotate your facebook friends omg =). http://tinyurl.com/35tplvh', 'Rotate your facebook friends omg =). http://tinyurl.com/33oua5o', 'Rotate your facebook friends omg =). http://tinyurl.com/38k6hmn', 'Rotate your facebook friends omg =). http://tinyurl.com/2w2o7am', 'Rotate your facebook friends omg =). http://tinyurl.com/3ajg5b3', 'Gwyneth Erin M. Nohay ano size nung cardcase?', "------[]------ put this\n--[][][][][]-- as your\n------[]------ status if\n------[]------ you're a christian\n------[]------ and not ashamed", 'te Shiela Mae Tolentino thankyou sa psp and chocolates:)', '??????????????????????????? ??????????????????????????? ???????????????????????????\n\n vs\n\n?????????????????????????? \n?????????????????????????? \n??????????????????????????', "TEACHER: juan! may 5 ibon na nakaupo sa bakod, pag binaril mo ang isa..ilan ang matitira?\n\nJUAN: wala po ma'am.\n\nTEACHER: bobo ka ba?! bat mo nasabing wala?! sige nga?!\n\nJUAN: dahil isang putok mo lang ng baril aalis lahat ng ibon at walang matitira! bobo! ikaw ang maupo dito, at ako ang magtuturo!\n\n// matalino XD", '???????????????????????\n??????? ? If you love JESUS\n????????????copy this in your wall ???????????????????????\n?????????????????????\n??????? ? ?', 'This year October has 5 Mondays, 5 Saturdays and 5 Sundays. This Happens once every 823years. This is called money bags. So copy this to your status and money will arrive within 4days. Based on Chinese Feng Shui. The one who does not copy, will be without money', "I know 10 facts about you\n\n1. You are reading this.\n2. You can't say M without touching your lips\n3. You just tried it\n... 4. You just smiled or laughed.\n6. You are a boy/girl.\n7. You didnt realize I skipped 5.\n8. You are looking back at 4 and 6.\n9. You are liking this.\n10. 
You are reading me telling you to like this.\n\nLIKE THIS STATUS IF YOU DID THOSE THINGS"] """ predictTypeList = ['ope','con','ext','agr','neu'] msg = """The input is not English. Currently we only support English language.""" ext = Extractor() lan = ext.isEnglish(user_status) if lan == True: score = ext.getScore(user_status, predictTypeList) perc = ext.getPercentile(score) print json.dumps({'scores':score, 'percentiles':perc}) else: print msg
X_test, y_test, file, generation=20, scale=20, conjunction=False, maxsat_on=True, tailor=False, fitness_func='Pro') param = m.pso() phi = param[0] theta = param[1] psi = param[2] k = param[3] ex = Extractor(clf, phi, theta, psi) ex.extract_forest_paths() ex.rule_filter() print('max_rule', ex.max_rule, 'max_node', ex.max_node) print("original path number: ", ex.n_original_leaves_num) print('original scale: ', ex.scale) print("original path number after rule filter: ", len(ex._forest_values)) sat = Z3Process(ex, k) sat.leaves_partition() sat.maxsat() sat.run_filter() print("original path number after maxsat: ", sat.n_rules_after_max, " after filter: ", sat.n_rules_after_filter, '\n') print('classes:', clf.classes_)