def __init__(self, callback):
    """Modal 'Process file' dialog that stacks the wizard pages in one grid cell."""
    tk.Toplevel.__init__(self)
    self.grab_set()  # make the dialog modal
    self.title('Process file')
    self.geometry("%dx%d%+d%+d" % (400, 300, 300, 200))

    # All pages share the same grid cell; show_frame raises the active one.
    container = tk.Frame(self)
    container.pack(side="top", fill="both", expand=True)
    container.grid_rowconfigure(0, weight=1)
    container.grid_columnconfigure(0, weight=1)

    self.callback = callback
    self.processor = DataProcessor()
    self.defaultvar = 'none'
    self.radiovar = tk.StringVar(None, self.defaultvar)

    self.frames = {}
    for page_cls in (PageTwo, PageThree, PageFour, PageFive):
        frame = page_cls(parent=container, controller=self)
        self.frames[page_cls.__name__] = frame
        frame.grid(row=0, column=0, sticky="nsew")
    self.show_frame("PageTwo")
def getResult(self, src, save_image=False):
    """Run OCR over the image at *src* and return the recognized document.

    @param src {string} path of the source image
    @param save_image {bool} also write a debug image when True
    @return {OCRDocument} recognized document, or None when the image
        cannot be prepared
    """
    pro = DataProcessor(src, ImageType.RAW, save_image=save_image)
    if pro.prepare() is None:
        logger.error('image error:{0}'.format(src))
        return None

    # Binarize the batch result and feed it to the OCR engine via a temp file.
    buffer = pro.tobinary(pro.batch())
    temp_file_name = self.create_TemporyFile(buffer, True)
    document = self.ocr.recognize(temp_file_name)
    os.remove(temp_file_name)

    # Append the recognized names to the corpus, stamped to millisecond
    # precision ('%f' gives microseconds; [:-3] truncates to ms).
    output = '#' + datetime.now().strftime('%F %T.%f')[:-3] + '\n'
    output += '\n'.join(document.names()) + '\n'
    with Serializer.open_stream('../temp/corpus.txt', mode='a') as file:
        file.write(output)

    # ocr corpus data -> NaiveBayes classifier
    # ranking name swap change
    # NOTE(review): `change` is computed but never applied — the call below was
    # left commented out (and misspells `document`); confirm intended behavior.
    change = self.naivebayes.predict_all(document.names())
    #doucument.changeNames(change)
    document.dump()
    return document
def __init__(self, media, target=None):
    """Prepare binary and color images for *media* and ensure the output dir exists."""
    self.media = media
    self.target = target

    processor = DataProcessor(self.media, ImageType.PLAN)
    processor.prepare()
    self.binary = processor.batch()
    self.color = processor.color

    self.drawClipSource = True
    self.imageout_dir = '../temp/trash'
    os.makedirs(self.imageout_dir, exist_ok=True)
def main():
    """Scrape wiki pages at the configured interval and sync results to Azure storage."""
    config = ConfigParser()
    config.read("config.ini")

    interval = config.getint("main", "interval")
    base_endpoint = config.get("main", "base_endpoint")
    conn_str = config.get("azure", "storage_account_conn_str")

    azure_client = AzureStorageClient(conn_str, 'asdsWiki')
    table_data = azure_client.get_table()

    # Scrape, preprocess in place, then push the frame back to the table.
    frame = WikiScrapper(base_endpoint).start(table_data, interval)
    DataProcessor().preprocess_dataframe(frame)
    azure_client.update_by_dataframe(frame)
def main():
    """Download CPSC recall data chosen via the UI and save it as timestamped JSON."""
    ui = UserInterface()
    builder = URLBuilder()
    dao = WebDAO()
    processor = DataProcessor()

    builder.set_args_dict(ui.get_args_dict())
    builder.build()

    dao.set_download_url(builder.get_url())
    dao.download()

    processor.set_raw_content(dao.get_raw_content())
    processor.save_json(
        f"target/cpsc_recalls_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json")
from dataprocessor import DataProcessor
from preprocess import clean
from fileio import FileIO
from joblib import load

import numpy as np

processor = DataProcessor()
io = FileIO()
path_negation_words = 'negation'


def predict(document):
    """Print the sentiment (positive/neutral/negative) predicted for *document*.

    The raw text is cleaned, number-normalized, TF-IDF transformed, and fed
    to a classifier deserialized from the "model_test" joblib file.
    """
    doc_clean = clean.clean_review(document)
    doc_clean_process_number = clean.number_process(doc_clean)
    print(doc_clean_process_number)

    tfidf = processor.transform(np.array([doc_clean_process_number]))
    # FIX: the original bound the model to `reload` and the prediction to
    # `predict`, shadowing this very function inside its own body.
    model = load("model_test")
    label = model.predict(tfidf)
    if label == "2":
        print("POSITIVE :)")
    elif label == "1":
        print("NEUTRAL")
    else:
        print("NEGATIVE -_-")
import matplotlib.pyplot as plt
import os
import numpy as np
import folium
import pytz as tz  # better alternatives -> Apache arrow or pendulum
from datetime import datetime
from PIL import Image
import urllib
import urllib.request
import wget
import ray
from scipy.spatial import Voronoi
from numpy import array

# data process
# NOTE(review): hard-coded absolute path — only works on the author's machine.
dataprocessor = DataProcessor('/Users/wangyifan/Google Drive/checkin',
                              'loc-gowalla_totalCheckins.txt')
df = dataprocessor.load_date()  # NOTE(review): likely meant "load_data" — confirm against DataProcessor
df = dataprocessor.data_filter(df)
df = dataprocessor.data_process(df)
# df = df.sample(n=2000, replace=False).reset_index(drop=True)

# config: data 30000 cluster_k: 20
df = df[:30000]
df_kmeans = df.copy()
df_kmeans = df_kmeans[['lat', 'lon']]

# K-means configuration constants.
batch_num = 10
cluster_k = 20
epsilon = 1e-4
precision = 1e-6
iteration = 10
# NOTE(review): the triple-quote below opens a string that continues past
# this chunk — the remainder of the file is not visible here.
"""
pd.set_option('display.max_row', 10000)
pd.set_option('display.max_columns', 1000)

if __name__ == '__main__':
    # Read the data set and drop redundant features.
    data_file = "/Users/salil/Downloads/training_car.csv"
    df_read = pd.read_csv(data_file)
    df_read = df_read.drop([
        'AUCGUART', 'PRIMEUNIT', 'Nationality', 'VNZIP1', 'VNST', 'BYRNO',
        'WheelTypeID', 'PurchDate', 'VehYear'
    ], axis=1)

    data = DataProcessor()
    total_columns = df_read.columns
    catcols, contcols = data.get_cat_cont_cols(df_read, total_columns)
    print("Categorical columns: ", catcols)

    # Remove the row id and the target from the continuous feature list.
    uid = ['RefId']
    target = ['IsBadBuy']
    contcols = list(set(contcols) - set(uid) - set(target))
    features = catcols + contcols
    print("Numerical columns after target and id removal: ", contcols)

    # FIX: use .loc instead of chained attribute indexing
    # (df.Col[mask] = x) — chained assignment may write to a temporary
    # copy and raises SettingWithCopyWarning.
    df_read.loc[df_read.Transmission == 'Manual', 'Transmission'] = 'MANUAL'
    df_read.loc[df_read.Color == 'NOT AVAIL', 'Color'] = 'NA'
    df_read.loc[df_read.Color == 'OTHER', 'Color'] = 'NA'
    df_read.loc[df_read.TopThreeAmericanName == 'OTHER',
                'TopThreeAmericanName'] = 'NA'
def kickoffDataProcessor(self):
    """Process the collected results with DataProcessor and echo them to stdout."""
    from dataprocessor import DataProcessor

    processor = DataProcessor(self.results)
    processor.process()
    print("ALL RESULTS: \n" + str(self.results) + "\n")
def initUI(self):
    """Build the main window: File menu, data browser/processor panes,
    image viewer, Mayavi 3-D scene, and the splitter-based layout."""
    # Menu
    self.menubar = self.menuBar()
    self.fileMenu = self.menubar.addMenu('&File')
    self.openAct = QAction('&Open', self)
    self.openAct.setShortcut('Ctrl+O')
    self.openAct.setIcon(QIcon("./image/open.ico"))
    self.saveAct = QAction('&Save', self)
    self.saveAct.setShortcut('Ctrl+S')
    self.saveAct.setIcon(QIcon("./image/save.ico"))
    self.importAct = QAction('&Import Data', self)
    self.importAct.setShortcut('Ctrl+I')
    self.importAct.setIcon(QIcon("./image/import.ico"))
    self.exportAct = QAction('&Export Data', self)
    self.exportAct.setShortcut('Ctrl+E')
    self.exportAct.setIcon(QIcon("./image/export.ico"))
    self.exportAct.setEnabled(False)  # disabled until there is data to export
    self.exitAct = QAction('&Exit', self)
    self.exitAct.setIcon(QIcon("./image/exit.ico"))
    self.fileMenu.addAction(self.openAct)
    self.fileMenu.addAction(self.saveAct)
    self.fileMenu.addSeparator()
    self.fileMenu.addAction(self.importAct)
    self.fileMenu.addAction(self.exportAct)
    self.fileMenu.addSeparator()
    self.fileMenu.addAction(self.exitAct)
    self.importAct.triggered.connect(self.OnImport)
    self.exportAct.triggered.connect(self.OnExport)
    self.exitAct.triggered.connect(self.close)

    # Data Browser
    self.DataBrowser = DataBrowser(self)

    # Process Region Expansion button (toggles the DataProcessor pane)
    self.PRButton = QPushButton(">")
    self.PRButton.setFixedSize(20, 80)
    self.PRButton.setCheckable(True)
    self.PRButton.toggled.connect(self.showDataProcessor)

    # Data Processor
    self.DataProcessor = DataProcessor(self)

    # Image Viewer
    self.ImageViewer = ImageViewer(self)

    # Mayavi Region Expansion button (toggles the 3-D scene)
    self.MYButton = QPushButton(">")
    self.MYButton.setFixedSize(20, 80)
    self.MYButton.setCheckable(True)
    #self.MYButton.setEnabled(False)
    self.MYButton.toggled.connect(self.show3D)

    # Mayavi scene
    self.MYWidget = MYWidget(self)

    # Layout
    self.panel, self.splitter, self.Databox, self.DataWidget = self.WinLayout()
    # Defer the initial splitter placement until the event loop is running.
    QTimer.singleShot(10, lambda: self.splitter.moveSplitter(
        self.DataBrowser.minimumWidth(), 1))
    self.splitter.splitterMoved.connect(self.splitterMovedEvent)

    # center panel
    self.centralPanel = QWidget(self)
    self.centralPanel.setLayout(self.panel)
    self.setCentralWidget(self.centralPanel)
    self.setWindowTitle('ARPES Data Viewer -- By Wei Yao -- Ver 1.0')
    self.show()
    self.initCompleteFlag = True  # signals that construction-time events may now be handled
def initUI(self):
    """Lay out the stock-analysis window: filename label, output text area,
    an indeterminate splash progress bar, and upload/filter/process controls."""
    self.master.title("Analisa Harga Saham")
    self.pack(fill=BOTH, expand=True)

    self.columnconfigure(1, weight=1)
    self.columnconfigure(3, pad=7)
    self.rowconfigure(3, weight=1)
    self.rowconfigure(5, pad=7)

    self.lbl = Label(self, text="Filename: ")
    self.lbl.grid(sticky=W, pady=4, padx=5, columnspan=4)

    self.right_frame = Frame(self, width=800, height=400, borderwidth=1)
    self.right_frame.grid(row=2, column=1, columnspan=5, rowspan=4,
                          padx=5, sticky=E + W + S + N)
    self.right_frame.config(relief=SOLID)

    self.area = Text(self.right_frame, height=30, width=40)
    self.area.grid(row=0, column=1, padx=5, sticky=W + S + N + E)

    # Borderless, draggable, always-on-top splash window for the progress bar.
    self.splash = Toplevel(self.right_frame)
    self.splash.overrideredirect(True)
    self.splash.geometry('200x23+100+100')
    self.splash.overrideredirect(1)  # NOTE(review): redundant with the call above
    self.splash.bind("<B1-Motion>", self.move_window)
    self.splash.attributes('-topmost', 'true')

    # Center the splash on the screen.
    window_height = 23
    window_width = 400
    screen_width = self.splash.winfo_screenwidth()
    screen_height = self.splash.winfo_screenheight()
    x_cordinate = int((screen_width / 2) - (window_width / 2))
    y_cordinate = int((screen_height / 2) - (window_height / 2))
    self.splash.geometry("{}x{}+{}+{}".format(window_width, window_height,
                                              x_cordinate, y_cordinate))
    self.splash.withdraw()  # hidden until a long-running task shows it

    pb = Progressbar(self.splash, orient=HORIZONTAL, length=400)
    pb.config(mode='indeterminate')
    pb.start(10)
    pb.grid(row=1, column=1, sticky=W + E + S + N)
    # self.splash.withdraw()

    self.dp = DataProcessor()

    # upload
    # lblUpload = Label(self, text="upload")
    # lblUpload.grid(row=1, column=0, columnspan=2)
    abtn = Button(self, text="Upload", command=self.openFile)
    abtn.grid(row=1, column=0, sticky=W, padx=5)

    self.left_frame = Frame(self, width=200, height=400, borderwidth=1)
    self.left_frame.grid(row=2, column=0)
    self.left_frame.config(relief=SOLID)
    # self.chkBox = Checkbutton(self.left_frame, text = "All data", variable=self.is_all_data, command=self.cbCallback)
    # self.chkBox.grid(row=1, column=0, sticky=W, padx=5)

    Separator(self.left_frame, orient=HORIZONTAL).grid(
        row=2, columnspan=1, ipadx=75, padx=5, sticky=W)

    # NOTE(review): the method self.rangeFrame is rebound here to the Frame it
    # returns, so it can only ever be called once.
    self.rangeFrame = self.rangeFrame()  # Frame(self.left_frame, borderwidth = 1)
    self.rangeFrame.grid(row=3, column=0, columnspan=2)

    # Button Filter
    self.btnFilter = Button(self.left_frame, text="Filter", command=self.callFilter)
    self.btnFilter.grid(row=4, column=0, sticky=W, padx=5)

    Separator(self.left_frame, orient=HORIZONTAL).grid(
        row=5, columnspan=1, ipadx=75, padx=5, sticky=W)

    # Stock picker combobox (labels are in Indonesian: "Pilih Saham" = "choose stock").
    self.txSaham = Text(self.left_frame)
    self.cbSaham = Combobox(self.left_frame, textvariable=self.selected_saham)
    self.cbSaham['values'] = []  # self.kodeSaham
    self.cbSaham['state'] = 'readonly'  # normal
    self.cbSaham.set('-- Pilih Saham --')
    self.cbSaham.grid(row=7, column=0, padx=5, pady=5)

    # Process button ("proses" = "process")
    self.btnProses = Button(self.left_frame, text="proses", command=self.callProses)
    self.btnProses.grid(row=8, column=0, sticky=W, padx=5)

    Separator(self.left_frame, orient=HORIZONTAL).grid(
        row=9, columnspan=1, ipadx=75, padx=5, sticky=W)
from datasources import BPMData data_source = BPMData() elif bpm_name_parsed == "all": from datasources_all import BPMDataAll data_source = BPMDataAll() else: from datasources_bpm import BPMData data_source = BPMData(bpm_name=bpm_name_parsed) if data_source is None: print("Data source doesn't exists!!! You can't use this program!!!") exit() data_proc_X = DataProcessor("X") data_proc_Z = DataProcessor("Z") settingsControl = SettingsControl() mw = MainWindow(data_source, data_proc_X, data_proc_Z, settingsControl, bpm_name_parsed) mw.setWindowTitle('BTMS ({})'.format(bpm_name_parsed)) icon_path = os.path.dirname(os.path.abspath(__file__)) mw_icon = QIcon() mw_icon.addFile(os.path.join(icon_path, 'etc/icons/app_icon_color.png'), QSize(32, 32)) mw.setWindowIcon(mw_icon) data_source.data_ready.connect(mw.on_data1_ready) data_source.data_ready.connect(mw.on_data3_ready) data_source.data_ready.connect(data_proc_X.on_data_recv) data_source.data_ready.connect(data_proc_Z.on_data_recv)
# NOTE(review): these add_argument calls reference `parser` before any
# definition visible in this chunk — they appear to be the tail of the
# init_parser(parser) function used below; confirm against the original file.
parser.add_argument("--visdom",
                    help="visualize training via visdom_enabled library",
                    default=True)
parser.add_argument("--gru",
                    help="use GRU units instead of LSTM units",
                    default=False)
parser.add_argument(
    "--sanitycheck",
    help=
    'list of words for which the nearest word embeddings are found during training, '
    'serves as sanity check, i.e. "dog family king eye"',
    default="dog family king eye")

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    init_parser(parser)
    args = parser.parse_args()

    data_proc = DataProcessor(args)
    model = Rec2Vec(data_proc)

    bytes_read = 0
    epochs = 30
    # The byte offset is carried across epochs so the corpus is streamed
    # continuously rather than restarted each epoch.
    for e in range(epochs):
        print(f"Starting epoch: {e}")
        bytes_read = model._train(previously_read=bytes_read, epoch=e)

    # Persist both embedding matrices after training.
    model.save(f"trained/Trefined_Oembeddings{epochs}.vec", model.in_embeddings)
    model.save(f"trained/Trefined_Vembeddings{epochs}.vec", model.v_embeddings)
def __init__(self):
    """Wire up the frequency-measurer main window: load the Qt Designer .ui,
    connect the BPM data source to the X/Z data processors, and hook every
    control-widget signal to its processor slot and plot handler."""
    super(MainWindow, self).__init__()
    self.ui = uic.loadUi('MainWindow.ui', self)
    self.setWindowTitle("Frequency Measurer")
    self.window_str = "None"   # selected window name — presumably an FFT window; confirm
    self.frq_founded = 0.0     # last frequency result — TODO confirm units

    self.buttonExit.clicked.connect(self.on_exit_button)
    self.buttonExit.clicked.connect(QApplication.instance().quit)

    # One data source feeds both per-axis processors.
    self.data_source = BPMData(1024, self)
    self.data_source.data_ready.connect(self.on_data1_ready)
    self.data_source.data_ready.connect(self.on_data3_ready)

    self.data_proc_X = DataProcessor("X")
    self.data_proc_Z = DataProcessor("Z")
    self.data_source.data_ready.connect(self.data_proc_X.on_data_recv)
    self.data_source.data_ready.connect(self.data_proc_Z.on_data_recv)
    self.data_proc_X.data_processed.connect(self.on_data2_ready)
    self.data_proc_Z.data_processed.connect(self.on_data4_ready)

    # X-axis control widget -> X processor
    self.controlWidgetX.window_changed_str.connect(
        self.data_proc_X.on_wind_changed)
    self.controlWidgetX.groupBox.setTitle("Data_X")
    self.controlWidgetX.set_str_id("Data_X")
    self.controlWidgetX.scale_changed_obj.connect(self.on_scale_changing)

    # Z-axis control widget -> Z processor
    self.controlWidgetZ.window_changed_str.connect(
        self.data_proc_Z.on_wind_changed)
    self.controlWidgetZ.groupBox.setTitle("Data_Z")
    self.controlWidgetZ.set_str_id("Data_Z")
    self.controlWidgetZ.scale_changed_obj.connect(self.on_scale_changing)

    self.controlWidgetX.method_changed_str.connect(
        self.data_proc_X.on_method_changed)
    self.controlWidgetX.boards_changed.connect(
        self.data_proc_X.on_boards_changed)
    self.controlWidgetZ.method_changed_str.connect(
        self.data_proc_Z.on_method_changed)
    self.controlWidgetZ.boards_changed.connect(
        self.data_proc_Z.on_boards_changed)

    # Persisted widget settings.
    self.settingsControl = SettingsControl()
    self.settingsControl.add_object(self.controlWidgetX)
    self.settingsControl.add_object(self.controlWidgetZ)
    self.buttonRead.clicked.connect(self.on_read_button)
    self.buttonSave.clicked.connect(self.on_save_button)
    self.settingsControl.read_settings()

    self.data_proc_X.data_processed.connect(self.on_freq_status_X)
    self.data_proc_Z.data_processed.connect(self.on_freq_status_Z)

    self.plots_customization()
    self.controlWidgetX.boards_changed.connect(self.boards_X_changed)
    self.controlWidgetZ.boards_changed.connect(self.boards_Z_changed)

    # Signal and spectrum curves for both axes (red = X, blue = Z).
    self.data_curve1 = self.ui.plotX.plot(pen='r',
                                          title='Generated signal X_plot')
    self.data_curve2 = self.ui.plotFX.plot(
        pen='r', title='Fourier Transform X_plot')
    self.data_curve3 = self.ui.plotZ.plot(pen='b',
                                          title='Generated signal Z_plot')
    self.data_curve4 = self.ui.plotFZ.plot(
        pen='b', title='Fourier Transform Z_plot')
def __init__(self):
    """Create the arduino data processor and the odometry publisher."""
    processor = DataProcessor("arduino_data", 0.025, 0.210)
    self.arduino_data_processor = processor
    self.pub = Publisher('odom', "odom", "base_link")
from encoder.oneplane import OnePlaneEncoder
from encoder.sevenplanes import SevenPlaneEncoder
# import arch
import smallarch as arch
from keras.models import Sequential
from keras.layers.core import Dense
from keras.callbacks import ModelCheckpoint
from keras.utils import to_categorical

# 19x19 Go board; one output class per board point.
go_board_rows, go_board_cols = 19, 19
num_classes = go_board_rows * go_board_cols
num_games = 100

# encoder = OnePlaneEncoder((go_board_rows, go_board_cols))
encoder = SevenPlaneEncoder((go_board_rows, go_board_cols))

processor = DataProcessor(encoder)
generator = processor.load_go_data('train', num_games, use_generator=True)
X = generator.generate(32, num_classes)  # presumably batch size 32 — confirm in generator
print(X)
# test_generator =processor.load_go_data('test', num_games,use_generator=True)

# from split import Splitter
# dir = 'dataset/data'
# splitter = Splitter(data_dir=dir)
# data = splitter.draw_data('train', num_games)
# data_test = splitter.draw_data('test', num_games)
# generator = DataGenerator(dir, data)
# test_generator = DataGenerator(dir,data_test)
def __init__(self, data_files=None, data_processor=None):
    """Set up model constructors and hyper-parameter search spaces.

    Parameters
    ----------
    data_files : list of str, optional
        Paths of the data set files used to build models.
    data_processor : DataProcessor, optional
        Processor used to split/prepare data. When omitted, a fresh
        DataProcessor(test_set_proportion=0.20, random_seed=42) is created
        per instance.
    """
    # FIX: the original signature used mutable/shared defaults
    # (data_files=[] and a DataProcessor(...) evaluated once at definition
    # time), so every instance constructed without arguments shared the same
    # list and the same processor object.
    self.data_files = [] if data_files is None else data_files
    self.data_processor = (
        DataProcessor(test_set_proportion=0.20, random_seed=42)
        if data_processor is None else data_processor)

    # Model name -> sklearn estimator class.
    self.model_constructors = {
        'decision_tree': DecisionTreeClassifier,
        'random_forest': RandomForestClassifier,
        'naive_bayes': GaussianNB,
        'svc_linear': SVC,
        'svc_polynomial': SVC,
        'svc_rbf': SVC,
        'svc_sigmoid': SVC
    }

    # Per-model fixed constructor kwargs and hyper-parameter search grids.
    self.model_params = {
        "decision_tree": {
            "constructor": None,
            "hp_search": {
                "max_depth": np.arange(1, 100),
                "min_samples_leaf": [1, 5, 10, 20, 50, 100, 250]
            }
        },
        "random_forest": {
            "constructor": None,
            "hp_search": {
                "criterion": ["gini", "entropy"],
                "n_estimators": np.arange(10, 50),
                # FIX: was "max_depths" — not a RandomForestClassifier
                # parameter, so the search would fail/ignore it.
                "max_depth": np.arange(1, 100),
                "min_samples_leaf": [1, 5, 10, 20, 50, 100, 250]
            }
        },
        "naive_bayes": {
            "constructor": None,
            "hp_search": None
        },
        "svc_linear": {
            "constructor": {
                "kernel": "linear"
            },
            "hp_search": {
                'C': [0.01, 0.1, 1, 10, 100],
                'gamma': [1, 0.1, 0.01, 0.001, 1e-4, 1e-5, 1e-6, 1e-7,
                          1e-8, 1e-9],
            }
        },
        "svc_polynomial": {
            "constructor": {
                "kernel": "poly"
            },
            "hp_search": {
                'C': [0.01, 0.1, 1, 10, 100],
                'gamma': [1, 0.1, 0.01, 0.001, 1e-4, 1e-5, 1e-6, 1e-7,
                          1e-8, 1e-9],
                'degree': np.arange(3, 10)
            }
        },
        "svc_rbf": {
            "constructor": {
                "kernel": "rbf"
            },
            "hp_search": {
                'C': [0.01, 0.1, 1, 10, 100],
                'gamma': [1, 0.1, 0.01, 0.001, 1e-4, 1e-5, 1e-6, 1e-7,
                          1e-8, 1e-9],
            }
        },
        "svc_sigmoid": {
            "constructor": {
                "kernel": "sigmoid"
            },
            "hp_search": {
                'C': [0.01, 0.1, 1, 10, 100],
                'gamma': [1, 0.1, 0.01, 0.001, 1e-4, 1e-5, 1e-6, 1e-7,
                          1e-8, 1e-9],
            }
        },
    }

    self.processed_data_files = {}