def on_actRun_triggered(self):
    """Handle the Run action: gather panel parameters and launch the
    data-analysis worker on a background QThread.

    :return: None
    """
    # Disable the action so Run cannot be triggered again while a job is active.
    self.ui.actRun.setEnabled(False)
    self.key_para["SaveData_Statue"] = False
    # Panel parameters are read right before the run and merged into key_para.
    self.key_para.update(self.get_panel_para())
    print(self.key_para)

    self._data_thread = QThread()
    self.data_analysis = DataAnalysis(self.key_para)

    # Wire worker signals to the UI: progress bar, text browser, status bar,
    # and thread shutdown once the run finishes.
    self.data_analysis.pbar.connect(self.set_progressBar_int)
    self.data_analysis.tbw.connect(self.add_textBrowser_str)
    self.data_analysis.sbar.connect(self.add_statusBar_str)
    self.data_analysis.run_end.connect(lambda: self.stop_thread(self._data_thread))

    # Run the worker on the background thread; when the thread finishes,
    # prepare the drawing step.
    self.data_analysis.moveToThread(self._data_thread)
    self._data_thread.started.connect(self.data_analysis.run)
    self._data_thread.finished.connect(self.draw_prepare)

    busy_msg = "数据计算中..."
    self.add_statusBar_str(busy_msg)
    self.add_textBrowser_str(busy_msg)
    self._data_thread.start()
    print(f'开启{self._data_thread.currentThread()}线程,现在线程状态 :', self._data_thread.isRunning())
def main(file_name):
    """Load the users file, then report the ten most frequent languages
    and country top-level domains."""
    analysis = DataAnalysis(file_name)
    analysis.read_data(file_name)
    print("Languages:")
    print_output(analysis.top_n_lang_freqs(10))
    print("Top level country domains:")
    print_output(analysis.top_n_country_tlds_freqs(10))
def analyze_and_train():
    """Run exploratory data analysis on the input, then train, evaluate
    and save a model."""
    # NOTE(review): `input` shadows the builtin and must be a module-level
    # data path/config defined elsewhere — confirm; same for `model_name`.
    explorer = DataAnalysis(input)
    explorer.perform_data_analysis()

    trainer = Trainer(data_path=input, model_name=model_name)
    trainer.train()
    trainer.eval()
    trainer.save()
def main(file_name):
    """Print the ten most frequent languages and two-letter country
    top-level domains from the given file."""
    stats = DataAnalysis(file_name)
    # NOTE(review): unlike the sibling mains in this collection, no
    # read_data() call precedes the queries — presumably this constructor
    # loads the file itself; confirm against DataAnalysis.__init__.
    print("Languages:")
    print_output(stats.top_n_lang_freqs(10))
    print("Top level country domains:")
    print_output(stats.top_n_country_tlds_freqs(10))
def main(file_name):
    """Analyze the users file for top usage of languages and top level domain."""
    report = DataAnalysis(file_name)
    report.read_data(file_name)
    # Ten most common languages.
    print("Languages:")
    print_output(report.top_n_lang_freqs(10))
    # Ten most common two-letter country top-level domains.
    print("Top level country domains:")
    print_output(report.top_n_country_tlds_freqs(10))
def splitDataFirstMode():
    """Split the full dataset once, then score prediction accuracy for a
    growing training-subset fraction.

    Relies on module-level collaborators: dp (data provider), dirName,
    saveDataset, dataInfo, selectedSizes, selectedAlgorithms, randomSeeds,
    savePredictionScores. Returns None; results are persisted as side effects.
    """
    # Use all available data.
    sampleData = dp.returnData(1, randomSeed=10)
    saveDataset(sampleData, 'AD_dataset')
    # Split data into X and y.
    da = DataAnalysis(dirName=dirName)
    X, y = da.splitXY(sampleData)
    # Split data into training (80%) and testing (20%) set.
    xTrain, xTest, yTrain, yTest = da.splitTrainTest(X, y, trainSize=0.8)
    saveDataset(xTrain.assign(normality=yTrain.values), 'AD_set_train')
    saveDataset(xTest.assign(normality=yTest.values), 'AD_set_test')
    # Data characteristics for the full (size 1) dataset.
    dataInfo[1] = da.getDataCharacteristics(yTrain, yTest)
    # Run predictions for each requested training-set fraction.
    for datasetSize in (selectedSizes or [0.2, 0.4, 0.6, 0.8, 1]):
        # BUG FIX: the original fallback branch called
        # getScores(xTrain, yTrain, xTest, yTest, ...), swapping yTrain and
        # xTest relative to the primary call's (xTrain, xTest, yTrain, yTest).
        # Build the optional kwargs once and use a single consistent call.
        score_kwargs = {'trainSize': datasetSize, 'randomSeeds': randomSeeds}
        if selectedAlgorithms:
            score_kwargs['selectedAlgorithms'] = selectedAlgorithms
        noOfSamples, predictions = da.getScores(xTrain, xTest, yTrain, yTest,
                                                **score_kwargs)
        # Save results for graphs and .csv files.
        savePredictionScores(noOfSamples, predictions, datasetSize)
def normalMode():
    """Score prediction accuracy on growing fractions of the whole dataset,
    re-sampling the data at each fraction.

    Relies on module-level collaborators: dp (data provider), dirName,
    saveDataset, dataInfo, selectedSizes, selectedAlgorithms,
    savePredictionScores. Returns None; results are persisted as side effects.
    """
    da = DataAnalysis(dirName=dirName)
    # Run predictions for each requested dataset fraction.
    for datasetSize in (selectedSizes or [0.2, 0.4, 0.6, 0.8, 1]):
        # Get the percentage of all data.
        sampleData = dp.returnData(datasetSize, randomSeed=10)
        saveDataset(sampleData, 'AD_dataset')
        # Split data into X and y.
        X, y = da.splitXY(sampleData)
        # Split data into training (80%) and testing (20%) set.
        xTrain, xTest, yTrain, yTest = da.splitTrainTest(X, y, trainSize=0.8)
        # Get data characteristics for current dataset size.
        dataInfo[datasetSize] = da.getDataCharacteristics(yTrain, yTest)
        # trainSize=1: the whole training split is used, so no further data
        # splitting is done and multiple runs make no sense — one seed suffices.
        # BUG FIX: the original fallback branch swapped yTrain/xTest and
        # reverted to trainSize=datasetSize with multiple seeds, contradicting
        # the intent above; both branches now share one consistent call.
        score_kwargs = {'trainSize': 1, 'randomSeeds': [10]}
        if selectedAlgorithms:
            score_kwargs['selectedAlgorithms'] = selectedAlgorithms
        noOfSamples, predictions = da.getScores(xTrain, xTest, yTrain, yTest,
                                                **score_kwargs)
        # Save results for graphs and .csv files.
        savePredictionScores(noOfSamples, predictions, datasetSize)
def _main_thread_loop():
    """Start the worker threads, then restart any thread that dies.

    Loops forever, polling every THREAD_CHECK_TIMEOUT seconds. Rebinds the
    module-level thread objects, since a finished Thread cannot be restarted.
    """
    global thread_DH
    global thread_DA1
    global thread_DBStrip
    global thread_J
    global packet_queue
    # Bring up the worker threads.
    thread_DH.start()
    thread_DA1.start()
    thread_DBStrip.start()
    thread_J.start()
    # Restart threads as they die.
    # FIX: Thread.isAlive() was removed in Python 3.9; is_alive() has existed
    # since Python 2.6, so this change is backward compatible.
    while True:
        if not thread_DH.is_alive():
            thread_DH = DataHarvest(packet_queue)
            thread_DH.start()
            logger.warning('Поток сборщика данных перезапущен.')
        if not thread_DA1.is_alive():
            thread_DA1 = DataAnalysis(packet_queue)
            thread_DA1.start()
            logger.warning('Поток анализа данных канала перезапущен.')
        if not thread_DBStrip.is_alive():
            thread_DBStrip = DBStriper()
            thread_DBStrip.start()
            logger.warning('Поток удаления старых данных перезапущен.')
        if not thread_J.is_alive():
            thread_J = EventJournal()
            thread_J.start()
            logger.warning('Поток журналирования перезапущен.')
        sleep(THREAD_CHECK_TIMEOUT)
# Load config, parse the labelled markets/articles CSV, then build the model.
# FIX: the original leaked file handles via json.load(open(...)) and
# csv.reader(open(...)); both files are now closed by context managers.
with open("../config.json") as config_file:
    config = json.load(config_file)

# Parse markets/articles.
articles = []
# The file must stay open while csv.reader iterates it.
with open("articles.csv") as articles_file:
    for row in csv.reader(articles_file):
        if row[0] == "Article Title":
            # Skip header
            continue
        articles.append({
            "article": {"title": row[0]},
            "market": {
                "symbol": "TEST_MARKET",
                "entities": row[1].split(","),
                "wikipedia_urls": row[2].split(","),
                "target_words": row[3].split(","),
                "anti_target_words": row[4].split(","),
            },
            # Column 5 is the integer label for this article.
            "label": int(row[5]),
        })

da = DataAnalysis(config, load_model=False)
da.create_model(articles)
da.load_model()
import sys
import matplotlib.pyplot as plt
import intrinio
from data_analysis import DataAnalysis

# USERNAME = '******'
# PASSWORD = '******'
USERNAME = '******'
PASSWORD = '******'

# NOTE(review): DataCollection is used here but never imported in this file —
# presumably a missing import; confirm which module provides it.
data_collection = DataCollection(username=USERNAME, password=PASSWORD)
ticker_symbol = "AAPL"
# Fetch the date/price history for the ticker.
dates, prices = data_collection.retrieve_data(ticker_symbol)
da = DataAnalysis()
# data = da.get_yearly_trends(dates, prices, [3])
# NOTE(review): presumably searches bound pairs in [0.3, 0.5] at 0.01 steps
# for the best sell rule on a 100-unit immediate purchase — confirm against
# DataAnalysis.get_optimal_bounds.
bounds, sell_dates, profits = da.get_optimal_bounds(dates, prices, low_bound_min=0.3, top_bound_max=0.5, interval=0.01, investment=100, purchase_strategy="immediate")
# Plot prices with the computed sell dates marked.
da.plot_data(dates, prices, ticker_symbol, sell_dates=sell_dates)
# if sys.argv[1]:
#     ticker = sys.argv[1]
#     start_date = sys.argv[2]
#     results = data_collection.get_prices(ticker, start_date)
#     data_collection.plot_data(ticker)
#     data_collection.save_data(ticker)
import threading import logging from logging.handlers import RotatingFileHandler from time import sleep from sqlalchemy import create_engine from sqlalchemy_utils import database_exists, create_database import os from param import extractParams appdir = os.path.abspath(os.path.dirname(__file__)) logger = logging.getLogger("mitter") packet_queue = Queue() thread_DH = DataHarvest(packet_queue) thread_DA1 = DataAnalysis(packet_queue) thread_DBStrip = DBStriper() thread_J = EventJournal() THREAD_CHECK_TIMEOUT = 1 # Интервал контроля работоспособности потоков в сек. def MainRoutine(): _logger_config() _postgres_db_check() extractParams() try: _main_thread_loop() except Exception:
def AnalysisTrainModels():
    """Run data analysis over data.csv, then train models and return the result.

    Web view handler (reads the framework's `request`); only GET does any work.
    For any other method the function falls through and implicitly returns
    None, unchanged from the original.
    """
    if request.method == 'GET':
        dataset_name = 'data.csv'
        # FIX: the original bound this to an unused local (`out`); the call is
        # kept for its side effects — presumably it performs the analysis on
        # construction; confirm against DataAnalysis.__init__.
        DataAnalysis(dataset_name)
        return MainML(dataset_name)
class Signal(QtWidgets.QWidget):
    """Widget carrying a bare pyqtSignal."""
    signal = QtCore.pyqtSignal()


class COMThreadManual(QtCore.QThread):
    """Background thread that refreshes the COM-port list via the global ui."""

    def __init__(self):
        QtCore.QThread.__init__(self)

    def __del__(self):
        # NOTE(review): blocking in __del__ stalls garbage collection until
        # the thread finishes — confirm this is intentional.
        self.wait()

    def run(self):
        ui.refresh_com()


if __name__ == "__main__":
    import sys

    app = QtWidgets.QApplication(sys.argv)
    app.setStyle('Fusion')
    pinchSensorUI = MainWindow()
    ui = Ui_pinchSensorUI()
    ui2 = DataAnalysis()
    pinchSensorUI.show()
    sys.exit(app.exec_())
    # TODO bootup loading screen