def load_partitioned_graph_image(self, partitioning_name):
    """Load a stored partitioning and install it on this instance.

    The file ``<project root>/partitionings/<partitioning_name>`` is handed
    to ``parse_partitioning``. The graph and the per-partition vertices it
    returns are stored; the grid size it also returns is not used here.

    :param partitioning_name: file name inside the ``partitionings`` folder.
    """
    partitioning_path = (
        str(get_project_root()) + '/partitionings/' + partitioning_name)
    graph, _grid_size, vertices_by_partition = parse_partitioning(
        partitioning_path)

    self.G = graph
    # The loaded state is always recorded as two partitions that are
    # adjacent to each other.
    self.last_number_of_partitions = 2
    self.adjacent_partitions = {1: [2], 2: [1]}
    self.partitions_vertices = vertices_by_partition
def __init__(self):
    """Create the InferSent encoder and load its weights and word vectors.

    The finished encoder is stored in ``self.model``.
    """
    version = 1
    weights_path = get_project_root() / Path(
        "encoder/infersent%s.pkl" % version)

    encoder = InferSent({
        'bsize': 64,
        'word_emb_dim': 300,
        'enc_lstm_dim': 2048,
        'pool_type': 'max',
        'dpout_model': 0.0,
        'version': version,
    })
    encoder.load_state_dict(torch.load(weights_path))

    # Word vector path for the model: version 1 is paired with the GloVe
    # file, any other version with the fastText file.
    if version == 1:
        vectors_file = 'GloVe/glove.840B.300d.txt'
    else:
        vectors_file = '../fastText/crawl-300d-2M.vec'
    encoder.set_w2v_path(get_project_root() / Path(vectors_file))

    # Build the vocabulary of word vectors.
    encoder.build_vocab_k_words(K=100000)
    self.model = encoder
def __init__(self):
    """Index every JSON file below ``<project root>/res/`` by its name.

    The ``res`` folder is walked recursively. Each file whose name ends in
    ``.json`` is parsed and stored in ``self.mapping`` under the value of
    its top-level ``"name"`` key; a later file with the same name replaces
    an earlier one.

    :raises KeyError: if a JSON document has no ``"name"`` key.
    :raises json.JSONDecodeError: if a file is not valid JSON.
    """
    self.mapping = dict()
    path_to_json = os.path.join(get_project_root(), 'res/')
    for root, _dirs, files in os.walk(path_to_json):
        for file in files:
            if not file.endswith('.json'):
                continue
            # Decode explicitly as UTF-8: without ``encoding`` the locale
            # default is used, which mis-reads non-ASCII JSON on systems
            # whose default is not UTF-8.
            with open(os.path.join(root, file), "r",
                      encoding="utf-8") as json_file:
                entry = json.load(json_file)
            self.mapping[entry['name']] = entry
def load_image(self, grid_name, remove_off=False):
    """Convert a grid image into a graph and store the result on ``self``.

    Nothing happens when ``grid_name`` is falsy. Otherwise the image at
    ``<project root>/grids/<grid_name>`` is converted and the graph, the
    indivisible and off areas, the grid size, both dimensions and the time
    the conversion took are recorded as attributes.

    :param grid_name: file name inside the ``grids`` folder.
    :param remove_off: when truthy use
        ``convert_image_to_graph_off_removed``, otherwise
        ``convert_image_to_graph_off_weighted_0``.
    """
    if not grid_name:
        return

    started_at = time.time()
    converter = (convert_image_to_graph_off_removed if remove_off
                 else convert_image_to_graph_off_weighted_0)
    grid_path = str(get_project_root()) + '/grids/' + grid_name
    graph, indivisible, off, size, vertical, horizontal = converter(
        grid_path, self.show_progress)
    self.conversion_time = time.time() - started_at

    self.grid_size = size
    self.G = graph
    self.indivisible_areas = indivisible
    self.off_areas = off
    # Every node initially counts as a partition of its own.
    self.last_number_of_partitions = graph.number_of_nodes()
    self.vertical_size = vertical
    self.horizontal_size = horizontal

    if vertical > horizontal:
        self.bigger_dim = vertical
    else:
        self.bigger_dim = horizontal
    if vertical < horizontal:
        self.smaller_dim = vertical
    else:
        self.smaller_dim = horizontal
def __init__(self) -> None:
    """SCOUTS Constructor. Defines all aspects of the GUI.

    Builds the main window and its three stacked pages (main, samples and
    gating): every widget is created, button signals are connected to the
    handler methods of this class, and the widgets are placed in their
    layouts. The main page is shown at startup.

    Style sheets are looked up in ``self.style`` by key, so that mapping
    must already exist when the constructor runs (presumably a class
    attribute -- it is not defined in this method).
    """
    # ###
    # ### Main Window setup
    # ###

    # Inherits from QMainWindow
    super().__init__()
    self.rootdir = get_project_root()
    self.threadpool = QThreadPool()
    # Sets values for QMainWindow
    self.setWindowTitle("SCOUTS")
    self.setWindowIcon(
        QIcon(
            os.path.abspath(os.path.join(self.rootdir, 'src', 'scouts.ico'))))
    # Creates StackedWidget as QMainWindow's central widget
    self.stacked_pages = QStackedWidget(self)
    self.setCentralWidget(self.stacked_pages)
    # Creates Widgets for individual "pages" and adds them to the StackedWidget
    self.main_page = QWidget()
    self.samples_page = QWidget()
    self.gating_page = QWidget()
    self.pages = (self.main_page, self.samples_page, self.gating_page)
    for page in self.pages:
        self.stacked_pages.addWidget(page)
    # ## Sets widget at program startup
    self.stacked_pages.setCurrentWidget(self.main_page)

    # ###
    # ### MAIN PAGE
    # ###

    # Main page layout
    self.main_layout = QVBoxLayout(self.main_page)

    # ## Title section
    # Title
    self.title = QLabel(self.main_page)
    self.title.setText('SCOUTS - Single Cell Outlier Selector')
    self.title.setStyleSheet(self.style['title'])
    self.title.adjustSize()
    self.main_layout.addWidget(self.title)

    # ## Input section
    # Input header
    self.input_header = QLabel(self.main_page)
    self.input_header.setText('Input settings')
    self.input_header.setStyleSheet(self.style['header'])
    # addWidget() already takes care of parenting the widget, so no
    # separate addChildWidget() call is needed (matches the other headers).
    self.input_header.adjustSize()
    self.main_layout.addWidget(self.input_header)
    # Input frame
    self.input_frame = QFrame(self.main_page)
    self.input_frame.setFrameShape(QFrame.StyledPanel)
    self.input_frame.setLayout(QFormLayout())
    self.main_layout.addWidget(self.input_frame)
    # Input button
    self.input_button = QPushButton(self.main_page)
    self.input_button.setStyleSheet(self.style['button'])
    self.set_icon(self.input_button, 'x-office-spreadsheet')
    self.input_button.setObjectName('input')
    self.input_button.setText(' Select input file (.xlsx or .csv)')
    self.input_button.clicked.connect(self.get_path)
    # Input path box
    self.input_path = QLineEdit(self.main_page)
    self.input_path.setObjectName('input_path')
    self.input_path.setStyleSheet(self.style['line edit'])
    # Go to sample naming page
    self.samples_button = QPushButton(self.main_page)
    self.samples_button.setStyleSheet(self.style['button'])
    self.set_icon(self.samples_button, 'preferences-other')
    self.samples_button.setText(' Name samples...')
    self.samples_button.clicked.connect(self.goto_samples_page)
    # Go to gating page
    self.gates_button = QPushButton(self.main_page)
    self.gates_button.setStyleSheet(self.style['button'])
    self.set_icon(self.gates_button, 'preferences-other')
    self.gates_button.setText(' Gating && outlier options...')
    self.gates_button.clicked.connect(self.goto_gates_page)
    # Add widgets above to input frame Layout
    self.input_frame.layout().addRow(self.input_button, self.input_path)
    self.input_frame.layout().addRow(self.samples_button)
    self.input_frame.layout().addRow(self.gates_button)

    # ## Analysis section
    # Analysis header
    self.analysis_header = QLabel(self.main_page)
    self.analysis_header.setText('Analysis settings')
    self.analysis_header.setStyleSheet(self.style['header'])
    self.analysis_header.adjustSize()
    self.main_layout.addWidget(self.analysis_header)
    # Analysis frame
    self.analysis_frame = QFrame(self.main_page)
    self.analysis_frame.setFrameShape(QFrame.StyledPanel)
    self.analysis_frame.setLayout(QVBoxLayout())
    self.main_layout.addWidget(self.analysis_frame)
    # Cutoff text
    self.cutoff_text = QLabel(self.main_page)
    self.cutoff_text.setText('Type of outlier to select:')
    self.cutoff_text.setToolTip(
        'Choose whether to select outliers using the cutoff value from a reference\n'
        'sample (OutR) or by using the cutoff value calculated for each sample\n'
        'individually (OutS)')
    self.cutoff_text.setStyleSheet(self.style['label'])
    # Cutoff button group
    self.cutoff_group = QButtonGroup(self)
    # Cutoff by sample
    self.cutoff_sample = QRadioButton(self.main_page)
    self.cutoff_sample.setText('OutS')
    self.cutoff_sample.setObjectName('sample')
    self.cutoff_sample.setStyleSheet(self.style['radio button'])
    self.cutoff_sample.setChecked(True)
    self.cutoff_group.addButton(self.cutoff_sample)
    # Cutoff by reference
    self.cutoff_reference = QRadioButton(self.main_page)
    self.cutoff_reference.setText('OutR')
    self.cutoff_reference.setObjectName('ref')
    self.cutoff_reference.setStyleSheet(self.style['radio button'])
    self.cutoff_group.addButton(self.cutoff_reference)
    # Both cutoffs
    self.cutoff_both = QRadioButton(self.main_page)
    self.cutoff_both.setText('both')
    self.cutoff_both.setObjectName('sample ref')
    self.cutoff_both.setStyleSheet(self.style['radio button'])
    self.cutoff_group.addButton(self.cutoff_both)
    # Markers text
    self.markers_text = QLabel(self.main_page)
    self.markers_text.setStyleSheet(self.style['label'])
    self.markers_text.setText('Show results for:')
    self.markers_text.setToolTip(
        'Individual markers: for each marker, select outliers\n'
        'Any marker: select cells that are outliers for AT LEAST one marker'
    )
    # Markers button group
    self.markers_group = QButtonGroup(self)
    # Single marker
    self.single_marker = QRadioButton(self.main_page)
    self.single_marker.setText('individual markers')
    self.single_marker.setObjectName('single')
    self.single_marker.setStyleSheet(self.style['radio button'])
    self.single_marker.setChecked(True)
    self.markers_group.addButton(self.single_marker)
    # Any marker
    self.any_marker = QRadioButton(self.main_page)
    self.any_marker.setText('any marker')
    self.any_marker.setObjectName('any')
    self.any_marker.setStyleSheet(self.style['radio button'])
    self.markers_group.addButton(self.any_marker)
    # Both methods
    self.both_methods = QRadioButton(self.main_page)
    self.both_methods.setText('both')
    self.both_methods.setObjectName('single any')
    self.both_methods.setStyleSheet(self.style['radio button'])
    self.markers_group.addButton(self.both_methods)
    # Tukey text
    self.tukey_text = QLabel(self.main_page)
    self.tukey_text.setStyleSheet(self.style['label'])
    self.tukey_text.setText('Tukey factor:')
    # Tukey button group
    self.tukey_group = QButtonGroup(self)
    # Low Tukey value
    self.tukey_low = QRadioButton(self.main_page)
    self.tukey_low.setText('1.5')
    self.tukey_low.setStyleSheet(self.style['radio button'])
    self.tukey_low.setChecked(True)
    self.tukey_group.addButton(self.tukey_low)
    # High Tukey value
    self.tukey_high = QRadioButton(self.main_page)
    self.tukey_high.setText('3.0')
    self.tukey_high.setStyleSheet(self.style['radio button'])
    self.tukey_group.addButton(self.tukey_high)
    # Add widgets above to analysis frame layout
    self.analysis_frame.layout().addWidget(self.cutoff_text)
    self.cutoff_buttons = QHBoxLayout()
    for button in self.cutoff_group.buttons():
        self.cutoff_buttons.addWidget(button)
    self.analysis_frame.layout().addLayout(self.cutoff_buttons)
    self.analysis_frame.layout().addWidget(self.markers_text)
    self.markers_buttons = QHBoxLayout()
    for button in self.markers_group.buttons():
        self.markers_buttons.addWidget(button)
    self.analysis_frame.layout().addLayout(self.markers_buttons)
    self.analysis_frame.layout().addWidget(self.tukey_text)
    self.tukey_buttons = QHBoxLayout()
    for button in self.tukey_group.buttons():
        self.tukey_buttons.addWidget(button)
    self.tukey_buttons.addWidget(QLabel())  # aligns row with 2 buttons
    self.analysis_frame.layout().addLayout(self.tukey_buttons)

    # ## Output section
    # Output header
    self.output_header = QLabel(self.main_page)
    self.output_header.setText('Output settings')
    self.output_header.setStyleSheet(self.style['header'])
    self.output_header.adjustSize()
    self.main_layout.addWidget(self.output_header)
    # Output frame
    self.output_frame = QFrame(self.main_page)
    self.output_frame.setFrameShape(QFrame.StyledPanel)
    self.output_frame.setLayout(QFormLayout())
    self.main_layout.addWidget(self.output_frame)
    # Output button
    self.output_button = QPushButton(self.main_page)
    self.output_button.setStyleSheet(self.style['button'])
    self.set_icon(self.output_button, 'folder')
    self.output_button.setObjectName('output')
    self.output_button.setText(' Select output folder')
    self.output_button.clicked.connect(self.get_path)
    # Output path box
    self.output_path = QLineEdit(self.main_page)
    self.output_path.setStyleSheet(self.style['line edit'])
    # Generate CSV checkbox
    self.output_csv = QCheckBox(self.main_page)
    self.output_csv.setText('Export multiple text files (.csv)')
    self.output_csv.setStyleSheet(self.style['checkbox'])
    self.output_csv.setChecked(True)
    # Generate XLSX checkbox
    self.output_excel = QCheckBox(self.main_page)
    self.output_excel.setText('Export multiple Excel spreadsheets (.xlsx)')
    self.output_excel.setStyleSheet(self.style['checkbox'])
    self.output_excel.clicked.connect(self.enable_single_excel)
    # Generate single, large XLSX checkbox
    self.single_excel = QCheckBox(self.main_page)
    self.single_excel.setText(
        'Also save one multi-sheet Excel spreadsheet')
    # The trailing space after "output" keeps the adjacent literals from
    # being joined into "outputfile".
    self.single_excel.setToolTip(
        'After generating all Excel spreadsheets, SCOUTS combines them into '
        'a single\nExcel spreadsheet where each sheet corresponds to an output '
        'file from SCOUTS')
    self.single_excel.setStyleSheet(self.style['checkbox'])
    self.single_excel.setEnabled(False)
    self.single_excel.clicked.connect(self.memory_warning)
    # Add widgets above to output frame layout
    self.output_frame.layout().addRow(self.output_button, self.output_path)
    self.output_frame.layout().addRow(self.output_csv)
    self.output_frame.layout().addRow(self.output_excel)
    self.output_frame.layout().addRow(self.single_excel)

    # ## Run & help-quit section
    # Run button (stand-alone)
    self.run_button = QPushButton(self.main_page)
    self.set_icon(self.run_button, 'system-run')
    self.run_button.setText(' Run!')
    self.run_button.setStyleSheet(self.style['run button'])
    self.main_layout.addWidget(self.run_button)
    self.run_button.clicked.connect(self.run)
    # Help-quit frame (invisible)
    self.helpquit_frame = QFrame(self.main_page)
    self.helpquit_frame.setLayout(QHBoxLayout())
    # setContentsMargins replaces the obsolete QLayout.setMargin
    self.helpquit_frame.layout().setContentsMargins(0, 0, 0, 0)
    self.main_layout.addWidget(self.helpquit_frame)
    # Help button
    self.help_button = QPushButton(self.main_page)
    self.set_icon(self.help_button, 'help-about')
    self.help_button.setText(' Help')
    self.help_button.setStyleSheet(self.style['md button'])
    self.help_button.clicked.connect(self.get_help)
    # Quit button
    self.quit_button = QPushButton(self.main_page)
    self.set_icon(self.quit_button, 'process-stop')
    self.quit_button.setText(' Quit')
    self.quit_button.setStyleSheet(self.style['md button'])
    self.quit_button.clicked.connect(self.close)
    # Add widgets above to help-quit layout
    self.helpquit_frame.layout().addWidget(self.help_button)
    self.helpquit_frame.layout().addWidget(self.quit_button)

    # ###
    # ### SAMPLES PAGE
    # ###

    # Samples page layout
    self.samples_layout = QVBoxLayout(self.samples_page)

    # ## Title section
    # Title
    self.samples_title = QLabel(self.samples_page)
    self.samples_title.setText('Name your samples')
    self.samples_title.setStyleSheet(self.style['title'])
    self.samples_title.adjustSize()
    self.samples_layout.addWidget(self.samples_title)
    # Subtitle
    self.samples_subtitle = QLabel(self.samples_page)
    string = (
        'Please name the samples to be analysed by SCOUTS.\n\nSCOUTS searches the first '
        'column of your data\nand locates the exact string as part of the sample name.'
    )
    self.samples_subtitle.setText(string)
    self.samples_subtitle.setStyleSheet(self.style['label'])
    self.samples_subtitle.adjustSize()
    self.samples_layout.addWidget(self.samples_subtitle)

    # ## Sample addition section
    # Sample addition frame
    self.samples_frame = QFrame(self.samples_page)
    self.samples_frame.setFrameShape(QFrame.StyledPanel)
    self.samples_frame.setLayout(QGridLayout())
    self.samples_layout.addWidget(self.samples_frame)
    # Sample name box
    self.sample_name = QLineEdit(self.samples_page)
    self.sample_name.setStyleSheet(self.style['line edit'])
    self.sample_name.setPlaceholderText('Sample name ...')
    # Reference check
    self.is_reference = QCheckBox(self.samples_page)
    self.is_reference.setText('Reference?')
    self.is_reference.setStyleSheet(self.style['checkbox'])
    # Add sample to table
    self.add_sample_button = QPushButton(self.samples_page)
    QShortcut(QKeySequence("Return"), self.add_sample_button,
              self.write_to_sample_table)
    self.set_icon(self.add_sample_button, 'list-add')
    self.add_sample_button.setText(' Add sample (Enter)')
    self.add_sample_button.setStyleSheet(self.style['button'])
    self.add_sample_button.clicked.connect(self.write_to_sample_table)
    # Remove sample from table
    self.remove_sample_button = QPushButton(self.samples_page)
    QShortcut(QKeySequence("Delete"), self.remove_sample_button,
              self.remove_from_sample_table)
    self.set_icon(self.remove_sample_button, 'list-remove')
    self.remove_sample_button.setText(' Remove sample (Del)')
    self.remove_sample_button.setStyleSheet(self.style['button'])
    self.remove_sample_button.clicked.connect(
        self.remove_from_sample_table)
    # Add widgets above to sample addition layout
    self.samples_frame.layout().addWidget(self.sample_name, 0, 0)
    self.samples_frame.layout().addWidget(self.is_reference, 1, 0)
    self.samples_frame.layout().addWidget(self.add_sample_button, 0, 1)
    self.samples_frame.layout().addWidget(self.remove_sample_button, 1, 1)

    # ## Sample table
    self.sample_table = QTableWidget(self.samples_page)
    self.sample_table.setColumnCount(2)
    self.sample_table.setHorizontalHeaderItem(0, QTableWidgetItem('Sample'))
    self.sample_table.setHorizontalHeaderItem(
        1, QTableWidgetItem('Reference?'))
    self.sample_table.horizontalHeader().setSectionResizeMode(
        0, QHeaderView.Stretch)
    self.sample_table.horizontalHeader().setSectionResizeMode(
        1, QHeaderView.ResizeToContents)
    self.samples_layout.addWidget(self.sample_table)

    # ## Save & clear buttons
    # Save & clear frame (invisible)
    self.saveclear_frame = QFrame(self.samples_page)
    self.saveclear_frame.setLayout(QHBoxLayout())
    # setContentsMargins replaces the obsolete QLayout.setMargin
    self.saveclear_frame.layout().setContentsMargins(0, 0, 0, 0)
    self.samples_layout.addWidget(self.saveclear_frame)
    # Clear samples button
    self.clear_samples = QPushButton(self.samples_page)
    self.set_icon(self.clear_samples, 'edit-delete')
    self.clear_samples.setText(' Clear table')
    self.clear_samples.setStyleSheet(self.style['md button'])
    self.clear_samples.clicked.connect(self.prompt_clear_data)
    # Save samples button
    self.save_samples = QPushButton(self.samples_page)
    self.set_icon(self.save_samples, 'document-save')
    self.save_samples.setText(' Save samples')
    self.save_samples.setStyleSheet(self.style['md button'])
    self.save_samples.clicked.connect(self.goto_main_page)
    # Add widgets above to save & clear layout
    self.saveclear_frame.layout().addWidget(self.clear_samples)
    self.saveclear_frame.layout().addWidget(self.save_samples)

    # ###
    # ### GATING PAGE
    # ###

    # Gating page layout
    self.gating_layout = QVBoxLayout(self.gating_page)

    # ## Title section
    # Title
    self.gates_title = QLabel(self.gating_page)
    self.gates_title.setText('Gating & outlier options')
    self.gates_title.setStyleSheet(self.style['title'])
    self.gates_title.adjustSize()
    self.gating_layout.addWidget(self.gates_title)

    # ## Gating options section
    # Gating header
    self.gate_header = QLabel(self.gating_page)
    self.gate_header.setText('Gating')
    self.gate_header.setStyleSheet(self.style['header'])
    self.gate_header.adjustSize()
    self.gating_layout.addWidget(self.gate_header)
    # Gating frame
    self.gate_frame = QFrame(self.gating_page)
    self.gate_frame.setFrameShape(QFrame.StyledPanel)
    self.gate_frame.setLayout(QFormLayout())
    self.gating_layout.addWidget(self.gate_frame)
    # Gating button group
    self.gating_group = QButtonGroup(self)
    # Do not gate samples
    self.no_gates = QRadioButton(self.gating_page)
    self.no_gates.setObjectName('no_gate')
    self.no_gates.setText("Don't gate samples")
    self.no_gates.setStyleSheet(self.style['radio button'])
    self.no_gates.setChecked(True)
    self.gating_group.addButton(self.no_gates)
    self.no_gates.clicked.connect(self.activate_gate)
    # CyToF gating
    self.cytof_gates = QRadioButton(self.gating_page)
    self.cytof_gates.setObjectName('cytof')
    self.cytof_gates.setText('Mass Cytometry gating')
    self.cytof_gates.setStyleSheet(self.style['radio button'])
    self.cytof_gates.setToolTip(
        'Exclude cells for which the average expression of all\n'
        'markers is below the selected value')
    self.gating_group.addButton(self.cytof_gates)
    self.cytof_gates.clicked.connect(self.activate_gate)
    # CyToF gating spinbox
    self.cytof_gates_value = QDoubleSpinBox(self.gating_page)
    self.cytof_gates_value.setMinimum(0)
    self.cytof_gates_value.setMaximum(1)
    self.cytof_gates_value.setValue(0.1)
    self.cytof_gates_value.setSingleStep(0.05)
    self.cytof_gates_value.setEnabled(False)
    # scRNA-Seq gating
    self.rnaseq_gates = QRadioButton(self.gating_page)
    self.rnaseq_gates.setText('scRNA-Seq gating')
    self.rnaseq_gates.setStyleSheet(self.style['radio button'])
    self.rnaseq_gates.setToolTip(
        'When calculating cutoff, ignore reads below the selected value')
    self.rnaseq_gates.setObjectName('rnaseq')
    self.gating_group.addButton(self.rnaseq_gates)
    self.rnaseq_gates.clicked.connect(self.activate_gate)
    # scRNA-Seq gating spinbox
    self.rnaseq_gates_value = QDoubleSpinBox(self.gating_page)
    self.rnaseq_gates_value.setMinimum(0)
    self.rnaseq_gates_value.setMaximum(10)
    self.rnaseq_gates_value.setValue(0)
    self.rnaseq_gates_value.setSingleStep(1)
    self.rnaseq_gates_value.setEnabled(False)
    # Export gated population checkbox
    self.export_gated = QCheckBox(self.gating_page)
    self.export_gated.setText('Export gated cells as an output file')
    self.export_gated.setStyleSheet(self.style['checkbox'])
    self.export_gated.setEnabled(False)
    # Add widgets above to Gate frame layout
    self.gate_frame.layout().addRow(self.no_gates, QLabel())
    self.gate_frame.layout().addRow(self.cytof_gates, self.cytof_gates_value)
    self.gate_frame.layout().addRow(self.rnaseq_gates, self.rnaseq_gates_value)
    self.gate_frame.layout().addRow(self.export_gated, QLabel())

    # ## Outlier options section
    # Outlier header
    self.outlier_header = QLabel(self.gating_page)
    self.outlier_header.setText('Outliers')
    self.outlier_header.setStyleSheet(self.style['header'])
    self.outlier_header.adjustSize()
    self.gating_layout.addWidget(self.outlier_header)
    # Outlier frame
    self.outlier_frame = QFrame(self.gating_page)
    self.outlier_frame.setFrameShape(QFrame.StyledPanel)
    self.outlier_frame.setLayout(QVBoxLayout())
    self.gating_layout.addWidget(self.outlier_frame)
    # Top outliers information
    self.top_outliers = QLabel(self.gating_page)
    self.top_outliers.setStyleSheet(self.style['label'])
    self.top_outliers.setText(
        'By default, SCOUTS selects the top outliers from the population')
    # Bottom outliers data
    self.bottom_outliers = QCheckBox(self.gating_page)
    self.bottom_outliers.setText('Include results for low outliers')
    self.bottom_outliers.setStyleSheet(self.style['checkbox'])
    # Non-outliers data
    self.not_outliers = QCheckBox(self.gating_page)
    self.not_outliers.setText('Include results for non-outliers')
    self.not_outliers.setStyleSheet(self.style['checkbox'])
    # Add widgets above to outlier frame layout
    self.outlier_frame.layout().addWidget(self.top_outliers)
    self.outlier_frame.layout().addWidget(self.bottom_outliers)
    self.outlier_frame.layout().addWidget(self.not_outliers)

    # ## Save/back button
    self.save_gates = QPushButton(self.gating_page)
    self.set_icon(self.save_gates, 'go-next')
    self.save_gates.setText(' Back to main menu')
    self.save_gates.setStyleSheet(self.style['md button'])
    self.gating_layout.addWidget(self.save_gates)
    self.save_gates.clicked.connect(self.goto_main_page)

    # ## Add empty label to take vertical space
    self.empty_label = QLabel(self.gating_page)
    self.empty_label.setSizePolicy(QSizePolicy.Expanding,
                                   QSizePolicy.Expanding)
    self.gating_layout.addWidget(self.empty_label)
""" Fit accelerated failure time model on the merge uncongested and pre-breakdown data. """ import os import pandas as pd from lifelines import WeibullAFTFitter from src.utils import get_project_root import plotly.express as px import plotly.io as pio import numpy as np from matplotlib import pyplot as plt pio.renderers.default = "browser" # Set paths: path_to_project = str(get_project_root()) path_interim = os.path.join(path_to_project, "data", "interim") path_processed = os.path.join(path_to_project, "data", "processed") path_figures_v1 = os.path.join(path_to_project, "figures_v1") if not os.path.exists(path_figures_v1): os.mkdir(path_figures_v1) path_plot_dump = os.path.join(path_figures_v1, "plot_dump") if not os.path.exists(path_plot_dump): os.mkdir(path_plot_dump) path_cap_df_merge_and_meta = os.path.join(path_interim, "all_merge_meta.csv") path_prebreakdown_merge = os.path.join(path_interim, "prebkdn_uncongested_merge_meta.csv") def one_hot_coding_cat(data): hot_code_df = pd.get_dummies(data.geometry_type)
# Run the saved SVM model on the file named by ``test_file`` and write the
# predictions twice: as plain text and as an Excel workbook.
from src.utils import get_project_root, predict_and_save_to_txt, predict_and_save_to_xlsx

ROOT_DIR = get_project_root()
working_dir = f"{ROOT_DIR}/src/testing/svm"
print(ROOT_DIR)

test_file = f"{ROOT_DIR}/resources/sell_detection_train.v1.0.txt"
model_file = f"{working_dir}/svm_final_model_official.sav"

# Plain-text export
txt_result_file = f"{working_dir}/result.txt"
predict_and_save_to_txt(test_file, model_file, txt_result_file)

# Excel export
xlsx_result_file = f"{working_dir}/result.xlsx"
predict_and_save_to_xlsx(test_file, model_file, xlsx_result_file)
import pandas as pd
import numpy as np
import os
from src.utils import get_project_root

# Folder holding the competition CSV files, located under the project root.
DATAFILES_BASEDIR = os.path.join(get_project_root(), 'input/datafiles/')


def get_train_data_v1(season=None):
    """Read the compact results, seeds, conference and team CSV files.

    Every file is read from ``DATAFILES_BASEDIR``. The conference
    descriptions are merged into the team/conference table and a numeric
    ``seednum`` column is derived from the ``Seed`` strings.

    NOTE(review): ``season`` is not used and nothing is returned in the part
    of the function visible here -- the body presumably continues beyond
    this excerpt; confirm against the full file.
    """
    ##################################################
    # read data
    ##################################################
    RegularSeasonCompactResults = pd.read_csv(
        os.path.join(DATAFILES_BASEDIR, 'RegularSeasonCompactResults.csv'))
    NCAATourneyCompactResults = pd.read_csv(
        os.path.join(DATAFILES_BASEDIR, 'NCAATourneyCompactResults.csv'))
    NCAATourneySeeds = pd.read_csv(
        os.path.join(DATAFILES_BASEDIR, 'NCAATourneySeeds.csv'))
    TeamConferences = pd.read_csv(
        os.path.join(DATAFILES_BASEDIR, 'TeamConferences.csv'))
    Conferences = pd.read_csv(
        os.path.join(DATAFILES_BASEDIR, 'Conferences.csv'))
    Teams = pd.read_csv(
        os.path.join(DATAFILES_BASEDIR, 'Teams.csv'))
    # Attach each conference's description (joined on 'ConfAbbrev') and
    # expose it under the name 'conf_descr'.
    TeamConferences = (pd.merge(TeamConferences, Conferences, on='ConfAbbrev')
                       .rename({'Description': 'conf_descr'}, axis=1))
    ##################################################
    # process data
    ##################################################
    # Characters 1-2 of the seed string (the first character is skipped)
    # are converted to an integer seed number.
    NCAATourneySeeds['seednum'] = NCAATourneySeeds['Seed'].str.slice(1, 3).astype(int)
import os
import pandas as pd
from src.utils import fix_birth_date, fix_transaction_date, get_project_root

# Locations of the three input CSV files inside <project root>/data.
root_folder = get_project_root()
person_file_path = os.path.join(
    root_folder, 'data', 'BI_assignment_person.csv')
account_file_path = os.path.join(
    root_folder, 'data', 'BI_assignment_account.csv')
transaction_file_path = os.path.join(
    root_folder, 'data', 'BI_assignment_transaction.csv')


def process_all_files(engine):
    """Load the person, account and transaction files, in that order.

    :param engine: database connection passed on to each loader.
    """
    process_person_file(engine)
    process_account_file(engine)
    process_transaction_file(engine)


def process_person_file(engine):
    """Append the cleaned person CSV to the ``Person`` table.

    Rows that are entirely empty are dropped, and ``birth_date`` is passed
    through ``fix_birth_date`` and converted to ``datetime64[ns]``.
    """
    people = pd.read_csv(person_file_path)
    people.dropna(how='all', inplace=True)
    fixed_birth_dates = people['birth_date'].apply(fix_birth_date)
    people['birth_date'] = fixed_birth_dates.astype('datetime64[ns]')
    people.to_sql('Person', con=engine, if_exists='append', index=False)


def process_account_file(engine):
    """Append the account CSV, unchanged, to the ``Account`` table."""
    pd.read_csv(account_file_path).to_sql(
        'Account', con=engine, if_exists='append', index=False)
import random import traceback from fontTools.fontBuilder import FontBuilder from fontTools.pens.ttGlyphPen import TTGlyphPen from fontTools.ttLib import TTFont from config import NAME_STRING from src.utils import str_has_whitespace, str_has_emoji, deduplicate_str, ensure_cmap_has_all_text, subset_ttf_font, \ get_project_root root = get_project_root() def obfuscate(plain_text, shadow_text, filename: str, only_ttf: bool, target_path: str = 'output') -> dict: """ :param plain_text: 用户看到的内容 :param shadow_text: 爬虫看到的内容 :param filename: 不含格式后缀的文件名 :param only_ttf: 是否需要woff、woff2格式 :param target_path: 生成的目标目录 """ if str_has_whitespace(plain_text) | str_has_whitespace(shadow_text): raise Exception('明文或阴书不允许含有空格') if str_has_emoji(plain_text) | str_has_emoji(shadow_text):
def setUp(self):
    """Create a storage accessor rooted at ``test_metadata_storage``.

    The folder path, built under the project root, is kept in
    ``self.storage_loc`` and the accessor in ``self.storage``.
    """
    storage_dir = os.path.join(get_project_root(), "test_metadata_storage")
    self.storage_loc = storage_dir
    self.storage = LocalStorageAccessor(storage_dir)
import pystan import pickle import argparse import os from src.utils import get_project_root # see src/ folder in project repo from src.data import make_dataset import pandas as pd import numpy as np from sklearn.metrics import log_loss MODEL_BASEDIR = os.path.join(get_project_root(), 'models') def create_stan_model(): model_code = ''' /* pairwise logistic regression model of winning the game - Use boxscore averages for regression features - Share parameters with regression model of scorediff */ data { int<lower=0> N_teams; int<lower=0> N; // number of games in regular season int<lower=0> N_tourney; // number of games in tournament int<lower=1, upper=N_teams> j_team[N + N_tourney]; // index for team 1 int<lower=1, upper=N_teams> k_team[N + N_tourney]; // index for team 2 real x1[N + N_tourney]; // score_mean_team1 - score_opp_team2 real x2[N + N_tourney]; // score_opp_team1 - score_mean_team2
def __init__(self) -> None:
    """ViolinGUI Constructor. Defines all aspects of the GUI.

    Builds a single-page main window (data loading, sample names, plot
    parameters and the plot button) and a secondary window that holds the
    canvas and its navigation toolbar.

    Style sheets are looked up in ``self.style`` by key; that mapping is not
    defined in this method (presumably a class attribute -- TODO confirm).
    """
    # ## Setup section
    # Inherits from QMainWindow
    super().__init__()
    self.rootdir = get_project_root()
    # QMainWindow basic properties
    self.setWindowTitle("SCOUTS - Violins")
    self.setWindowIcon(
        QIcon(
            os.path.abspath(os.path.join(self.rootdir, 'src', 'scouts.ico'))))
    # Creates QWidget as QMainWindow's central widget
    self.page = QWidget(self)
    self.setCentralWidget(self.page)
    # Miscellaneous initialization values
    self.threadpool = QThreadPool()  # Threadpool for workers
    self.population_df = None  # DataFrame of whole population (raw data)
    self.summary_df = None  # DataFrame indicating which SCOUTS output corresponds to which rule
    self.summary_path = None  # path to all DataFrames generated by SCOUTS

    # Layout of the central widget; every section below is added to it
    self.main_layout = QVBoxLayout(self.page)

    # ## Title section
    # Title
    self.title = QLabel(self.page)
    self.title.setText('SCOUTS - Violins')
    self.title.setStyleSheet(self.style['title'])
    self.title.adjustSize()
    self.main_layout.addWidget(self.title)

    # ## Input section
    # Input header
    self.input_header = QLabel(self.page)
    self.input_header.setText('Load data')
    self.input_header.setStyleSheet(self.style['header'])
    self.input_header.adjustSize()
    self.main_layout.addWidget(self.input_header)
    # Input/Output frame
    self.input_frame = QFrame(self.page)
    self.input_frame.setFrameShape(QFrame.StyledPanel)
    self.input_frame.setLayout(QFormLayout())
    self.main_layout.addWidget(self.input_frame)
    # Raw data button
    self.input_button = QPushButton(self.page)
    self.input_button.setStyleSheet(self.style['button'])
    self.set_icon(self.input_button, 'x-office-spreadsheet')
    self.input_button.setObjectName('file')
    self.input_button.setText(' Load raw data file')
    self.input_button.setToolTip(
        'Load raw data file (the file given to SCOUTS as the input file)')
    self.input_button.clicked.connect(self.get_path)
    # SCOUTS results button (connected to the same get_path handler as the
    # raw data button; the two differ in their object names)
    self.output_button = QPushButton(self.page)
    self.output_button.setStyleSheet(self.style['button'])
    self.set_icon(self.output_button, 'folder')
    self.output_button.setObjectName('folder')
    self.output_button.setText(' Load SCOUTS results')
    self.output_button.setToolTip(
        'Load data from SCOUTS analysis '
        '(the folder given to SCOUTS as the output folder)')
    self.output_button.clicked.connect(self.get_path)
    # Add widgets above to input frame Layout
    self.input_frame.layout().addRow(self.input_button)
    self.input_frame.layout().addRow(self.output_button)

    # ## Samples section
    # Samples header
    self.samples_header = QLabel(self.page)
    self.samples_header.setText('Select sample names')
    self.samples_header.setStyleSheet(self.style['header'])
    self.samples_header.adjustSize()
    self.main_layout.addWidget(self.samples_header)
    # Samples frame
    self.samples_frame = QFrame(self.page)
    self.samples_frame.setFrameShape(QFrame.StyledPanel)
    self.samples_frame.setLayout(QFormLayout())
    self.main_layout.addWidget(self.samples_frame)
    # Samples label
    self.samples_label = QLabel(self.page)
    self.samples_label.setText(
        'Write sample names delimited by semicolons below.\nEx: Control;Treat_01;Pac-03'
    )
    self.samples_label.setStyleSheet(self.style['label'])
    # Sample names line edit
    self.sample_names = QLineEdit(self.page)
    self.sample_names.setStyleSheet(self.style['line edit'])
    # Add widgets above to samples frame Layout
    self.samples_frame.layout().addRow(self.samples_label)
    self.samples_frame.layout().addRow(self.sample_names)

    # ## Analysis section
    # Analysis header
    self.analysis_header = QLabel(self.page)
    self.analysis_header.setText('Plot parameters')
    self.analysis_header.setStyleSheet(self.style['header'])
    self.analysis_header.adjustSize()
    self.main_layout.addWidget(self.analysis_header)
    # Analysis frame
    self.analysis_frame = QFrame(self.page)
    self.analysis_frame.setFrameShape(QFrame.StyledPanel)
    self.analysis_frame.setLayout(QFormLayout())
    self.main_layout.addWidget(self.analysis_frame)
    # Analysis labels
    self.analysis_label_01 = QLabel(self.page)
    self.analysis_label_01.setText('Compare')
    self.analysis_label_01.setStyleSheet(self.style['label'])
    self.analysis_label_02 = QLabel(self.page)
    self.analysis_label_02.setText('with')
    self.analysis_label_02.setStyleSheet(self.style['label'])
    self.analysis_label_03 = QLabel(self.page)
    self.analysis_label_03.setText('for marker')
    self.analysis_label_03.setStyleSheet(self.style['label'])
    self.analysis_label_04 = QLabel(self.page)
    self.analysis_label_04.setText('Outlier type')
    self.analysis_label_04.setStyleSheet(self.style['label'])
    # Analysis drop-down boxes
    # First population to compare; index 2 selects 'top outliers'
    self.drop_down_01 = QComboBox(self.page)
    self.drop_down_01.addItems([
        'whole population', 'non-outliers', 'top outliers',
        'bottom outliers', 'none'
    ])
    self.drop_down_01.setStyleSheet(self.style['drop down'])
    self.drop_down_01.setCurrentIndex(2)
    # Second population to compare; index 0 selects 'whole population'
    self.drop_down_02 = QComboBox(self.page)
    self.drop_down_02.addItems([
        'whole population', 'non-outliers', 'top outliers',
        'bottom outliers', 'none'
    ])
    self.drop_down_02.setStyleSheet(self.style['drop down'])
    self.drop_down_02.setCurrentIndex(0)
    # Marker drop-down: no items are added here (presumably filled once
    # data has been loaded -- TODO confirm)
    self.drop_down_03 = QComboBox(self.page)
    self.drop_down_03.setStyleSheet(self.style['drop down'])
    # Outlier type drop-down
    self.drop_down_04 = QComboBox(self.page)
    self.drop_down_04.addItems(['OutS', 'OutR'])
    self.drop_down_04.setStyleSheet(self.style['drop down'])
    # Add widgets above to analysis frame Layout
    self.analysis_frame.layout().addRow(self.analysis_label_01,
                                        self.drop_down_01)
    self.analysis_frame.layout().addRow(self.analysis_label_02,
                                        self.drop_down_02)
    self.analysis_frame.layout().addRow(self.analysis_label_03,
                                        self.drop_down_03)
    self.analysis_frame.layout().addRow(self.analysis_label_04,
                                        self.drop_down_04)
    # Legend checkbox (added to the main layout, not to the analysis frame)
    self.legend_checkbox = QCheckBox(self.page)
    self.legend_checkbox.setText('Add legend to the plot')
    self.legend_checkbox.setStyleSheet(self.style['checkbox'])
    self.main_layout.addWidget(self.legend_checkbox)
    # Plot button (stand-alone); created disabled
    self.plot_button = QPushButton(self.page)
    self.set_icon(self.plot_button, 'system-run')
    self.plot_button.setText(' Plot')
    self.plot_button.setToolTip(
        'Plot data after loading the input data and selecting parameters')
    self.plot_button.setStyleSheet(self.style['run button'])
    self.plot_button.setEnabled(False)
    self.plot_button.clicked.connect(self.run_plot)
    self.main_layout.addWidget(self.plot_button)

    # ## Secondary Window
    # This is used to plot the violins only
    self.secondary_window = QMainWindow(self)
    self.secondary_window.resize(720, 720)
    self.dynamic_canvas = DynamicCanvas(self.secondary_window,
                                        width=6,
                                        height=6,
                                        dpi=120)
    self.secondary_window.setCentralWidget(self.dynamic_canvas)
    self.secondary_window.addToolBar(
        NavBar(self.dynamic_canvas, self.secondary_window))
def _merge_team_lookup(data, lookup, on_season=True):
    """Left-join ``lookup`` onto ``data`` once for team1 and once for team2.

    Columns contributed by ``lookup`` come out suffixed with '1' (team1) and
    '2' (team2). The lookup's own ``TeamID`` join key is dropped right away:
    leaving ``TeamID1``/``TeamID2`` in the frame would make the next call
    produce duplicate column names, which pandas >= 2.0 rejects with a
    ``MergeError``.

    Parameters
    ----------
    data : DataFrame with 'team1' and 'team2' (and 'Season' if ``on_season``).
    lookup : DataFrame with 'TeamID' (and 'Season' if ``on_season``).
    on_season : when True the join key is (Season, team); otherwise team only.
    """
    season_key = ['Season'] if on_season else []
    merged = (data
              .pipe(pd.merge, lookup,
                    left_on=season_key + ['team1'],
                    right_on=season_key + ['TeamID'],
                    how='left')
              .pipe(pd.merge, lookup,
                    left_on=season_key + ['team2'],
                    right_on=season_key + ['TeamID'],
                    how='left', suffixes=('1', '2')))
    return merged.drop(['TeamID1', 'TeamID2'], axis=1)


def get_train_data_v1(season=None, detailed=False):
    """Build the game-level table (regular season + tourney) with team info.

    Reads the result, seed, conference and team CSV files from
    ``DATAFILES_BASEDIR`` and the stage-2 sample submission from
    ``<project root>/input``. Every matchup listed in the sample submission is
    appended to the tourney results as a row carrying only Season / WTeamID /
    LTeamID (all other result fields are missing for those rows).

    Each game is re-expressed from the point of view of ``team1`` (lower team
    id) and ``team2`` (higher team id) instead of winner / loser.

    Parameters
    ----------
    season : optional season value; when truthy only rows of that season are
        kept, otherwise all seasons are returned.
    detailed : when True the 'Detailed' result files are read and the box
        score columns are converted to ``<stat>_team1`` / ``<stat>_team2``;
        when False the 'Compact' files are read.

    Returns
    -------
    DataFrame with lower-cased column names plus an ``ID`` column of the form
    ``'<season>_<team1>_<team2>'``.
    """
    detail = 'Detailed' if detailed else 'Compact'

    ##################################################
    # read data
    ##################################################
    regular_results = pd.read_csv(
        os.path.join(DATAFILES_BASEDIR, 'RegularSeason{}Results.csv'.format(detail)))
    tourney_results = pd.read_csv(
        os.path.join(DATAFILES_BASEDIR, 'NCAATourney{}Results.csv'.format(detail)))
    seeds = pd.read_csv(os.path.join(DATAFILES_BASEDIR, 'NCAATourneySeeds.csv'))
    team_conferences = pd.read_csv(os.path.join(DATAFILES_BASEDIR, 'TeamConferences.csv'))
    conferences = pd.read_csv(os.path.join(DATAFILES_BASEDIR, 'Conferences.csv'))
    teams = pd.read_csv(os.path.join(DATAFILES_BASEDIR, 'Teams.csv'))

    team_conferences = (pd.merge(team_conferences, conferences, on='ConfAbbrev')
                        .rename({'Description': 'conf_descr'}, axis=1))

    # Matchups to predict: the submission ID is '<season>_<id>_<id>'; they are
    # appended as tourney rows without scores.
    submission = pd.read_csv(
        os.path.join(get_project_root(), 'input/SampleSubmissionStage2.csv'))
    tourney2019 = submission['ID'].str.split('_', expand=True).astype(int)
    tourney2019.columns = ['Season', 'WTeamID', 'LTeamID']
    tourney_results = pd.concat([tourney_results, tourney2019])

    ##################################################
    # process data
    ##################################################
    # Seed strings carry the numeric seed in characters 1-2 (e.g. 'W01' -> 1).
    seeds['seednum'] = seeds['Seed'].str.slice(1, 3).astype(int)
    regular_results['tourney'] = 0
    tourney_results['tourney'] = 1

    # combine regular and tourney data
    data = pd.concat([regular_results, tourney_results])
    if season:
        # .copy() so the column assignments below act on an independent frame
        data = data[data.Season == season].copy()

    ##################################################
    # team1: team with lower id, team2: team with higher id
    winner_is_lower = data['WTeamID'] < data['LTeamID']
    winner_is_higher = data['WTeamID'] > data['LTeamID']
    data['team1'] = data['WTeamID'].where(winner_is_lower, data['LTeamID'])
    data['team2'] = data['WTeamID'].where(winner_is_higher, data['LTeamID'])
    data['score1'] = data['WScore'].where(winner_is_lower, data['LScore'])
    data['score2'] = data['WScore'].where(winner_is_higher, data['LScore'])

    if detailed:
        boxscore_stats = ['FGM', 'FGA', 'FGM3', 'FGA3', 'FTM', 'FTA', 'OR', 'DR',
                          'Ast', 'TO', 'Stl', 'Blk', 'PF']
        for stat in boxscore_stats:
            data[stat + '_team1'] = data['W' + stat].where(winner_is_lower, data['L' + stat])
            data[stat + '_team2'] = data['W' + stat].where(winner_is_higher, data['L' + stat])
        data = data.drop(['W' + stat for stat in boxscore_stats], axis=1)
        data = data.drop(['L' + stat for stat in boxscore_stats], axis=1)

    # 'loc' holds a team id instead of the WLoc code: the winner's id for 'H',
    # the loser's id for 'A', and 0 for 'N' (no home court).
    data['loc'] = (data['WLoc']
                   .where(data['WLoc'] != 'H', data['WTeamID'])
                   .where(data['WLoc'] != 'A', data['LTeamID'])
                   .where(data['WLoc'] != 'N', 0))
    data['team1win'] = np.where(data['WTeamID'] == data['team1'], 1, 0)

    ##################################################
    # attach per-team information (columns suffixed 1 / 2)
    data = _merge_team_lookup(data, seeds)                      # tourney seeds
    data = _merge_team_lookup(data, team_conferences)           # conferences
    data = _merge_team_lookup(data, teams, on_season=False)     # team names

    # calculate seed diff
    data['seeddiff'] = data['seednum2'] - data['seednum1']

    data = data.drop(['WTeamID', 'WScore', 'LTeamID', 'LScore', 'WLoc'], axis=1)
    data.columns = data.columns.str.lower()

    # Vectorised '<season>_<team1>_<team2>' key (no row-wise apply).
    data['ID'] = (data['season'].astype(str) + '_'
                  + data['team1'].astype(str) + '_'
                  + data['team2'].astype(str))
    return data
def get_boxscore_dataset_v1(season=None, detailed=False, final_prediction=False):
    """Add per-season mean/std box score columns for team1 and team2.

    The base table is either the matchups listed in the stage-2 sample
    submission (``final_prediction=True``; columns ID, season, team1, team2)
    or the output of ``get_train_data_v1(season=season, detailed=detailed)``.

    Regular-season detailed results are stacked twice -- once as recorded and
    once with the W*/L* columns swapped -- so that every game contributes one
    row per participating team. The ``*_team`` / ``*_opp`` columns are then
    aggregated (mean and std) per (Season, TeamID) and left-joined onto the
    base table for team1 (suffix '1') and team2 (suffix '2').
    """
    if final_prediction:
        submission = pd.read_csv(
            os.path.join(get_project_root(), 'input/SampleSubmissionStage2.csv'))
        data = submission['ID'].str.split('_', expand=True).astype(int)
        data.index = submission['ID']
        data.columns = ['season', 'team1', 'team2']
        data = data.reset_index()  # brings 'ID' back as the first column
    else:
        data = get_train_data_v1(season=season, detailed=detailed)

    # regular season boxscore data
    games = pd.read_csv(
        os.path.join(DATAFILES_BASEDIR, 'RegularSeasonDetailedResults.csv'))

    # Stat names are the 'W*' columns minus the leading 'W'; WLoc is not a stat.
    stat_names = [col[1:] for col in games.columns
                  if col[:1] == 'W' and col != 'WLoc']
    winner_cols = ['W' + name for name in stat_names]
    loser_cols = ['L' + name for name in stat_names]

    # W <-> L swap, used to view each game from the loser's side
    swap_sides = dict(zip(winner_cols + loser_cols, loser_cols + winner_cols))
    # W -> *_team, L -> *_opp
    to_team_opp = dict(zip(
        winner_cols + loser_cols,
        [name + '_team' for name in stat_names] + [name + '_opp' for name in stat_names]))

    # stack the original and the side-swapped games
    stacked = (pd.concat([games, games.rename(columns=swap_sides)], sort=True)
               .rename(columns=to_team_opp))

    # seasonwise mean / std of every team and opponent stat
    selected = stacked[list(to_team_opp.values()) + ['Season']]
    boxstats = selected.groupby(['Season', 'TeamID_team']).agg(['mean', 'std'])
    boxstats.columns = ['_'.join(pair).strip().lower() for pair in boxstats.columns]

    # The opponent id is not a statistic; discard its aggregates.
    opp_id_cols = [name for name in boxstats.columns if 'teamid_opp' in name]
    boxstats = boxstats.drop(columns=opp_id_cols)
    boxstats.index.names = ['Season', 'TeamID']

    # merge with main data: first team1, then team2 (overlaps get 1 / 2)
    with_team1 = data.merge(boxstats, left_on=['season', 'team1'],
                            right_index=True, how='left')
    return with_team1.merge(boxstats, left_on=['season', 'team2'],
                            right_index=True, how='left', suffixes=('1', '2'))