Exemplo n.º 1
0
 def load_partitioned_graph_image(self, partitioning_name):
     """Load a stored partitioning and make it the current graph state.

     The file ``partitionings/<partitioning_name>`` under the project root
     is handed to ``parse_partitioning``. The graph and the partition-vertex
     data it returns are stored on the instance; the grid size it also
     returns is not used here. The partition count is fixed at 2 and
     partitions 1 and 2 are recorded as each other's only neighbour.

     :param partitioning_name: file name inside the ``partitionings`` folder.
     """
     partitioning_path = (
         str(get_project_root()) + '/partitionings/' + partitioning_name)
     graph, _grid_size, vertices = parse_partitioning(partitioning_path)

     self.G = graph
     self.last_number_of_partitions = 2
     self.adjacent_partitions = {1: [2], 2: [1]}
     self.partitions_vertices = vertices
    def __init__(self):
        """Build the InferSent encoder and keep it on ``self.model``.

        Loads the version-1 weights from ``encoder/infersent1.pkl`` under the
        project root, points the model at the word-vector file for that
        version and builds its vocabulary with ``K=100000``.
        """
        version = 1
        weights_path = get_project_root() / Path(
            "encoder/infersent%s.pkl" % version)

        encoder = InferSent({
            'bsize': 64,
            'word_emb_dim': 300,
            'enc_lstm_dim': 2048,
            'pool_type': 'max',
            'dpout_model': 0.0,
            'version': version,
        })
        encoder.load_state_dict(torch.load(weights_path))

        # Version 1 uses the GloVe vectors, any other version the fastText ones.
        if version == 1:
            vectors_file = 'GloVe/glove.840B.300d.txt'
        else:
            vectors_file = '../fastText/crawl-300d-2M.vec'
        encoder.set_w2v_path(get_project_root() / Path(vectors_file))

        # Build the vocabulary of word vectors.
        encoder.build_vocab_k_words(K=100000)

        self.model = encoder
Exemplo n.º 3
0
    def __init__(self):
        """Index every JSON resource under ``res/`` by its ``name`` field.

        Walks the ``res/`` directory of the project root recursively, parses
        each ``*.json`` file and stores the parsed object in ``self.mapping``
        under ``entry['name']``. If two files carry the same name, the one
        visited later replaces the earlier one.

        :raises KeyError: if a parsed JSON object has no ``name`` key.
        :raises json.JSONDecodeError: if a file does not contain valid JSON.
        """
        self.mapping = dict()

        path_to_json = os.path.join(get_project_root(), 'res/')
        for root, _dirs, files in os.walk(path_to_json):
            for filename in files:
                if not filename.endswith('.json'):
                    continue
                # JSON text is UTF-8; do not rely on the platform's default
                # encoding, which differs between operating systems.
                json_path = os.path.join(root, filename)
                with open(json_path, "r", encoding="utf-8") as json_file:
                    entry = json.load(json_file)
                self.mapping[entry['name']] = entry
Exemplo n.º 4
0
 def load_image(self, grid_name, remove_off=False):
     """Convert a grid image into a graph and store the result on the instance.

     Does nothing when ``grid_name`` is falsy. Otherwise the file
     ``grids/<grid_name>`` under the project root is converted, the elapsed
     wall-clock time is kept in ``self.conversion_time`` and the returned
     graph, area collections, grid size and both dimensions are stored as
     attributes. The partition count starts at the graph's node count.

     :param grid_name: file name inside the ``grids`` folder.
     :param remove_off: use ``convert_image_to_graph_off_removed`` when true,
         ``convert_image_to_graph_off_weighted_0`` otherwise.
     """
     if not grid_name:
         return

     started_at = time.time()
     to_graph = (convert_image_to_graph_off_removed if remove_off
                 else convert_image_to_graph_off_weighted_0)
     graph, indivisible, off, size, height, width = to_graph(
         str(get_project_root()) + '/grids/' + grid_name, self.show_progress)
     self.conversion_time = time.time() - started_at

     self.grid_size = size
     self.G = graph
     self.indivisible_areas = indivisible
     self.off_areas = off
     self.last_number_of_partitions = graph.number_of_nodes()
     self.vertical_size = height
     self.horizontal_size = width

     # Keep the larger and the smaller of the two dimensions separately.
     if height > width:
         self.bigger_dim = height
     else:
         self.bigger_dim = width
     if height < width:
         self.smaller_dim = height
     else:
         self.smaller_dim = width
Exemplo n.º 5
0
    def __init__(self) -> None:
        """SCOUTS Constructor. Defines all aspects of the GUI.

        Builds a ``QStackedWidget`` holding three pages (main, samples and
        gating), creates every widget shown on them and connects the button
        signals to the matching handler methods. The main page is the one
        displayed at startup.
        """

        # ###
        # ### Main Window setup
        # ###

        # Inherits from QMainWindow
        super().__init__()
        self.rootdir = get_project_root()
        self.threadpool = QThreadPool()
        # Sets values for QMainWindow
        self.setWindowTitle("SCOUTS")
        self.setWindowIcon(
            QIcon(
                os.path.abspath(os.path.join(self.rootdir, 'src',
                                             'scouts.ico'))))
        # Creates StackedWidget as QMainWindow's central widget
        self.stacked_pages = QStackedWidget(self)
        self.setCentralWidget(self.stacked_pages)
        # Creates Widgets for individual "pages" and adds them to the StackedWidget
        self.main_page = QWidget()
        self.samples_page = QWidget()
        self.gating_page = QWidget()
        self.pages = (self.main_page, self.samples_page, self.gating_page)
        for page in self.pages:
            self.stacked_pages.addWidget(page)
        # ## Sets widget at program startup
        self.stacked_pages.setCurrentWidget(self.main_page)

        # ###
        # ### MAIN PAGE
        # ###

        # Main page layout
        self.main_layout = QVBoxLayout(self.main_page)

        # Title section
        # Title
        self.title = QLabel(self.main_page)
        self.title.setText('SCOUTS - Single Cell Outlier Selector')
        self.title.setStyleSheet(self.style['title'])
        self.title.adjustSize()
        self.main_layout.addWidget(self.title)

        # ## Input section
        # Input header
        self.input_header = QLabel(self.main_page)
        self.input_header.setText('Input settings')
        self.input_header.setStyleSheet(self.style['header'])
        self.input_header.adjustSize()
        # addWidget() adopts the widget itself (it calls addChildWidget()
        # internally), so no separate addChildWidget() call is needed.
        self.main_layout.addWidget(self.input_header)
        # Input frame
        self.input_frame = QFrame(self.main_page)
        self.input_frame.setFrameShape(QFrame.StyledPanel)
        self.input_frame.setLayout(QFormLayout())
        self.main_layout.addWidget(self.input_frame)
        # Input button
        self.input_button = QPushButton(self.main_page)
        self.input_button.setStyleSheet(self.style['button'])
        self.set_icon(self.input_button, 'x-office-spreadsheet')
        self.input_button.setObjectName('input')
        self.input_button.setText(' Select input file (.xlsx or .csv)')
        self.input_button.clicked.connect(self.get_path)
        # Input path box
        self.input_path = QLineEdit(self.main_page)
        self.input_path.setObjectName('input_path')
        self.input_path.setStyleSheet(self.style['line edit'])
        # Go to sample naming page
        self.samples_button = QPushButton(self.main_page)
        self.samples_button.setStyleSheet(self.style['button'])
        self.set_icon(self.samples_button, 'preferences-other')
        self.samples_button.setText(' Name samples...')
        self.samples_button.clicked.connect(self.goto_samples_page)
        # Go to gating page
        self.gates_button = QPushButton(self.main_page)
        self.gates_button.setStyleSheet(self.style['button'])
        self.set_icon(self.gates_button, 'preferences-other')
        self.gates_button.setText(' Gating && outlier options...')
        self.gates_button.clicked.connect(self.goto_gates_page)
        # Add widgets above to input frame Layout
        self.input_frame.layout().addRow(self.input_button, self.input_path)
        self.input_frame.layout().addRow(self.samples_button)
        self.input_frame.layout().addRow(self.gates_button)

        # ## Analysis section
        # Analysis header
        self.analysis_header = QLabel(self.main_page)
        self.analysis_header.setText('Analysis settings')
        self.analysis_header.setStyleSheet(self.style['header'])
        self.analysis_header.adjustSize()
        self.main_layout.addWidget(self.analysis_header)
        # Analysis frame
        self.analysis_frame = QFrame(self.main_page)
        self.analysis_frame.setFrameShape(QFrame.StyledPanel)
        self.analysis_frame.setLayout(QVBoxLayout())
        self.main_layout.addWidget(self.analysis_frame)
        # Cutoff text
        self.cutoff_text = QLabel(self.main_page)
        self.cutoff_text.setText('Type of outlier to select:')
        self.cutoff_text.setToolTip(
            'Choose whether to select outliers using the cutoff value from a reference\n'
            'sample (OutR) or by using the cutoff value calculated for each sample\n'
            'individually (OutS)')
        self.cutoff_text.setStyleSheet(self.style['label'])
        # Cutoff button group
        self.cutoff_group = QButtonGroup(self)
        # Cutoff by sample
        self.cutoff_sample = QRadioButton(self.main_page)
        self.cutoff_sample.setText('OutS')
        self.cutoff_sample.setObjectName('sample')
        self.cutoff_sample.setStyleSheet(self.style['radio button'])
        self.cutoff_sample.setChecked(True)
        self.cutoff_group.addButton(self.cutoff_sample)
        # Cutoff by reference
        self.cutoff_reference = QRadioButton(self.main_page)
        self.cutoff_reference.setText('OutR')
        self.cutoff_reference.setObjectName('ref')
        self.cutoff_reference.setStyleSheet(self.style['radio button'])
        self.cutoff_group.addButton(self.cutoff_reference)
        # Both cutoffs
        self.cutoff_both = QRadioButton(self.main_page)
        self.cutoff_both.setText('both')
        self.cutoff_both.setObjectName('sample ref')
        self.cutoff_both.setStyleSheet(self.style['radio button'])
        self.cutoff_group.addButton(self.cutoff_both)
        # Markers text
        self.markers_text = QLabel(self.main_page)
        self.markers_text.setStyleSheet(self.style['label'])
        self.markers_text.setText('Show results for:')
        self.markers_text.setToolTip(
            'Individual markers: for each marker, select outliers\n'
            'Any marker: select cells that are outliers for AT LEAST one marker'
        )
        # Markers button group
        self.markers_group = QButtonGroup(self)
        # Single marker
        self.single_marker = QRadioButton(self.main_page)
        self.single_marker.setText('individual markers')
        self.single_marker.setObjectName('single')
        self.single_marker.setStyleSheet(self.style['radio button'])
        self.single_marker.setChecked(True)
        self.markers_group.addButton(self.single_marker)
        # Any marker
        self.any_marker = QRadioButton(self.main_page)
        self.any_marker.setText('any marker')
        self.any_marker.setObjectName('any')
        self.any_marker.setStyleSheet(self.style['radio button'])
        self.markers_group.addButton(self.any_marker)
        # Both methods
        self.both_methods = QRadioButton(self.main_page)
        self.both_methods.setText('both')
        self.both_methods.setObjectName('single any')
        self.both_methods.setStyleSheet(self.style['radio button'])
        self.markers_group.addButton(self.both_methods)
        # Tukey text
        self.tukey_text = QLabel(self.main_page)
        self.tukey_text.setStyleSheet(self.style['label'])
        # Tukey button group
        self.tukey_text.setText('Tukey factor:')
        self.tukey_group = QButtonGroup(self)
        # Low Tukey value
        self.tukey_low = QRadioButton(self.main_page)
        self.tukey_low.setText('1.5')
        self.tukey_low.setStyleSheet(self.style['radio button'])
        self.tukey_low.setChecked(True)
        self.tukey_group.addButton(self.tukey_low)
        # High Tukey value
        self.tukey_high = QRadioButton(self.main_page)
        self.tukey_high.setText('3.0')
        self.tukey_high.setStyleSheet(self.style['radio button'])
        self.tukey_group.addButton(self.tukey_high)
        # Add widgets above to analysis frame layout
        self.analysis_frame.layout().addWidget(self.cutoff_text)
        self.cutoff_buttons = QHBoxLayout()
        for button in self.cutoff_group.buttons():
            self.cutoff_buttons.addWidget(button)
        self.analysis_frame.layout().addLayout(self.cutoff_buttons)
        self.analysis_frame.layout().addWidget(self.markers_text)
        self.markers_buttons = QHBoxLayout()
        for button in self.markers_group.buttons():
            self.markers_buttons.addWidget(button)
        self.analysis_frame.layout().addLayout(self.markers_buttons)
        self.analysis_frame.layout().addWidget(self.tukey_text)
        self.tukey_buttons = QHBoxLayout()
        for button in self.tukey_group.buttons():
            self.tukey_buttons.addWidget(button)
        self.tukey_buttons.addWidget(QLabel())  # aligns row with 2 buttons
        self.analysis_frame.layout().addLayout(self.tukey_buttons)

        # ## Output section
        # Output header
        self.output_header = QLabel(self.main_page)
        self.output_header.setText('Output settings')
        self.output_header.setStyleSheet(self.style['header'])
        self.output_header.adjustSize()
        self.main_layout.addWidget(self.output_header)
        # Output frame
        self.output_frame = QFrame(self.main_page)
        self.output_frame.setFrameShape(QFrame.StyledPanel)
        self.output_frame.setLayout(QFormLayout())
        self.main_layout.addWidget(self.output_frame)
        # Output button
        self.output_button = QPushButton(self.main_page)
        self.output_button.setStyleSheet(self.style['button'])
        self.set_icon(self.output_button, 'folder')
        self.output_button.setObjectName('output')
        self.output_button.setText(' Select output folder')
        self.output_button.clicked.connect(self.get_path)
        # Output path box
        self.output_path = QLineEdit(self.main_page)
        self.output_path.setStyleSheet(self.style['line edit'])
        # Generate CSV checkbox
        self.output_csv = QCheckBox(self.main_page)
        self.output_csv.setText('Export multiple text files (.csv)')
        self.output_csv.setStyleSheet(self.style['checkbox'])
        self.output_csv.setChecked(True)
        # Generate XLSX checkbox
        self.output_excel = QCheckBox(self.main_page)
        self.output_excel.setText('Export multiple Excel spreadsheets (.xlsx)')
        self.output_excel.setStyleSheet(self.style['checkbox'])
        self.output_excel.clicked.connect(self.enable_single_excel)
        # Generate single, large XLSX checkbox
        self.single_excel = QCheckBox(self.main_page)
        self.single_excel.setText(
            'Also save one multi-sheet Excel spreadsheet')
        # The adjacent literals are concatenated, so each piece must carry
        # its own trailing space ("output " + "file").
        self.single_excel.setToolTip(
            'After generating all Excel spreadsheets, SCOUTS combines them into '
            'a single\nExcel spreadsheet where each sheet corresponds to an output '
            'file from SCOUTS')
        self.single_excel.setStyleSheet(self.style['checkbox'])
        self.single_excel.setEnabled(False)
        self.single_excel.clicked.connect(self.memory_warning)
        # Add widgets above to output frame layout
        self.output_frame.layout().addRow(self.output_button, self.output_path)
        self.output_frame.layout().addRow(self.output_csv)
        self.output_frame.layout().addRow(self.output_excel)
        self.output_frame.layout().addRow(self.single_excel)

        # ## Run & help-quit section
        # Run button (stand-alone)
        self.run_button = QPushButton(self.main_page)
        self.set_icon(self.run_button, 'system-run')
        self.run_button.setText(' Run!')
        self.run_button.setStyleSheet(self.style['run button'])
        self.main_layout.addWidget(self.run_button)
        self.run_button.clicked.connect(self.run)
        # Help-quit frame (invisible)
        self.helpquit_frame = QFrame(self.main_page)
        self.helpquit_frame.setLayout(QHBoxLayout())
        # setContentsMargins() replaces the obsolete QLayout.setMargin()
        self.helpquit_frame.layout().setContentsMargins(0, 0, 0, 0)
        self.main_layout.addWidget(self.helpquit_frame)
        # Help button
        self.help_button = QPushButton(self.main_page)
        self.set_icon(self.help_button, 'help-about')
        self.help_button.setText(' Help')
        self.help_button.setStyleSheet(self.style['md button'])
        self.help_button.clicked.connect(self.get_help)
        # Quit button
        self.quit_button = QPushButton(self.main_page)
        self.set_icon(self.quit_button, 'process-stop')
        self.quit_button.setText(' Quit')
        self.quit_button.setStyleSheet(self.style['md button'])
        self.quit_button.clicked.connect(self.close)
        # Add widgets above to help-quit layout
        self.helpquit_frame.layout().addWidget(self.help_button)
        self.helpquit_frame.layout().addWidget(self.quit_button)

        # ###
        # ### SAMPLES PAGE
        # ###

        # Samples page layout
        self.samples_layout = QVBoxLayout(self.samples_page)

        # ## Title section
        # Title
        self.samples_title = QLabel(self.samples_page)
        self.samples_title.setText('Name your samples')
        self.samples_title.setStyleSheet(self.style['title'])
        self.samples_title.adjustSize()
        self.samples_layout.addWidget(self.samples_title)
        # Subtitle
        self.samples_subtitle = QLabel(self.samples_page)
        string = (
            'Please name the samples to be analysed by SCOUTS.\n\nSCOUTS searches the first '
            'column of your data\nand locates the exact string as part of the sample name.'
        )
        self.samples_subtitle.setText(string)
        self.samples_subtitle.setStyleSheet(self.style['label'])
        self.samples_subtitle.adjustSize()
        self.samples_layout.addWidget(self.samples_subtitle)

        # ## Sample addition section
        # Sample addition frame
        self.samples_frame = QFrame(self.samples_page)
        self.samples_frame.setFrameShape(QFrame.StyledPanel)
        self.samples_frame.setLayout(QGridLayout())
        self.samples_layout.addWidget(self.samples_frame)
        # Sample name box
        self.sample_name = QLineEdit(self.samples_page)
        self.sample_name.setStyleSheet(self.style['line edit'])
        self.sample_name.setPlaceholderText('Sample name ...')
        # Reference check
        self.is_reference = QCheckBox(self.samples_page)
        self.is_reference.setText('Reference?')
        self.is_reference.setStyleSheet(self.style['checkbox'])
        # Add sample to table
        self.add_sample_button = QPushButton(self.samples_page)
        QShortcut(QKeySequence("Return"), self.add_sample_button,
                  self.write_to_sample_table)
        self.set_icon(self.add_sample_button, 'list-add')
        self.add_sample_button.setText(' Add sample (Enter)')
        self.add_sample_button.setStyleSheet(self.style['button'])
        self.add_sample_button.clicked.connect(self.write_to_sample_table)
        # Remove sample from table
        self.remove_sample_button = QPushButton(self.samples_page)
        QShortcut(QKeySequence("Delete"), self.remove_sample_button,
                  self.remove_from_sample_table)
        self.set_icon(self.remove_sample_button, 'list-remove')
        self.remove_sample_button.setText(' Remove sample (Del)')
        self.remove_sample_button.setStyleSheet(self.style['button'])
        self.remove_sample_button.clicked.connect(
            self.remove_from_sample_table)
        # Add widgets above to sample addition layout
        self.samples_frame.layout().addWidget(self.sample_name, 0, 0)
        self.samples_frame.layout().addWidget(self.is_reference, 1, 0)
        self.samples_frame.layout().addWidget(self.add_sample_button, 0, 1)
        self.samples_frame.layout().addWidget(self.remove_sample_button, 1, 1)

        # ## Sample table
        self.sample_table = QTableWidget(self.samples_page)
        self.sample_table.setColumnCount(2)
        self.sample_table.setHorizontalHeaderItem(0,
                                                  QTableWidgetItem('Sample'))
        self.sample_table.setHorizontalHeaderItem(
            1, QTableWidgetItem('Reference?'))
        self.sample_table.horizontalHeader().setSectionResizeMode(
            0, QHeaderView.Stretch)
        self.sample_table.horizontalHeader().setSectionResizeMode(
            1, QHeaderView.ResizeToContents)
        self.samples_layout.addWidget(self.sample_table)

        # ## Save & clear buttons
        # Save & clear frame (invisible)
        self.saveclear_frame = QFrame(self.samples_page)
        self.saveclear_frame.setLayout(QHBoxLayout())
        # setContentsMargins() replaces the obsolete QLayout.setMargin()
        self.saveclear_frame.layout().setContentsMargins(0, 0, 0, 0)
        self.samples_layout.addWidget(self.saveclear_frame)
        # Clear samples button
        self.clear_samples = QPushButton(self.samples_page)
        self.set_icon(self.clear_samples, 'edit-delete')
        self.clear_samples.setText(' Clear table')
        self.clear_samples.setStyleSheet(self.style['md button'])
        self.clear_samples.clicked.connect(self.prompt_clear_data)
        # Save samples button
        self.save_samples = QPushButton(self.samples_page)
        self.set_icon(self.save_samples, 'document-save')
        self.save_samples.setText(' Save samples')
        self.save_samples.setStyleSheet(self.style['md button'])
        self.save_samples.clicked.connect(self.goto_main_page)
        # Add widgets above to save & clear layout
        self.saveclear_frame.layout().addWidget(self.clear_samples)
        self.saveclear_frame.layout().addWidget(self.save_samples)

        # ###
        # ### GATING PAGE
        # ###

        # Gating page layout
        self.gating_layout = QVBoxLayout(self.gating_page)

        # ## Title section
        # Title
        self.gates_title = QLabel(self.gating_page)
        self.gates_title.setText('Gating & outlier options')
        self.gates_title.setStyleSheet(self.style['title'])
        self.gates_title.adjustSize()
        self.gating_layout.addWidget(self.gates_title)

        # ## Gating options section
        # Gating header
        self.gate_header = QLabel(self.gating_page)
        self.gate_header.setText('Gating')
        self.gate_header.setStyleSheet(self.style['header'])
        self.gate_header.adjustSize()
        self.gating_layout.addWidget(self.gate_header)

        # Gating frame
        self.gate_frame = QFrame(self.gating_page)
        self.gate_frame.setFrameShape(QFrame.StyledPanel)
        self.gate_frame.setLayout(QFormLayout())
        self.gating_layout.addWidget(self.gate_frame)
        # Gating button group
        self.gating_group = QButtonGroup(self)
        # Do not gate samples
        self.no_gates = QRadioButton(self.gating_page)
        self.no_gates.setObjectName('no_gate')
        self.no_gates.setText("Don't gate samples")
        self.no_gates.setStyleSheet(self.style['radio button'])
        self.no_gates.setChecked(True)
        self.gating_group.addButton(self.no_gates)
        self.no_gates.clicked.connect(self.activate_gate)
        # CyToF gating
        self.cytof_gates = QRadioButton(self.gating_page)
        self.cytof_gates.setObjectName('cytof')
        self.cytof_gates.setText('Mass Cytometry gating')
        self.cytof_gates.setStyleSheet(self.style['radio button'])
        self.cytof_gates.setToolTip(
            'Exclude cells for which the average expression of all\n'
            'markers is below the selected value')
        self.gating_group.addButton(self.cytof_gates)
        self.cytof_gates.clicked.connect(self.activate_gate)
        # CyToF gating spinbox
        self.cytof_gates_value = QDoubleSpinBox(self.gating_page)
        self.cytof_gates_value.setMinimum(0)
        self.cytof_gates_value.setMaximum(1)
        self.cytof_gates_value.setValue(0.1)
        self.cytof_gates_value.setSingleStep(0.05)
        self.cytof_gates_value.setEnabled(False)
        # scRNA-Seq gating
        self.rnaseq_gates = QRadioButton(self.gating_page)
        self.rnaseq_gates.setText('scRNA-Seq gating')
        self.rnaseq_gates.setStyleSheet(self.style['radio button'])
        self.rnaseq_gates.setToolTip(
            'When calculating cutoff, ignore reads below the selected value')
        self.rnaseq_gates.setObjectName('rnaseq')
        self.gating_group.addButton(self.rnaseq_gates)
        self.rnaseq_gates.clicked.connect(self.activate_gate)
        # scRNA-Seq gating spinbox
        self.rnaseq_gates_value = QDoubleSpinBox(self.gating_page)
        self.rnaseq_gates_value.setMinimum(0)
        self.rnaseq_gates_value.setMaximum(10)
        self.rnaseq_gates_value.setValue(0)
        self.rnaseq_gates_value.setSingleStep(1)
        self.rnaseq_gates_value.setEnabled(False)
        # export gated population checkbox
        self.export_gated = QCheckBox(self.gating_page)
        self.export_gated.setText('Export gated cells as an output file')
        self.export_gated.setStyleSheet(self.style['checkbox'])
        self.export_gated.setEnabled(False)
        # Add widgets above to Gate frame layout
        self.gate_frame.layout().addRow(self.no_gates, QLabel())
        self.gate_frame.layout().addRow(self.cytof_gates,
                                        self.cytof_gates_value)
        self.gate_frame.layout().addRow(self.rnaseq_gates,
                                        self.rnaseq_gates_value)
        self.gate_frame.layout().addRow(self.export_gated, QLabel())

        # ## Outlier options section
        # Outlier header
        self.outlier_header = QLabel(self.gating_page)
        self.outlier_header.setText('Outliers')
        self.outlier_header.setStyleSheet(self.style['header'])
        self.outlier_header.adjustSize()
        self.gating_layout.addWidget(self.outlier_header)
        # Outlier frame
        self.outlier_frame = QFrame(self.gating_page)
        self.outlier_frame.setFrameShape(QFrame.StyledPanel)
        self.outlier_frame.setLayout(QVBoxLayout())
        self.gating_layout.addWidget(self.outlier_frame)
        # Top outliers information
        self.top_outliers = QLabel(self.gating_page)
        self.top_outliers.setStyleSheet(self.style['label'])
        self.top_outliers.setText(
            'By default, SCOUTS selects the top outliers from the population')
        # Bottom outliers data
        self.bottom_outliers = QCheckBox(self.gating_page)
        self.bottom_outliers.setText('Include results for low outliers')
        self.bottom_outliers.setStyleSheet(self.style['checkbox'])
        # Non-outliers data
        self.not_outliers = QCheckBox(self.gating_page)
        self.not_outliers.setText('Include results for non-outliers')
        self.not_outliers.setStyleSheet(self.style['checkbox'])
        # Add widgets above to outlier frame layout
        self.outlier_frame.layout().addWidget(self.top_outliers)
        self.outlier_frame.layout().addWidget(self.bottom_outliers)
        self.outlier_frame.layout().addWidget(self.not_outliers)

        # ## Save/back button
        self.save_gates = QPushButton(self.gating_page)
        self.set_icon(self.save_gates, 'go-next')
        self.save_gates.setText(' Back to main menu')
        self.save_gates.setStyleSheet(self.style['md button'])
        self.gating_layout.addWidget(self.save_gates)
        self.save_gates.clicked.connect(self.goto_main_page)

        # ## Add empty label to take vertical space
        self.empty_label = QLabel(self.gating_page)
        self.empty_label.setSizePolicy(QSizePolicy.Expanding,
                                       QSizePolicy.Expanding)
        self.gating_layout.addWidget(self.empty_label)
Exemplo n.º 6
0
"""
Fit accelerated failure time model on the merge uncongested and pre-breakdown data.
"""
import os
import pandas as pd
from lifelines import WeibullAFTFitter
from src.utils import get_project_root
import plotly.express as px
import plotly.io as pio
import numpy as np
from matplotlib import pyplot as plt

pio.renderers.default = "browser"

# Set paths (everything is resolved relative to the project root):
path_to_project = str(get_project_root())
path_interim = os.path.join(path_to_project, "data", "interim")
path_processed = os.path.join(path_to_project, "data", "processed")
path_figures_v1 = os.path.join(path_to_project, "figures_v1")
path_plot_dump = os.path.join(path_figures_v1, "plot_dump")
# makedirs(exist_ok=True) avoids the check-then-create race of
# os.path.exists() followed by os.mkdir(). Creating plot_dump also creates
# its parent figures_v1 when that folder is missing.
os.makedirs(path_plot_dump, exist_ok=True)
path_cap_df_merge_and_meta = os.path.join(path_interim, "all_merge_meta.csv")
path_prebreakdown_merge = os.path.join(path_interim,
                                       "prebkdn_uncongested_merge_meta.csv")


def one_hot_coding_cat(data):
    hot_code_df = pd.get_dummies(data.geometry_type)
from src.utils import get_project_root, predict_and_save_to_txt, predict_and_save_to_xlsx

# Every location used below is built from the project root.
ROOT_DIR = get_project_root()
working_dir = f"{ROOT_DIR}/src/testing/svm"
print(ROOT_DIR)
test_file = f"{ROOT_DIR}/resources/sell_detection_train.v1.0.txt"
model_file = f"{working_dir}/svm_final_model_official.sav"

# Produce the .txt result first, then the .xlsx result, from the same inputs.
txt_result_file = f"{working_dir}/result.txt"
predict_and_save_to_txt(test_file, model_file, txt_result_file)

xlsx_result_file = f"{working_dir}/result.xlsx"
predict_and_save_to_xlsx(test_file, model_file, xlsx_result_file)
import pandas as pd
import numpy as np
import os
from src.utils import get_project_root


# Folder under the project root that holds the CSV input files; every
# pd.read_csv call in get_train_data_v1 resolves its file name against it.
DATAFILES_BASEDIR = os.path.join(get_project_root(), 'input/datafiles/')


def get_train_data_v1(season=None):
    ##################################################
    # read data
    ##################################################
    RegularSeasonCompactResults = pd.read_csv(
        os.path.join(DATAFILES_BASEDIR, 'RegularSeasonCompactResults.csv'))
    NCAATourneyCompactResults = pd.read_csv(
        os.path.join(DATAFILES_BASEDIR, 'NCAATourneyCompactResults.csv'))
    NCAATourneySeeds = pd.read_csv(
        os.path.join(DATAFILES_BASEDIR, 'NCAATourneySeeds.csv'))
    TeamConferences = pd.read_csv(
        os.path.join(DATAFILES_BASEDIR, 'TeamConferences.csv'))
    Conferences = pd.read_csv(
        os.path.join(DATAFILES_BASEDIR, 'Conferences.csv'))
    Teams = pd.read_csv(
        os.path.join(DATAFILES_BASEDIR, 'Teams.csv'))
    TeamConferences = (pd.merge(TeamConferences, Conferences, on='ConfAbbrev')
                       .rename({'Description': 'conf_descr'}, axis=1))
    ##################################################
    # process data
    ##################################################
    NCAATourneySeeds['seednum'] = NCAATourneySeeds['Seed'].str.slice(1, 3).astype(int)
Exemplo n.º 9
0
import os
import pandas as pd
from src.utils import fix_birth_date, fix_transaction_date, get_project_root

# The three input CSVs sit in the ``data`` folder directly under the project
# root; their full paths are resolved once at import time.
root_folder = get_project_root()
_data_dir = os.path.join(root_folder, 'data')
person_file_path = os.path.join(_data_dir, 'BI_assignment_person.csv')
account_file_path = os.path.join(_data_dir, 'BI_assignment_account.csv')
transaction_file_path = os.path.join(_data_dir, 'BI_assignment_transaction.csv')


def process_all_files(engine):
    """Run the person, account and transaction loaders, in that order.

    :param engine: object handed unchanged to each loader (the visible
        loaders pass it to ``DataFrame.to_sql`` as ``con``).
    """
    loaders = (
        process_person_file,
        process_account_file,
        process_transaction_file,
    )
    for load in loaders:
        load(engine)


def process_person_file(engine):
    """Append the cleaned person CSV to the ``Person`` table.

    Rows that are entirely empty are dropped, and ``birth_date`` is passed
    through ``fix_birth_date`` and converted to ``datetime64[ns]`` before the
    frame is written without its index.

    :param engine: connection passed to ``DataFrame.to_sql`` as ``con``.
    """
    people = pd.read_csv(person_file_path)
    people.dropna(how='all', inplace=True)
    people['birth_date'] = (
        people['birth_date']
        .apply(fix_birth_date)
        .astype('datetime64[ns]')
    )
    people.to_sql('Person', con=engine, if_exists='append', index=False)


def process_account_file(engine):
    """Append the account CSV, as read, to the ``Account`` table.

    :param engine: connection passed to ``DataFrame.to_sql`` as ``con``.
    """
    pd.read_csv(account_file_path).to_sql(
        'Account', con=engine, if_exists='append', index=False)
Exemplo n.º 10
0
import random
import traceback

from fontTools.fontBuilder import FontBuilder
from fontTools.pens.ttGlyphPen import TTGlyphPen
from fontTools.ttLib import TTFont

from config import NAME_STRING
from src.utils import str_has_whitespace, str_has_emoji, deduplicate_str, ensure_cmap_has_all_text, subset_ttf_font, \
    get_project_root

# Project root, resolved once when this module is imported.
root = get_project_root()


def obfuscate(plain_text,
              shadow_text,
              filename: str,
              only_ttf: bool,
              target_path: str = 'output') -> dict:
    """
    :param plain_text: 用户看到的内容
    :param shadow_text: 爬虫看到的内容
    :param filename: 不含格式后缀的文件名
    :param only_ttf: 是否需要woff、woff2格式
    :param target_path: 生成的目标目录
    """

    if str_has_whitespace(plain_text) | str_has_whitespace(shadow_text):
        raise Exception('明文或阴书不允许含有空格')

    if str_has_emoji(plain_text) | str_has_emoji(shadow_text):
Exemplo n.º 11
0
 def setUp(self):
     """Create a ``LocalStorageAccessor`` for ``test_metadata_storage``.

     The storage folder path is built under the project root and kept in
     ``self.storage_loc``; the accessor built on it goes in ``self.storage``.
     """
     storage_dir = os.path.join(get_project_root(), "test_metadata_storage")
     self.storage_loc = storage_dir
     self.storage = LocalStorageAccessor(storage_dir)
Exemplo n.º 12
0
import pystan
import pickle
import argparse
import os
from src.utils import get_project_root  # see src/ folder in project repo
from src.data import make_dataset
import pandas as pd
import numpy as np
from sklearn.metrics import log_loss

# ``models`` folder under the project root.
# NOTE(review): presumably where compiled/fitted models are stored - confirm against usage.
MODEL_BASEDIR = os.path.join(get_project_root(), 'models')


def create_stan_model():
    model_code = '''
    /*
    pairwise logistic regression model of winning the game

    - Use boxscore averages for regression features
    - Share parameters with regression model of scorediff

    */

    data {
      int<lower=0> N_teams;
      int<lower=0> N;  // number of games in regular season
      int<lower=0> N_tourney;  // number of games in tournament
      int<lower=1, upper=N_teams> j_team[N + N_tourney];  // index for team 1
      int<lower=1, upper=N_teams> k_team[N + N_tourney];  // index for team 2
      real x1[N + N_tourney];  // score_mean_team1 - score_opp_team2
      real x2[N + N_tourney];  // score_opp_team1 - score_mean_team2
Exemplo n.º 13
0
    def __init__(self) -> None:
        """ViolinGUI Constructor. Defines all aspects of the GUI.

        Widgets are created and added to the main vertical layout in this
        order: title, 'Load data' section, 'Select sample names' section,
        'Plot parameters' section, legend checkbox and the Plot button.
        A secondary QMainWindow hosting the plotting canvas is created last.

        NOTE(review): ``self.style`` and ``self.set_icon`` are used below but
        are not assigned in this method -- presumably they are defined at
        class level; confirm.
        """
        # ## Setup section
        # Inherits from QMainWindow
        super().__init__()
        self.rootdir = get_project_root()
        # QMainWindow basic properties
        self.setWindowTitle("SCOUTS - Violins")
        # Window icon is loaded from <rootdir>/src/scouts.ico
        self.setWindowIcon(
            QIcon(
                os.path.abspath(os.path.join(self.rootdir, 'src',
                                             'scouts.ico'))))
        # Creates QWidget as QMainWindow's central widget
        self.page = QWidget(self)
        self.setCentralWidget(self.page)
        # Miscellaneous initialization values
        self.threadpool = QThreadPool()  # Threadpool for workers
        self.population_df = None  # DataFrame of whole population (raw data)
        self.summary_df = None  # DataFrame indicating which SCOUTS output corresponds to which rule
        self.summary_path = None  # path to all DataFrames generated by SCOUTS

        # Vertical layout that stacks every section created below
        self.main_layout = QVBoxLayout(self.page)

        # Title section
        # Title
        self.title = QLabel(self.page)
        self.title.setText('SCOUTS - Violins')
        self.title.setStyleSheet(self.style['title'])
        self.title.adjustSize()
        self.main_layout.addWidget(self.title)

        # ## Input section
        # Input header
        self.input_header = QLabel(self.page)
        self.input_header.setText('Load data')
        self.input_header.setStyleSheet(self.style['header'])
        self.input_header.adjustSize()
        self.main_layout.addWidget(self.input_header)
        # Input/Output frame
        self.input_frame = QFrame(self.page)
        self.input_frame.setFrameShape(QFrame.StyledPanel)
        self.input_frame.setLayout(QFormLayout())
        self.main_layout.addWidget(self.input_frame)
        # Raw data button
        # (object name 'file'; presumably read by get_path to tell the two
        # buttons apart -- confirm in get_path)
        self.input_button = QPushButton(self.page)
        self.input_button.setStyleSheet(self.style['button'])
        self.set_icon(self.input_button, 'x-office-spreadsheet')
        self.input_button.setObjectName('file')
        self.input_button.setText(' Load raw data file')
        self.input_button.setToolTip(
            'Load raw data file (the file given to SCOUTS as the input file)')
        self.input_button.clicked.connect(self.get_path)
        # SCOUTS results button (object name 'folder'; shares the get_path slot)
        self.output_button = QPushButton(self.page)
        self.output_button.setStyleSheet(self.style['button'])
        self.set_icon(self.output_button, 'folder')
        self.output_button.setObjectName('folder')
        self.output_button.setText(' Load SCOUTS results')
        self.output_button.setToolTip(
            'Load data from SCOUTS analysis '
            '(the folder given to SCOUTS as the output folder)')
        self.output_button.clicked.connect(self.get_path)
        # Add widgets above to input frame Layout
        self.input_frame.layout().addRow(self.input_button)
        self.input_frame.layout().addRow(self.output_button)

        # ## Samples section
        # Samples header
        self.samples_header = QLabel(self.page)
        self.samples_header.setText('Select sample names')
        self.samples_header.setStyleSheet(self.style['header'])
        self.samples_header.adjustSize()
        self.main_layout.addWidget(self.samples_header)
        # Samples frame
        self.samples_frame = QFrame(self.page)
        self.samples_frame.setFrameShape(QFrame.StyledPanel)
        self.samples_frame.setLayout(QFormLayout())
        self.main_layout.addWidget(self.samples_frame)
        # Samples label
        self.samples_label = QLabel(self.page)
        self.samples_label.setText(
            'Write sample names delimited by semicolons below.\nEx: Control;Treat_01;Pac-03'
        )
        self.samples_label.setStyleSheet(self.style['label'])
        # Sample names line edit
        self.sample_names = QLineEdit(self.page)
        self.sample_names.setStyleSheet(self.style['line edit'])
        # Add widgets above to samples frame Layout
        self.samples_frame.layout().addRow(self.samples_label)
        self.samples_frame.layout().addRow(self.sample_names)

        # ## Analysis section
        # Analysis header
        self.analysis_header = QLabel(self.page)
        self.analysis_header.setText('Plot parameters')
        self.analysis_header.setStyleSheet(self.style['header'])
        self.analysis_header.adjustSize()
        self.main_layout.addWidget(self.analysis_header)
        # Analysis frame
        self.analysis_frame = QFrame(self.page)
        self.analysis_frame.setFrameShape(QFrame.StyledPanel)
        self.analysis_frame.setLayout(QFormLayout())
        self.main_layout.addWidget(self.analysis_frame)
        # Analysis labels (one per drop-down box created below)
        self.analysis_label_01 = QLabel(self.page)
        self.analysis_label_01.setText('Compare')
        self.analysis_label_01.setStyleSheet(self.style['label'])
        self.analysis_label_02 = QLabel(self.page)
        self.analysis_label_02.setText('with')
        self.analysis_label_02.setStyleSheet(self.style['label'])
        self.analysis_label_03 = QLabel(self.page)
        self.analysis_label_03.setText('for marker')
        self.analysis_label_03.setStyleSheet(self.style['label'])
        self.analysis_label_04 = QLabel(self.page)
        self.analysis_label_04.setText('Outlier type')
        self.analysis_label_04.setStyleSheet(self.style['label'])
        # Analysis drop-down boxes
        # drop_down_01 ('Compare'): index 2 preselects 'top outliers'
        self.drop_down_01 = QComboBox(self.page)
        self.drop_down_01.addItems([
            'whole population', 'non-outliers', 'top outliers',
            'bottom outliers', 'none'
        ])
        self.drop_down_01.setStyleSheet(self.style['drop down'])
        self.drop_down_01.setCurrentIndex(2)
        # drop_down_02 ('with'): index 0 preselects 'whole population'
        self.drop_down_02 = QComboBox(self.page)
        self.drop_down_02.addItems([
            'whole population', 'non-outliers', 'top outliers',
            'bottom outliers', 'none'
        ])
        self.drop_down_02.setStyleSheet(self.style['drop down'])
        self.drop_down_02.setCurrentIndex(0)
        # drop_down_03 ('for marker'): no items are added here
        # (presumably filled once data has been loaded -- TODO confirm)
        self.drop_down_03 = QComboBox(self.page)
        self.drop_down_03.setStyleSheet(self.style['drop down'])
        # drop_down_04 ('Outlier type')
        self.drop_down_04 = QComboBox(self.page)
        self.drop_down_04.addItems(['OutS', 'OutR'])
        self.drop_down_04.setStyleSheet(self.style['drop down'])
        # Add widgets above to analysis frame Layout (label / drop-down pairs)
        self.analysis_frame.layout().addRow(self.analysis_label_01,
                                            self.drop_down_01)
        self.analysis_frame.layout().addRow(self.analysis_label_02,
                                            self.drop_down_02)
        self.analysis_frame.layout().addRow(self.analysis_label_03,
                                            self.drop_down_03)
        self.analysis_frame.layout().addRow(self.analysis_label_04,
                                            self.drop_down_04)

        # Legend checkbox (added directly to the main layout, outside the frames)
        self.legend_checkbox = QCheckBox(self.page)
        self.legend_checkbox.setText('Add legend to the plot')
        self.legend_checkbox.setStyleSheet(self.style['checkbox'])
        self.main_layout.addWidget(self.legend_checkbox)

        # Plot button (stand-alone)
        # Created disabled; it is not re-enabled anywhere in this method.
        self.plot_button = QPushButton(self.page)
        self.set_icon(self.plot_button, 'system-run')
        self.plot_button.setText(' Plot')
        self.plot_button.setToolTip(
            'Plot data after loading the input data and selecting parameters')
        self.plot_button.setStyleSheet(self.style['run button'])
        self.plot_button.setEnabled(False)
        self.plot_button.clicked.connect(self.run_plot)
        self.main_layout.addWidget(self.plot_button)

        # ## Secondary Window
        # This is used to plot the violins only
        self.secondary_window = QMainWindow(self)
        self.secondary_window.resize(720, 720)
        # Canvas is the secondary window's central widget; a NavBar toolbar is attached to it
        self.dynamic_canvas = DynamicCanvas(self.secondary_window,
                                            width=6,
                                            height=6,
                                            dpi=120)
        self.secondary_window.setCentralWidget(self.dynamic_canvas)
        self.secondary_window.addToolBar(
            NavBar(self.dynamic_canvas, self.secondary_window))
def _merge_team_lookup(data, lookup, with_season):
    """Left-join the per-team columns of ``lookup`` onto ``data`` for both teams.

    Every non-key column of ``lookup`` is attached twice: suffixed with '1'
    for ``team1`` and with '2' for ``team2``. The columns are renamed *before*
    merging and the lookup's ``TeamID`` key is dropped after each merge, so
    repeated calls never produce duplicate ``TeamID1``/``TeamID2`` labels
    (pandas >= 2.0 raises ``MergeError`` when merge suffixes create duplicates).

    :param data: frame with ``team1``/``team2`` columns (and ``Season`` when
        ``with_season`` is true).
    :param lookup: frame keyed by ``TeamID`` (and ``Season`` when
        ``with_season`` is true).
    :param with_season: whether ``Season`` is part of the join key.
    :return: ``data`` with the suffixed lookup columns appended.
    """
    season_key = ['Season'] if with_season else []
    right_keys = season_key + ['TeamID']
    value_cols = [col for col in lookup.columns if col not in right_keys]
    for side in ('1', '2'):
        suffixed = lookup.rename(columns={col: col + side for col in value_cols})
        data = (pd.merge(data, suffixed,
                         left_on=season_key + ['team' + side],
                         right_on=right_keys,
                         how='left')
                .drop('TeamID', axis=1))
    return data


def get_train_data_v1(season=None, detailed=False):
    """Build the game-level training table from the competition CSV files.

    Regular-season and tournament results are stacked (``tourney`` is 0/1),
    re-expressed from winner/loser columns to ``team1`` (lower team id) /
    ``team2`` (higher team id) columns, and enriched with tournament seeds,
    conferences and team names for both teams. The matchups listed in
    ``input/SampleSubmissionStage2.csv`` are appended to the tournament
    results; those rows only carry season and team ids, so their score and
    location columns are missing.

    :param season: if truthy, keep only the games of that season.
    :param detailed: read the 'Detailed' result files instead of the 'Compact'
        ones and carry the boxscore statistics over as ``<stat>_team1`` /
        ``<stat>_team2`` columns.
    :return: DataFrame with lower-cased column names plus an ``ID`` column of
        the form ``<season>_<team1>_<team2>``.
    """
    detail = 'Detailed' if detailed else 'Compact'
    ##################################################
    # read data
    ##################################################
    RegularSeasonResults = pd.read_csv(
        os.path.join(DATAFILES_BASEDIR, 'RegularSeason{}Results.csv'.format(detail)))
    NCAATourneyResults = pd.read_csv(
        os.path.join(DATAFILES_BASEDIR, 'NCAATourney{}Results.csv'.format(detail)))
    NCAATourneySeeds = pd.read_csv(
        os.path.join(DATAFILES_BASEDIR, 'NCAATourneySeeds.csv'))
    TeamConferences = pd.read_csv(
        os.path.join(DATAFILES_BASEDIR, 'TeamConferences.csv'))
    Conferences = pd.read_csv(
        os.path.join(DATAFILES_BASEDIR, 'Conferences.csv'))
    Teams = pd.read_csv(
        os.path.join(DATAFILES_BASEDIR, 'Teams.csv'))
    TeamConferences = (pd.merge(TeamConferences, Conferences, on='ConfAbbrev')
                       .rename({'Description': 'conf_descr'}, axis=1))
    SampleSubmissionStage2 = pd.read_csv(
        os.path.join(get_project_root(), 'input/SampleSubmissionStage2.csv'))
    # Submission IDs are '<season>_<id>_<id>'; the first id lands in WTeamID.
    tourney2019 = SampleSubmissionStage2['ID'].str.split('_', expand=True).astype(int)
    tourney2019.columns = ['Season', 'WTeamID', 'LTeamID']
    NCAATourneyResults = pd.concat([NCAATourneyResults, tourney2019])
    ##################################################
    # process data
    ##################################################
    NCAATourneySeeds['seednum'] = NCAATourneySeeds['Seed'].str.slice(1, 3).astype(int)
    RegularSeasonResults['tourney'] = 0
    NCAATourneyResults['tourney'] = 1
    # combine regular and tourney data
    data = pd.concat([RegularSeasonResults, NCAATourneyResults])
    if season:
        # copy so the column assignments below do not write into a slice
        data = data[data.Season == season].copy()
    ##################################################
    winner_is_lower = data['WTeamID'] < data['LTeamID']
    winner_is_higher = data['WTeamID'] > data['LTeamID']
    # team1: team with lower id, team2: team with higher id
    data['team1'] = data['WTeamID'].where(winner_is_lower, data['LTeamID'])
    data['team2'] = data['WTeamID'].where(winner_is_higher, data['LTeamID'])
    data['score1'] = data['WScore'].where(winner_is_lower, data['LScore'])
    data['score2'] = data['WScore'].where(winner_is_higher, data['LScore'])
    boxscore_stats = ['FGM', 'FGA', 'FGM3', 'FGA3', 'FTM', 'FTA',
                      'OR', 'DR', 'Ast', 'TO', 'Stl', 'Blk', 'PF']
    if detailed:
        for stat in boxscore_stats:
            data[stat + '_team1'] = data['W' + stat].where(winner_is_lower,
                                                           data['L' + stat])
            data[stat + '_team2'] = data['W' + stat].where(winner_is_higher,
                                                           data['L' + stat])
        data = data.drop(['W' + stat for stat in boxscore_stats], axis=1)
        data = data.drop(['L' + stat for stat in boxscore_stats], axis=1)
    # loc: id of the team playing at home, 0 if no home court
    data['loc'] = (data['WLoc']
                   .where(data['WLoc'] != 'H', data['WTeamID'])
                   .where(data['WLoc'] != 'A', data['LTeamID'])
                   .where(data['WLoc'] != 'N', 0))
    data['team1win'] = np.where(data['WTeamID'] == data['team1'], 1, 0)
    ##################################################
    # attach seeds, conferences and team names for team1 / team2
    data = _merge_team_lookup(data, NCAATourneySeeds, with_season=True)
    data = _merge_team_lookup(data, TeamConferences, with_season=True)
    data = _merge_team_lookup(data, Teams, with_season=False)
    # calculate seed diff
    data['seeddiff'] = data['seednum2'] - data['seednum1']
    data = data.drop(['WTeamID', 'WScore', 'LTeamID', 'LScore', 'WLoc'], axis=1)
    data.columns = data.columns.str.lower()
    # Vectorised concatenation: no row-wise apply, and it works on an empty frame.
    data['ID'] = (data['season'].astype(str) + '_'
                  + data['team1'].astype(str) + '_'
                  + data['team2'].astype(str))
    return data
def get_boxscore_dataset_v1(season=None, detailed=False, final_prediction=False):
    '''
    Add season-level mean/std regular-season boxscore columns for both teams.

    The base table is either the output of get_train_data_v1 or, when
    final_prediction is true, the matchups parsed from the IDs in
    input/SampleSubmissionStage2.csv. Every regular-season game is counted
    once from each side (team / opponent) before aggregating per
    (Season, TeamID); the aggregates are then joined for team1 and team2.
    '''
    if not final_prediction:
        data = get_train_data_v1(season=season, detailed=detailed)  # main data
    else:
        submission = pd.read_csv(
            os.path.join(get_project_root(), 'input/SampleSubmissionStage2.csv'))
        data = submission['ID'].str.split('_', expand=True).astype(int)
        data.index = submission['ID']
        data.columns = ['season', 'team1', 'team2']
        data = data.reset_index()

    # --- regular season boxscore data -------------------------------------
    results = pd.read_csv(
        os.path.join(DATAFILES_BASEDIR, 'RegularSeasonDetailedResults.csv'))

    # --- column bookkeeping -----------------------------------------------
    cols = results.columns
    # winner-side columns, excluding the location flag
    w_cols = (cols.str.slice(0, 1) == 'W') & (~cols.isin(['WLoc']))
    box_colnames = cols[w_cols].str.slice(1)  # stat names without the W/L prefix
    w_names = list('W' + box_colnames)
    l_names = list('L' + box_colnames)
    # swaps every W column with its L counterpart
    reverse_dict = dict(zip(w_names + l_names, l_names + w_names))
    # turns W/L boxstats into team/opponent boxstats
    rename_dict = dict(zip(w_names + l_names,
                           list(box_colnames + '_team') + list(box_colnames + '_opp')))

    # --- stack the original and the side-swapped games ---------------------
    swapped = results.rename(reverse_dict, axis=1)
    stacked = pd.concat([results, swapped], sort=True).rename(rename_dict, axis=1)
    n_games = results.shape[0]
    stacked['win'] = np.repeat([True, False], n_games)

    # --- per (season, team) boxscore statistics ----------------------------
    stat_cols = list(rename_dict.values()) + ['Season']
    grouped = stacked[stat_cols].groupby(['Season', 'TeamID_team'])
    df_boxstat = grouped.agg(['mean', 'std'])
    df_boxstat.columns = ['_'.join(col).strip() for col in df_boxstat.columns.values]
    df_boxstat.columns = df_boxstat.columns.str.lower()
    opp_id_cols = df_boxstat.columns[df_boxstat.columns.str.contains('teamid_opp')]
    df_boxstat = df_boxstat.drop(opp_id_cols, axis=1)
    df_boxstat.index.names = ['Season', 'TeamID']

    # --- merge with main data ----------------------------------------------
    data = pd.merge(data, df_boxstat,
                    left_on=['season', 'team1'], right_index=True,
                    how='left')
    data = pd.merge(data, df_boxstat,
                    left_on=['season', 'team2'], right_index=True,
                    how='left', suffixes=('1', '2'))
    return data