Exemplo n.º 1
0
class AbsoluteRelativeSpinBox(QWidget):
    """Composite spin box that swaps between a float and an integer editor.

    Two editors are stacked in a ``QStackedLayout``: a ``QDoubleSpinBox``
    at index 0 and a ``QSpinBox`` at index 1. Raising the float editor
    above 1 switches to the integer editor; lowering the integer editor to
    0 switches back to the float editor.

    Signals:
        editingFinished: re-emitted from whichever editor finished editing,
            except when the finished value is the one that triggers a page
            switch (float > 1, or integer == 0).
        valueChanged: emitted whenever the float editor's value changes.
    """

    editingFinished = pyqtSignal()
    valueChanged = pyqtSignal()

    def __init__(self, *args, **kwargs):
        """Build both editors and apply the initial value.

        Positional arguments are forwarded to ``QWidget``. The only keyword
        read here is ``value`` (default ``0.``), which is passed to
        :meth:`setValue`.
        """
        super().__init__(*args)
        layout = QStackedLayout(self)

        # Page 0: floating point editor.
        self.double_spin = QDoubleSpinBox()
        self.double_spin.valueChanged.connect(self.double_value_changed)
        self.double_spin.editingFinished.connect(self.double_editing_finished)
        layout.addWidget(self.double_spin)

        # Page 1: integer editor.
        self.int_spin = QSpinBox()
        self.int_spin.setMaximum(10 ** 4)
        self.int_spin.valueChanged.connect(self.int_value_changed)
        self.int_spin.editingFinished.connect(self.int_editing_finished)
        layout.addWidget(self.int_spin)

        self.setValue(kwargs.get('value', 0.))

    def double_value_changed(self):
        """Switch to the integer page once the float value exceeds 1."""
        if self.double_spin.value() > 1:
            self.layout().setCurrentIndex(1)
            # QSpinBox.setValue takes an int; passing the float directly
            # raises TypeError on Python >= 3.10, so truncate explicitly.
            self.int_spin.setValue(int(self.double_spin.value()))

        self.valueChanged.emit()

    def double_editing_finished(self):
        """Forward ``editingFinished`` unless the value triggers a switch."""
        if self.double_spin.value() <= 1.:
            self.editingFinished.emit()

    def int_value_changed(self):
        """Switch back to the float page when the integer value hits 0."""
        if self.int_spin.value() == 0:
            self.layout().setCurrentIndex(0)
            # Land one step below 1 on the float page.
            self.double_spin.setValue(1. - self.double_spin.singleStep())
            # There is no need to emit valueChanged signal.

    def int_editing_finished(self):
        """Forward ``editingFinished`` unless the value triggers a switch."""
        if self.int_spin.value() > 0:
            self.editingFinished.emit()

    def value(self):
        """Return the integer value if it is non-zero, else the float value."""
        return self.int_spin.value() or self.double_spin.value()

    def setValue(self, value):
        """Show the editor matching the type of *value* and set it.

        ``int`` values go to the integer page, anything else to the float
        page.
        """
        if isinstance(value, int):
            self.layout().setCurrentIndex(1)
            self.int_spin.setValue(value)
        else:
            self.layout().setCurrentIndex(0)
            self.double_spin.setValue(value)

    def setSingleStep(self, step):
        """Set the step of the float editor for ``float`` *step*, otherwise
        the step of the integer editor."""
        if isinstance(step, float):
            self.double_spin.setSingleStep(step)
        else:
            self.int_spin.setSingleStep(step)
Exemplo n.º 2
0
class PCA(BaseEditor):
    """Editor for the number of components of a PCA projection."""

    def __init__(self, parent=None, **kwargs):
        """Create the editor with a single "Components:" spin box."""
        super().__init__(parent, **kwargs)
        self.setLayout(QVBoxLayout())

        self.n_components = 10

        form = QFormLayout()
        self.cspin = QSpinBox(minimum=1, value=self.n_components)
        self.cspin.valueChanged[int].connect(self.setC)
        self.cspin.editingFinished.connect(self.edited)

        form.addRow("Components:", self.cspin)
        self.layout().addLayout(form)

    def setParameters(self, params):
        """Load ``n_components`` from *params* (default 10).

        Goes through :meth:`setC` so the spin box shows the loaded value;
        assigning the attribute directly would leave the widget (and
        ``__repr__``, which reads the widget) out of date.
        """
        self.setC(params.get("n_components", 10))

    def parameters(self):
        """Return the current settings as a ``{"n_components": ...}`` dict."""
        return {"n_components": self.n_components}

    def setC(self, n_components):
        """Set the component count, sync the spin box and emit ``changed``.

        Does nothing when the value is unchanged.
        """
        if self.n_components != n_components:
            self.n_components = n_components
            self.cspin.setValue(n_components)
            self.changed.emit()

    @staticmethod
    def createinstance(params):
        """Build a ``ProjectPCA`` from *params* (``n_components``, default 10)."""
        n_components = params.get("n_components", 10)
        return ProjectPCA(n_components=n_components)

    def __repr__(self):
        return "Components: {}".format(self.cspin.value())
Exemplo n.º 3
0
class PCA(BaseEditor):
    """Editor widget exposing the PCA ``n_components`` setting."""

    def __init__(self, parent=None, **kwargs):
        """Set up the layout and the "Components:" spin box (minimum 1)."""
        super().__init__(parent, **kwargs)
        self.setLayout(QVBoxLayout())

        self.n_components = 10

        form = QFormLayout()
        self.cspin = QSpinBox(minimum=1, value=self.n_components)
        self.cspin.valueChanged[int].connect(self.setC)
        self.cspin.editingFinished.connect(self.edited)

        form.addRow("Components:", self.cspin)
        self.layout().addLayout(form)

    def setParameters(self, params):
        """Apply the ``n_components`` entry of *params*, defaulting to 10.

        The value is routed through :meth:`setC` so that the spin box is
        updated together with the attribute; a bare attribute assignment
        would leave the spin box and ``__repr__`` showing the old value.
        """
        self.setC(params.get("n_components", 10))

    def parameters(self):
        """Return a dict holding the current ``n_components``."""
        return {"n_components": self.n_components}

    def setC(self, n_components):
        """Store a new component count.

        When the value differs from the stored one, the attribute and the
        spin box are updated and ``changed`` is emitted.
        """
        if self.n_components != n_components:
            self.n_components = n_components
            self.cspin.setValue(n_components)
            self.changed.emit()

    @staticmethod
    def createinstance(params):
        """Return ``ProjectPCA`` configured with ``params["n_components"]``
        (10 when the key is missing)."""
        n_components = params.get("n_components", 10)
        return ProjectPCA(n_components=n_components)

    def __repr__(self):
        return "Components: {}".format(self.cspin.value())
Exemplo n.º 4
0
class CUR(BaseEditor):
    """Parameter editor for a CUR projection.

    Two settings are exposed through spin boxes: the integer ``rank`` and
    the floating point ``max_error`` (labelled "Relative error").
    """

    def __init__(self, parent=None, **kwargs):
        """Build the form with the rank and relative-error spin boxes."""
        super().__init__(parent, **kwargs)
        self.setLayout(QVBoxLayout())

        # Defaults; the spin boxes below start from the same values.
        self.rank = 10
        self.max_error = 1

        self.rspin = QSpinBox(minimum=2, maximum=1000000, value=self.rank)
        self.espin = QDoubleSpinBox(
            minimum=0.1, maximum=100.0, singleStep=0.1, value=self.max_error
        )

        # Each spin box feeds its typed value into the matching setter and
        # reports the end of an edit through ``edited``.
        wiring = (
            (self.rspin, int, self.setR),
            (self.espin, float, self.setE),
        )
        for spin, value_type, setter in wiring:
            spin.valueChanged[value_type].connect(setter)
            spin.editingFinished.connect(self.edited)

        form = QFormLayout()
        form.addRow("Rank:", self.rspin)
        form.addRow("Relative error:", self.espin)
        self.layout().addLayout(form)

    def setParameters(self, params):
        """Apply ``rank`` (default 10) and ``max_error`` (default 1)."""
        self.setR(params.get("rank", 10))
        self.setE(params.get("max_error", 1))

    def parameters(self):
        """Return the current settings as a dict."""
        return {"rank": self.rank, "max_error": self.max_error}

    def setR(self, rank):
        """Update the rank; no-op when the value is unchanged."""
        if self.rank == rank:
            return
        self.rank = rank
        self.rspin.setValue(rank)
        self.changed.emit()

    def setE(self, max_error):
        """Update the relative error; no-op when the value is unchanged."""
        if self.max_error == max_error:
            return
        self.max_error = max_error
        self.espin.setValue(max_error)
        self.changed.emit()

    @staticmethod
    def createinstance(params):
        """Build a ``ProjectCUR`` from *params*, using the same defaults
        as :meth:`setParameters`."""
        return ProjectCUR(
            rank=params.get("rank", 10),
            max_error=params.get("max_error", 1),
        )

    def __repr__(self):
        rank_shown = self.rspin.value()
        error_shown = self.espin.value()
        return "Rank: {}, Relative error: {}".format(rank_shown, error_shown)
Exemplo n.º 5
0
class CUR(BaseEditor):
    """Editor widget for the CUR ``rank`` and ``max_error`` settings."""

    def __init__(self, parent=None, **kwargs):
        """Create the "Rank:" and "Relative error:" form rows."""
        super().__init__(parent, **kwargs)
        self.setLayout(QVBoxLayout())

        self.rank = 10
        self.max_error = 1

        # Integer editor for the rank (minimum 2).
        rank_spin = self.rspin = QSpinBox(minimum=2, value=self.rank)
        rank_spin.valueChanged[int].connect(self.setR)
        rank_spin.editingFinished.connect(self.edited)

        # Floating point editor for the relative error.
        error_spin = self.espin = QDoubleSpinBox(
            minimum=0.1,
            maximum=100.0,
            singleStep=0.1,
            value=self.max_error,
        )
        error_spin.valueChanged[float].connect(self.setE)
        error_spin.editingFinished.connect(self.edited)

        rows = QFormLayout()
        rows.addRow("Rank:", rank_spin)
        rows.addRow("Relative error:", error_spin)
        self.layout().addLayout(rows)

    def setParameters(self, params):
        """Load both settings from *params* via the setters."""
        rank = params.get("rank", 10)
        self.setR(rank)
        max_error = params.get("max_error", 1)
        self.setE(max_error)

    def parameters(self):
        """Return ``{"rank": ..., "max_error": ...}``."""
        current = {"rank": self.rank, "max_error": self.max_error}
        return current

    def setR(self, rank):
        """Store *rank*, sync the spin box and emit ``changed`` on change."""
        unchanged = self.rank == rank
        if not unchanged:
            self.rank = rank
            self.rspin.setValue(rank)
            self.changed.emit()

    def setE(self, max_error):
        """Store *max_error*, sync the spin box and emit ``changed`` on
        change."""
        unchanged = self.max_error == max_error
        if not unchanged:
            self.max_error = max_error
            self.espin.setValue(max_error)
            self.changed.emit()

    @staticmethod
    def createinstance(params):
        """Return a ``ProjectCUR`` built from *params* (rank default 10,
        max_error default 1)."""
        settings = {
            "rank": params.get("rank", 10),
            "max_error": params.get("max_error", 1),
        }
        return ProjectCUR(**settings)

    def __repr__(self):
        shown = (self.rspin.value(), self.espin.value())
        return "Rank: {}, Relative error: {}".format(*shown)
Exemplo n.º 6
0
class FilteringModule(MultipleMethodModule):
    """Editor for token filtering methods.

    Each method occupies one row of the layout: stopwords (language combo
    plus file loader), lexicon (file loader), regular expression (validated
    line edit), document frequency (relative or absolute range) and most
    frequent tokens (spin box). ``DummyDocFreq`` is only a second layout
    row holding the absolute-frequency controls; its title widget is hidden.
    """

    Stopwords, Lexicon, Regexp, DocFreq, DummyDocFreq, MostFreq = range(6)
    Methods = {
        Stopwords: StopwordsFilter,
        Lexicon: LexiconFilter,
        Regexp: RegexpFilter,
        DocFreq: FrequencyFilter,
        DummyDocFreq: FrequencyFilter,
        MostFreq: MostFrequentTokensFilter
    }
    DEFAULT_METHODS = [Stopwords]
    DEFAULT_LANG = "English"
    DEFAULT_NONE = None
    # Raw literals avoid invalid escape sequences (a SyntaxWarning on
    # Python 3.12+). The double quote alternative is spliced in as its own
    # literal so the resulting string is identical to the previous value.
    DEFAULT_PATTERN = (
        r"\.|,|:|;|!|\?|\(|\)|\||\+|'|" '"' r"|‘|’|“|”|'|"
        r"\’|…|\-|–|—|\$|&|\*|>|<|\/|\[|\]"
    )
    DEFAULT_FREQ_TYPE = 0  # 0 - relative freq, 1 - absolute freq
    DEFAULT_REL_START, DEFAULT_REL_END, REL_MIN, REL_MAX = 0.1, 0.9, 0, 1
    DEFAULT_ABS_START, DEFAULT_ABS_END, ABS_MIN, ABS_MAX = 1, 10, 0, 10000
    DEFAULT_N_TOKEN = 100

    def __init__(self, parent=None, **kwargs):
        """Create all controls and place them in the module's grid layout."""
        super().__init__(parent, **kwargs)
        self.__sw_lang = self.DEFAULT_LANG
        self.__sw_file = self.DEFAULT_NONE
        self.__lx_file = self.DEFAULT_NONE
        self.__pattern = self.DEFAULT_PATTERN
        self.__freq_type = self.DEFAULT_FREQ_TYPE
        self.__rel_freq_st = self.DEFAULT_REL_START
        self.__rel_freq_en = self.DEFAULT_REL_END
        self.__abs_freq_st = self.DEFAULT_ABS_START
        self.__abs_freq_en = self.DEFAULT_ABS_END
        self.__n_token = self.DEFAULT_N_TOKEN
        self.__invalidated = False

        # Stopwords: language selector plus a custom stopword file loader.
        self.__combo = ComboBox(self, [_DEFAULT_NONE] +
                                StopwordsFilter.supported_languages(),
                                self.__sw_lang, self.__set_language)
        self.__sw_loader = FileLoader()
        self.__sw_loader.set_file_list()
        self.__sw_loader.activated.connect(self.__sw_loader_activated)
        self.__sw_loader.file_loaded.connect(self.__sw_invalidate)

        # Lexicon: file loader only.
        self.__lx_loader = FileLoader()
        self.__lx_loader.set_file_list()
        self.__lx_loader.activated.connect(self.__lx_loader_activated)
        self.__lx_loader.file_loaded.connect(self.__lx_invalidate)

        # Regexp: line edit validated by RegexpFilter.
        validator = RegexpFilter.validate_regexp
        self.__edit = ValidatedLineEdit(self.__pattern, validator)
        self.__edit.editingFinished.connect(self.__edit_finished)

        # Document frequency: radio buttons select relative (id 0) or
        # absolute (id 1) range.
        rel_freq_rb = QRadioButton("相对:")
        abs_freq_rb = QRadioButton("绝对:")
        self.__freq_group = group = QButtonGroup(self, exclusive=True)
        group.addButton(rel_freq_rb, 0)
        group.addButton(abs_freq_rb, 1)
        group.buttonClicked.connect(self.__freq_group_clicked)
        group.button(self.__freq_type).setChecked(True)

        self.__rel_range_spins = RangeDoubleSpins(self.__rel_freq_st, 0.05,
                                                  self.__rel_freq_en,
                                                  self.REL_MIN, self.REL_MAX,
                                                  self.__set_rel_freq_start,
                                                  self.__set_rel_freq_end,
                                                  self.__rel_spins_edited)
        self.__abs_range_spins = RangeSpins(self.__abs_freq_st, 1,
                                            self.__abs_freq_en, self.ABS_MIN,
                                            self.ABS_MAX,
                                            self.__set_abs_freq_start,
                                            self.__set_abs_freq_end,
                                            self.__abs_spins_edited)

        # Most frequent tokens: how many tokens to keep.
        self.__spin_n = QSpinBox(minimum=1,
                                 maximum=10**6,
                                 value=self.__n_token)
        self.__spin_n.editingFinished.connect(self.__spin_n_edited)
        self.__spin_n.valueChanged.connect(self.changed)

        # Layout rows are indexed by the method constants.
        self.layout().addWidget(self.__combo, self.Stopwords, 1)
        self.layout().addWidget(self.__sw_loader.file_combo, self.Stopwords, 2,
                                1, 2)
        self.layout().addWidget(self.__sw_loader.browse_btn, self.Stopwords, 4)
        self.layout().addWidget(self.__sw_loader.load_btn, self.Stopwords, 5)
        self.layout().addWidget(self.__lx_loader.file_combo, self.Lexicon, 2,
                                1, 2)
        self.layout().addWidget(self.__lx_loader.browse_btn, self.Lexicon, 4)
        self.layout().addWidget(self.__lx_loader.load_btn, self.Lexicon, 5)
        self.layout().addWidget(self.__edit, self.Regexp, 1, 1, 5)
        spins = self.__rel_range_spins.spins()
        self.layout().addWidget(rel_freq_rb, self.DocFreq, 1)
        self.layout().addWidget(spins[0], self.DocFreq, 2)
        self.layout().addWidget(spins[1], self.DocFreq, 3)
        spins = self.__abs_range_spins.spins()
        self.layout().addWidget(abs_freq_rb, self.DummyDocFreq, 1)
        self.layout().addWidget(spins[0], self.DummyDocFreq, 2)
        self.layout().addWidget(spins[1], self.DummyDocFreq, 3)
        # The DummyDocFreq row only hosts the absolute-frequency controls,
        # so its own title widget in column 0 is hidden.
        title = self.layout().itemAtPosition(self.DummyDocFreq, 0).widget()
        title.hide()
        self.layout().addWidget(self.__spin_n, self.MostFreq, 1)
        self.layout().setColumnStretch(3, 1)

    def __sw_loader_activated(self):
        """Record the selected stopword file; emit ``edited`` if stopword
        filtering is active."""
        self.__sw_file = self.__sw_loader.get_current_file()
        self.changed.emit()
        if self.Stopwords in self.methods:
            self.edited.emit()

    def __sw_invalidate(self):
        """React to a (re)loaded stopword file when it is in use."""
        if self.Stopwords in self.methods and self.__sw_file:
            # A fresh random value makes the reported parameters differ
            # from the previous ones — presumably so that consumers treat
            # the reload as a change; confirm against the caller.
            self.__invalidated = random.random()
            self.edited.emit()

    def __lx_loader_activated(self):
        """Record the selected lexicon file; emit ``edited`` if lexicon
        filtering is active."""
        self.__lx_file = self.__lx_loader.get_current_file()
        self.changed.emit()
        if self.Lexicon in self.methods:
            self.edited.emit()

    def __lx_invalidate(self):
        """React to a (re)loaded lexicon file when it is in use."""
        if self.Lexicon in self.methods and self.__lx_file:
            # See __sw_invalidate for the purpose of the random value.
            self.__invalidated = random.random()
            self.edited.emit()

    def __edit_finished(self):
        """Store an edited pattern; emit ``edited`` if regexp filtering is
        active."""
        pattern = self.__edit.text()
        if self.__pattern != pattern:
            self.__set_pattern(pattern)
            if self.Regexp in self.methods:
                self.edited.emit()

    def __freq_group_clicked(self):
        """Store the newly checked frequency type; emit ``edited`` if
        document-frequency filtering is active."""
        i = self.__freq_group.checkedId()
        if self.__freq_type != i:
            self.__set_freq_type(i)
            if self.DocFreq in self.methods:
                self.edited.emit()

    def __rel_spins_edited(self):
        """Emit ``edited`` when the relative range is the one in effect."""
        if self.DocFreq in self.methods and self.__freq_type == 0:
            self.edited.emit()

    def __abs_spins_edited(self):
        """Emit ``edited`` when the absolute range is the one in effect."""
        if self.DocFreq in self.methods and self.__freq_type == 1:
            self.edited.emit()

    def __spin_n_edited(self):
        """Store an edited token count; emit ``edited`` if most-frequent
        filtering is active."""
        n = self.__spin_n.value()
        if self.__n_token != n:
            self.__set_n_tokens(n)
            if self.MostFreq in self.methods:
                self.edited.emit()

    def setParameters(self, params: Dict):
        """Apply *params* to the controls, using class defaults for any
        missing key, and reset the invalidation marker."""
        super().setParameters(params)
        self.__set_language(params.get("language", self.DEFAULT_LANG))
        self.__set_sw_path(params.get("sw_path", self.DEFAULT_NONE),
                           params.get("sw_list", []))
        self.__set_lx_path(params.get("lx_path", self.DEFAULT_NONE),
                           params.get("lx_list", []))
        self.__set_pattern(params.get("pattern", self.DEFAULT_PATTERN))
        self.__set_freq_type(params.get("freq_type", self.DEFAULT_FREQ_TYPE))
        self.__set_rel_freq_range(
            params.get("rel_start", self.DEFAULT_REL_START),
            params.get("rel_end", self.DEFAULT_REL_END))
        self.__set_abs_freq_range(
            params.get("abs_start", self.DEFAULT_ABS_START),
            params.get("abs_end", self.DEFAULT_ABS_END))
        self.__set_n_tokens(params.get("n_tokens", self.DEFAULT_N_TOKEN))
        self.__invalidated = False

    def __set_language(self, language: str):
        """Store a new stopword language and sync the combo box."""
        if self.__sw_lang != language:
            self.__sw_lang = language
            self.__combo.setCurrentText(language)
            self.changed.emit()
            if self.Stopwords in self.methods:
                self.edited.emit()

    def __set_sw_path(self, path: RecentPath, paths: List[RecentPath] = None):
        """Set the stopword loader's recent paths and current file.

        *paths* defaults to a fresh empty list. A shared ``[]`` default
        would be stored on the loader and could leak between calls.
        """
        self.__sw_loader.recent_paths = [] if paths is None else paths
        self.__sw_loader.set_file_list()
        self.__sw_loader.set_current_file(_to_abspath(path))
        self.__sw_file = self.__sw_loader.get_current_file()

    def __set_lx_path(self, path: RecentPath, paths: List[RecentPath] = None):
        """Set the lexicon loader's recent paths and current file.

        *paths* defaults to a fresh empty list. A shared ``[]`` default
        would be stored on the loader and could leak between calls.
        """
        self.__lx_loader.recent_paths = [] if paths is None else paths
        self.__lx_loader.set_file_list()
        self.__lx_loader.set_current_file(_to_abspath(path))
        self.__lx_file = self.__lx_loader.get_current_file()

    def __set_pattern(self, pattern: str):
        """Store a new regexp pattern and sync the line edit."""
        if self.__pattern != pattern:
            self.__pattern = pattern
            self.__edit.setText(pattern)
            self.changed.emit()

    def __set_freq_type(self, freq_type: int):
        """Store a new frequency type (0 relative, 1 absolute) and check the
        matching radio button."""
        if self.__freq_type != freq_type:
            self.__freq_type = freq_type
            self.__freq_group.button(self.__freq_type).setChecked(True)
            self.changed.emit()

    def __set_rel_freq_range(self, start: float, end: float):
        """Store the relative range and push it to the range spins."""
        self.__set_rel_freq_start(start)
        self.__set_rel_freq_end(end)
        self.__rel_range_spins.set_range(start, end)

    def __set_rel_freq_start(self, n: float):
        """Store the relative range start; emit ``changed`` on change."""
        if self.__rel_freq_st != n:
            self.__rel_freq_st = n
            self.changed.emit()

    def __set_rel_freq_end(self, n: float):
        """Store the relative range end; emit ``changed`` on change."""
        if self.__rel_freq_en != n:
            self.__rel_freq_en = n
            self.changed.emit()

    def __set_abs_freq_range(self, start: int, end: int):
        """Store the absolute range and push it to the range spins."""
        self.__set_abs_freq_start(start)
        self.__set_abs_freq_end(end)
        self.__abs_range_spins.set_range(start, end)

    def __set_abs_freq_start(self, n: int):
        """Store the absolute range start; emit ``changed`` on change."""
        if self.__abs_freq_st != n:
            self.__abs_freq_st = n
            self.changed.emit()

    def __set_abs_freq_end(self, n: int):
        """Store the absolute range end; emit ``changed`` on change."""
        if self.__abs_freq_en != n:
            self.__abs_freq_en = n
            self.changed.emit()

    def __set_n_tokens(self, n: int):
        """Store a new token count and sync the spin box."""
        if self.__n_token != n:
            self.__n_token = n
            self.__spin_n.setValue(n)
            self.changed.emit()

    def parameters(self) -> Dict:
        """Return the base-class parameters extended with this module's
        settings (the same keys that :meth:`setParameters` reads, plus
        ``invalidated``)."""
        params = super().parameters()
        params.update({
            "language": self.__sw_lang,
            "sw_path": self.__sw_file,
            "sw_list": self.__sw_loader.recent_paths,
            "lx_path": self.__lx_file,
            "lx_list": self.__lx_loader.recent_paths,
            "pattern": self.__pattern,
            "freq_type": self.__freq_type,
            "rel_start": self.__rel_freq_st,
            "rel_end": self.__rel_freq_en,
            "abs_start": self.__abs_freq_st,
            "abs_end": self.__abs_freq_en,
            "n_tokens": self.__n_token,
            "invalidated": self.__invalidated
        })
        return params

    @staticmethod
    def createinstance(params: Dict) -> List[BaseTokenFilter]:
        """Build the list of filters selected in ``params["methods"]``.

        Filters are appended in the fixed order stopwords, lexicon, regexp,
        document frequency, most frequent tokens. Missing keys fall back to
        the class defaults.
        """
        def map_none(s):
            # The "no language" combo entry maps to an empty string.
            return "" if s == _DEFAULT_NONE else s

        methods = params.get("methods", FilteringModule.DEFAULT_METHODS)
        filters = []
        if FilteringModule.Stopwords in methods:
            lang = params.get("language", FilteringModule.DEFAULT_LANG)
            path = params.get("sw_path", FilteringModule.DEFAULT_NONE)
            filters.append(
                StopwordsFilter(language=map_none(lang),
                                path=_to_abspath(path)))
        if FilteringModule.Lexicon in methods:
            path = params.get("lx_path", FilteringModule.DEFAULT_NONE)
            filters.append(LexiconFilter(path=_to_abspath(path)))
        if FilteringModule.Regexp in methods:
            pattern = params.get("pattern", FilteringModule.DEFAULT_PATTERN)
            filters.append(RegexpFilter(pattern=pattern))
        if FilteringModule.DocFreq in methods:
            # freq_type 0 selects the relative range, anything else the
            # absolute one.
            if params.get("freq_type", FilteringModule.DEFAULT_FREQ_TYPE) == 0:
                st = params.get("rel_start", FilteringModule.DEFAULT_REL_START)
                end = params.get("rel_end", FilteringModule.DEFAULT_REL_END)
            else:
                st = params.get("abs_start", FilteringModule.DEFAULT_ABS_START)
                end = params.get("abs_end", FilteringModule.DEFAULT_ABS_END)
            filters.append(FrequencyFilter(min_df=st, max_df=end))
        if FilteringModule.MostFreq in methods:
            n = params.get("n_tokens", FilteringModule.DEFAULT_N_TOKEN)
            filters.append(MostFrequentTokensFilter(keep_n=n))
        return filters

    def __repr__(self):
        texts = []
        for method in self.methods:
            if method == self.Stopwords:
                append = f"Language: {self.__sw_lang}, " \
                         f"File: {_to_abspath(self.__sw_file)}"
            elif method == self.Lexicon:
                append = f"File: {_to_abspath(self.__lx_file)}"
            elif method == self.Regexp:
                append = f"{self.__pattern}"
            elif method == self.DocFreq:
                if self.__freq_type == 0:
                    append = f"[{self.__rel_freq_st}, {self.__rel_freq_en}]"
                else:
                    append = f"[{self.__abs_freq_st}, {self.__abs_freq_en}]"
            elif method == self.MostFreq:
                append = f"{self.__n_token}"
            texts.append(f"{self.Methods[method].name} ({append})")
        return ", ".join(texts)
Exemplo n.º 7
0
class StandardizeEditor(ScBaseEditor):
    """Editor for optional lower/upper clipping bounds of ``Standardize``.

    Each bound has a check box that enables it and a spin box holding its
    value.
    """

    DEFAULT_LOWER_CLIP = False
    DEFAULT_UPPER_CLIP = False
    DEFAULT_LOWER_BOUND = -10
    DEFAULT_UPPER_BOUND = 10

    def __init__(self, parent=None, **kwargs):
        """Build the "Clipping" group with one row per bound."""
        super().__init__(parent, **kwargs)
        self._lower_bound = self.DEFAULT_LOWER_BOUND
        self._upper_bound = self.DEFAULT_UPPER_BOUND

        self.setLayout(QVBoxLayout())

        group = QGroupBox(title="Clipping", flat=True)
        rows = QFormLayout()

        # Lower bound row: the spin box is limited to [-99, 0].
        self.lower_check = QCheckBox("Lower Bound: ")
        self.lower_check.clicked.connect(self.edited)
        self.lower_spin = QSpinBox(
            minimum=-99, maximum=0, value=self._lower_bound
        )
        self.lower_spin.valueChanged[int].connect(self._set_lower_bound)
        self.lower_spin.editingFinished.connect(self.edited)

        # Upper bound row: the spin box keeps its default range.
        self.upper_check = QCheckBox("Upper Bound: ")
        self.upper_check.clicked.connect(self.edited)
        self.upper_spin = QSpinBox(value=self._upper_bound)
        self.upper_spin.valueChanged[int].connect(self._set_upper_bound)
        self.upper_spin.editingFinished.connect(self.edited)

        rows.addRow(self.lower_check, self.lower_spin)
        rows.addRow(self.upper_check, self.upper_spin)
        group.setLayout(rows)
        self.layout().addWidget(group)

    def _set_lower_bound(self, x):
        """Store a new lower bound; no-op when the value is unchanged."""
        if self._lower_bound == x:
            return
        self._lower_bound = x
        self.lower_spin.setValue(x)
        self.changed.emit()

    def _set_upper_bound(self, x):
        """Store a new upper bound; no-op when the value is unchanged."""
        if self._upper_bound == x:
            return
        self._upper_bound = x
        self.upper_spin.setValue(x)
        self.changed.emit()

    def setParameters(self, params):
        """Apply clip flags and bounds from *params*, falling back to the
        class defaults for missing keys."""
        self.lower_check.setChecked(
            params.get("lower_clip", self.DEFAULT_LOWER_CLIP))
        self._set_lower_bound(params.get("lower", self.DEFAULT_LOWER_BOUND))
        self.upper_check.setChecked(
            params.get("upper_clip", self.DEFAULT_UPPER_CLIP))
        self._set_upper_bound(params.get("upper", self.DEFAULT_UPPER_BOUND))

    def parameters(self):
        """Return the clip flags and bound values as a dict."""
        settings = {}
        settings["lower_clip"] = self.lower_check.isChecked()
        settings["lower"] = self._lower_bound
        settings["upper_clip"] = self.upper_check.isChecked()
        settings["upper"] = self._upper_bound
        return settings

    @staticmethod
    def createinstance(params):
        """Build ``Standardize(lower, upper)``; a bound is ``None`` unless
        its clip flag is set in *params*."""
        defaults = StandardizeEditor
        lower = None
        upper = None
        if params.get("lower_clip", defaults.DEFAULT_LOWER_CLIP):
            lower = params.get("lower", defaults.DEFAULT_LOWER_BOUND)
        if params.get("upper_clip", defaults.DEFAULT_UPPER_CLIP):
            upper = params.get("upper", defaults.DEFAULT_UPPER_BOUND)
        return Standardize(lower, upper)

    def __repr__(self):
        bounds = (
            (self.lower_check, self.lower_spin, "Lower Bound: {}"),
            (self.upper_check, self.upper_spin, "Upper Bound: {}"),
        )
        enabled = [
            template.format(spin.value())
            for check, spin, template in bounds
            if check.isChecked()
        ]
        return ", ".join(enabled) or "No Clipping"