def check(self, filenames):
    """Print a warning for each word whose context counters look suspicious.

    Every file is read, split into words and scanned with a sliding
    window whose current word sits at index ``self.window_left``.  For
    each window position a list of indexes is built (current word first,
    then the other window entries, with words missing from
    ``self.dictionary`` replaced by ``self.unknown``) and looked up in
    ``self.counters``.

    Args:
        filenames: iterable of paths of the text files to check.
    """
    # debug print
    print('\n\n')
    for each in filenames:
        # Context manager so the handle is closed even if reading fails.
        with open(each, 'r') as file:
            string = file.read()
        words = Splitter.imp_split(Splitter.detect_const(string))

        # initialize window: newline padding followed by the first words
        window = ['\n'] * (self.window_left + 1)
        window.extend(words[:self.window_right])
        cur_word_index = self.window_left

        # process checking for each word
        for new_word in words[self.window_right:]:
            # shift window one slot to the left and append the new word
            for i in range(0, self.window_len - 1):
                window[i] = window[i + 1]
            window[self.window_len - 1] = new_word

            # debug print
            print('Window: ' + repr(window))

            # make indexes (current is first)
            current = window[cur_word_index]
            indexes = [current if current in self.dictionary
                       else self.unknown]
            for word in window:
                # NOTE(review): identity (not equality) comparison is kept
                # on purpose so the index list has the same shape as the
                # one built by train(); confirm that this is intended.
                if word is not current:
                    indexes.append(word if word in self.dictionary
                                   else self.unknown)

            # debug print
            print('Indexes: ' + repr(indexes))

            # actual checking
            # this part is undiscussed and likely to be wrong
            # The current word counts as unknown when its index was
            # replaced by the sentinel.  The previous test looked at
            # words[cur_word_index], which never changes inside the loop
            # and holds a raw word rather than the sentinel.
            # NOTE(review): `id` is passed as the read-only operation;
            # presumably a module-level identity helper -- verify.
            if indexes[0] == self.unknown:
                if multidict_operation(self.counters, indexes, id) == 0:
                    print('Word {} can be wrong'.format(current))
            else:
                if multidict_operation(self.counters, indexes, id) < \
                        multidict_operation(self.counters[self.unknown],
                                            indexes[1:], id):
                    print('Word {} can be wrong'.format(current))

            # debug print
            print()
def check(self, filenames):
    """Print a warning for each word whose context counters look suspicious.

    Every file is read, split into words and scanned with a sliding
    window whose current word sits at index ``self.window_left``.  For
    each window position a list of indexes is built (current word first,
    then the other window entries, with words missing from
    ``self.dictionary`` replaced by ``self.unknown``) and looked up in
    ``self.counters``.

    Args:
        filenames: iterable of paths of the text files to check.
    """
    # debug print
    print('\n\n')
    for each in filenames:
        # Context manager so the handle is closed even if reading fails.
        with open(each, 'r') as file:
            string = file.read()
        words = Splitter.imp_split(Splitter.detect_const(string))

        # initialize window: newline padding followed by the first words
        window = ['\n'] * (self.window_left + 1)
        window.extend(words[:self.window_right])
        cur_word_index = self.window_left

        # process checking for each word
        for new_word in words[self.window_right:]:
            # shift window one slot to the left and append the new word
            for i in range(0, self.window_len - 1):
                window[i] = window[i + 1]
            window[self.window_len - 1] = new_word

            # debug print
            print('Window: ' + repr(window))

            # make indexes (current is first)
            current = window[cur_word_index]
            indexes = [current if current in self.dictionary
                       else self.unknown]
            for word in window:
                # NOTE(review): identity (not equality) comparison is kept
                # on purpose so the index list has the same shape as the
                # one built during training; confirm that this is intended.
                if word is not current:
                    indexes.append(word if word in self.dictionary
                                   else self.unknown)

            # debug print
            print('Indexes: ' + repr(indexes))

            # actual checking
            # this part is undiscussed and likely to be wrong
            # The current word counts as unknown when its index was
            # replaced by the sentinel.  The previous test looked at
            # words[cur_word_index], which never changes inside the loop
            # and holds a raw word rather than the sentinel.
            # NOTE(review): `id` is passed as the read-only operation;
            # presumably a module-level identity helper -- verify.
            if indexes[0] == self.unknown:
                if multidict_operation(self.counters, indexes, id) == 0:
                    print('Word {} can be wrong'.format(current))
            else:
                if multidict_operation(self.counters, indexes, id) < \
                        multidict_operation(self.counters[self.unknown],
                                            indexes[1:], id):
                    print('Word {} can be wrong'.format(current))

            # debug print
            print()
def __init__(self, stop_syn=None, useSplitter=None, extra=None):
    """Reset the instance and select the splitter to use.

    Args:
        stop_syn: mapping stored as ``self.stop_syn``; a fresh empty dict
            is used when it is None.
        useSplitter: splitter name; when falsy the first entry of
            ``Splitter.splitterNames`` is used instead.
        extra: stored unchanged as ``self.splitterParams``.
    """
    self.clear()
    self.stop_syn = {} if stop_syn is None else stop_syn
    # The default name is looked up unconditionally, as before.
    fallback_name = Splitter.splitterNames[0]
    self.useSplitter = useSplitter if useSplitter else fallback_name
    self.splitterParams = extra
    self.SplitterFunc = Splitter.getSplitter(self.useSplitter)
def initSplitter(self):
    """Build the stop-word lookup table and create the splitter object.

    Sets ``self.stop_word_dict`` (every stop word mapped to None),
    ``self.splitterobj`` and finally ``self.init = 1``.
    """
    import Splitter

    stop_words = (
        'am', 'ii', 'iii', 'per', 'po', 're', 'a', 'about', 'above',
        'across', 'after', 'afterwards', 'again', 'against', 'all',
        'almost', 'alone', 'along', 'already', 'also', 'although',
        'always', 'am', 'among', 'amongst', 'amoungst', 'amount', 'an',
        'and', 'another', 'any', 'anyhow', 'anyone', 'anything', 'anyway',
        'anywhere', 'are', 'around', 'as', 'at', 'back', 'be', 'became',
        'because', 'become', 'becomes', 'becoming', 'been', 'before',
        'beforehand', 'behind', 'being', 'below', 'beside', 'besides',
        'between', 'beyond', 'bill', 'both', 'bottom', 'but', 'by', 'can',
        'cannot', 'cant', 'con', 'could', 'couldnt', 'cry', 'describe',
        'detail', 'do', 'done', 'down', 'due', 'during', 'each', 'eg',
        'eight', 'either', 'eleven', 'else', 'elsewhere', 'empty',
        'enough', 'even', 'ever', 'every', 'everyone', 'everything',
        'everywhere', 'except', 'few', 'fifteen', 'fifty', 'fill', 'find',
        'fire', 'first', 'five', 'for', 'former', 'formerly', 'forty',
        'found', 'four', 'from', 'front', 'full', 'further', 'get', 'give',
        'go', 'had', 'has', 'hasnt', 'have', 'he', 'hence', 'her', 'here',
        'hereafter', 'hereby', 'herein', 'hereupon', 'hers', 'herself',
        'him', 'himself', 'his', 'how', 'however', 'hundred', 'i', 'ie',
        'if', 'in', 'inc', 'indeed', 'interest', 'into', 'is', 'it', 'its',
        'itself', 'keep', 'last', 'latter', 'latterly', 'least', 'less',
        'made', 'many', 'may', 'me', 'meanwhile', 'might', 'mill', 'mine',
        'more', 'moreover', 'most', 'mostly', 'move', 'much', 'must', 'my',
        'myself', 'name', 'namely', 'neither', 'never', 'nevertheless',
        'next', 'nine', 'no', 'nobody', 'none', 'noone', 'nor', 'not',
        'nothing', 'now', 'nowhere', 'of', 'off', 'often', 'on', 'once',
        'one', 'only', 'onto', 'or', 'other', 'others', 'otherwise', 'our',
        'ours', 'ourselves', 'out', 'over', 'own', 'per', 'perhaps',
        'please', 'pre', 'put', 'rather', 're', 'same', 'see', 'seem',
        'seemed', 'seeming', 'seems', 'serious', 'several', 'she',
        'should', 'show', 'side', 'since', 'sincere', 'six', 'sixty', 'so',
        'some', 'somehow', 'someone', 'something', 'sometime', 'sometimes',
        'somewhere', 'still', 'such', 'take', 'ten', 'than', 'that', 'the',
        'their', 'them', 'themselves', 'then', 'thence', 'there',
        'thereafter', 'thereby', 'therefore', 'therein', 'thereupon',
        'these', 'they', 'thick', 'thin', 'third', 'this', 'those',
        'though', 'three', 'through', 'throughout', 'thru', 'thus', 'to',
        'together', 'too', 'toward', 'towards', 'twelve', 'twenty', 'two',
        'un', 'under', 'until', 'up', 'upon', 'us', 'very', 'via', 'was',
        'we', 'well', 'were', 'what', 'whatever', 'when', 'whence',
        'whenever', 'where', 'whereafter', 'whereas', 'whereby', 'wherein',
        'whereupon', 'wherever', 'whether', 'which', 'while', 'whither',
        'who', 'whoever', 'whole', 'whom', 'whose', 'why', 'will', 'with',
        'within', 'without', 'would', 'yet', 'you', 'your', 'yours',
        'yourself', 'yourselves',
    )

    # Only key membership matters; every value is None.  Repeated words
    # in the tuple ('am', 'per', 're') collapse into a single key.
    self.stop_word_dict = dict.fromkeys(stop_words)
    self.splitterobj = Splitter.getSplitter()
    self.init = 1
def getInfo(persons, text, lan, splitter=None):
    """Return the sentences of ``text`` that mention any of ``persons``.

    Args:
        persons: iterable of names to look for (substring match).
        text: the text to search.
        lan: language code; ``"RU"`` selects ``Splitter.split_into_sentences``
            (after a space is inserted behind every period), anything else
            uses ``sent_tokenize``.  Only consulted when ``splitter`` is None.
        splitter: when not None, ``text`` is simply split on ``"\\r\\n"``.

    Returns:
        A list of matching sentences in document order.  A sentence that
        contains several of the names appears once per matching name.
    """
    if splitter is not None:
        textSent = text.split(u"\r\n")
    elif lan == "RU":
        textSent = Splitter.split_into_sentences(text.replace(".", ". "))
    else:
        textSent = sent_tokenize(text)

    print("text = ", textSent)
    # Sentence-major order, one entry per (sentence, person) hit.
    return [sentence
            for sentence in textSent
            for person in persons
            if person in sentence]
def __init__(self, parent=None):
    """Assemble the assets window: side menu, splitter pane and menu bar.

    The window is parented to whatever ``_get_maya_main_window()``
    returns; the ``parent`` argument is accepted but not used.
    """
    super(MainWindow, self).__init__(_get_maya_main_window())
    self.resize(1250, 750)
    self.setWindowTitle(u'assets')

    side_menu = AssetsMenu.LeftTitleWidget()
    self.splitter_window = Splitter.Splitter()

    # The first three side-menu buttons each switch to one root directory.
    root_dir_keys = ('ma_root_dir', 'tex_root_dir', 'hdr_root_dir')
    for slot, dir_key in enumerate(root_dir_keys):
        side_menu.button_list[slot].clicked.connect(
            partial(self.change_dir, dir_key))

    # Central area: side menu on the left, splitter pane on the right.
    central = QtWidgets.QWidget(self)
    row = QtWidgets.QHBoxLayout(central)
    row.addWidget(side_menu)
    row.addWidget(self.splitter_window)
    self.setCentralWidget(central)

    help_action = QtWidgets.QAction(
        QtGui.QIcon('icons/help.png'), u'no write', self)
    help_action.setShortcut('Ctrl+Q')
    help_action.setStatusTip('help application')

    root_dir_action = QtWidgets.QAction(
        QtGui.QIcon('icons/help.png'), u'set root dir', self)
    root_dir_action.setShortcut('Ctrl+O')
    root_dir_action.setStatusTip('help application')
    root_dir_action.triggered.connect(self.root_dir_window)

    self.statusBar()
    bar = self.menuBar()
    settings_menu = bar.addMenu(u'&设置')
    help_menu = bar.addMenu(u'&帮助')
    help_menu.addAction(help_action)
    settings_menu.addAction(root_dir_action)
def __init__(self, useSplitter=None, extra=None):
    """Reset the instance and resolve the splitter callable.

    Args:
        useSplitter: value handed to ``Splitter.getSplitter``; also kept
            as ``self.useSplitter``.
        extra: stored unchanged as ``self.splitterParams``.
    """
    self.clear()
    self.useSplitter, self.splitterParams = useSplitter, extra
    self.SplitterFunc = Splitter.getSplitter(self.useSplitter)
#!/usr/bin/env python
"""Score the sentiment of each item of a Google News RSS feed.

Every summary returned by the importer is split, tagged against the YAML
dictionaries and scored; one ``summary -> score`` line is printed per item.
"""
import Splitter
import Tagger
import Scorer
import Importer
import yaml
from pprint import pprint

if __name__ == "__main__":
    rss = Importer.RSSImporter(
        'https://news.google.com/news/feeds?q=apple&output=rss')
    input_text = rss.parse()
    s = Splitter.Splitter()
    tagger = Tagger.DictionaryTagger([
        'dicts/positive.yml',
        'dicts/negative.yml',
        'dicts/inc.yml',
        'dicts/dec.yml',
        'dicts/inv.yml',
    ])
    scorer = Scorer.Scorer()

    # Running sum of all scores (accumulated but not reported here).
    total = 0
    for summary in input_text:
        split = s.split(summary)
        tagged = tagger.tag(split)
        score = scorer.score(tagged)
        # Call form with a single argument prints the same text on
        # Python 2 and is valid syntax on Python 3.
        print("%s -> %d" % (summary, score))
        total += score
def __init__(self, useSplitter=None, extra=None):
    """Reset the instance and resolve the splitter callable.

    Args:
        useSplitter: value handed to ``Splitter.getSplitter``; also kept
            as ``self.useSplitter``.
        extra: stored unchanged as ``self.splitterParams``.
    """
    self.clear()
    self.useSplitter, self.splitterParams = useSplitter, extra
    self.SplitterFunc = Splitter.getSplitter(self.useSplitter)
# Split the input via Splitter, then feed the resulting hash string to a
# DistributedHashTable and print the intermediate results.
import DHT
import Splitter
from DHT import DistributedHashTable

md5hashstring = ''
count = 1
md5hashstring = str(Splitter.split())

distribute = DistributedHashTable(md5hashstring)
distribute.tochararray()
# Single-argument call form: same output on Python 2, valid on Python 3.
print(distribute.chararray)
distribute.createkeyvaluepairs()
print(distribute.pieces)

peerlist = [1, 2, 3, 4]
print(md5hashstring)
def __init__(self):
    """Build the 'Name Tool' frame: a rename section and a find/replace one.

    Creates all child widgets, stores the ones other methods need as
    attributes, wires their signals and finally refreshes the example
    label through ``self._update_example()``.
    """
    QtWidgets.QFrame.__init__(self)

    self.setWindowFlags(QtCore.Qt.WindowStaysOnTopHint)
    self.setWindowTitle('Name Tool')
    self.setFixedHeight(285)
    self.setFixedWidth(320)

    self.setLayout(QtWidgets.QVBoxLayout())
    self.layout().setContentsMargins(5, 5, 5, 5)
    self.layout().setSpacing(0)
    self.layout().setAlignment(QtCore.Qt.AlignTop)

    # Rename Widget
    rename_widget = QtWidgets.QWidget()  # Widget holding upper name stuff
    rename_widget.setLayout(QtWidgets.QVBoxLayout())
    rename_widget.layout().setContentsMargins(0, 0, 0, 0)
    rename_widget.layout().setSpacing(2)
    rename_widget.setSizePolicy(QtWidgets.QSizePolicy.Minimum,
                                QtWidgets.QSizePolicy.Fixed)
    self.layout().addWidget(rename_widget)

    # Label Splitter
    # `Splitter` is used as a module everywhere else in this method
    # (Splitter.Splitter / Splitter.SplitterLayout), so the widget class
    # has to be reached through it; calling the module itself fails.
    rename_splitter = Splitter.Splitter('Rename')  # Custom splitter widget
    rename_widget.layout().addWidget(rename_splitter)

    # Rename Input
    rename_text_layout = QtWidgets.QHBoxLayout()
    rename_text_layout.setContentsMargins(4, 0, 4, 0)
    rename_text_layout.setSpacing(2)
    rename_widget.layout().addLayout(rename_text_layout)

    rename_text_label = QtWidgets.QLabel('Rename: ')
    self.rename_line_edit = QtWidgets.QLineEdit()
    self.rename_line_edit.setPlaceholderText(
        'C_objectName_##_CTRL')  # Grey text
    rename_text_layout.addWidget(rename_text_label)
    rename_text_layout.addWidget(self.rename_line_edit)

    # Regular Expression
    # [] lists the accepted characters.
    # NOTE(review): `(?!...)` is a negative lookahead rather than a
    # character exclusion -- confirm the pattern does what is intended.
    reg_ex = QtCore.QRegExp('^(?!@$^_)[a-zA-Z_#]+')
    text_validator = QtGui.QRegExpValidator(reg_ex, self.rename_line_edit)
    self.rename_line_edit.setValidator(text_validator)

    rename_widget.layout().addLayout(Splitter.SplitterLayout())

    # AlphaNumeric Options
    rename_alphanumberic_layout = QtWidgets.QHBoxLayout()
    rename_alphanumberic_layout.setContentsMargins(4, 0, 4, 0)
    rename_alphanumberic_layout.setSpacing(2)
    rename_widget.layout().addLayout(rename_alphanumberic_layout)

    rename_alphanumberic_label = QtWidgets.QLabel('Name List Method: ')
    self.rename_alpha_radio = QtWidgets.QRadioButton('Alpha')
    self.rename_number_radio = QtWidgets.QRadioButton('Numbers')
    self.rename_number_radio.setChecked(True)
    self.rename_alpha_radio.setFixedHeight(19)

    rename_alphanumberic_layout.addWidget(rename_alphanumberic_label)
    rename_alphanumberic_layout.addSpacerItem(
        QtWidgets.QSpacerItem(5, 5, QtWidgets.QSizePolicy.Expanding)
    )
    rename_alphanumberic_layout.addWidget(self.rename_alpha_radio)
    rename_alphanumberic_layout.addWidget(self.rename_number_radio)

    # Hidden Upper/Lower Case buttons
    rename_options_layout = QtWidgets.QHBoxLayout()
    rename_options_layout.setContentsMargins(4, 0, 4, 0)
    rename_options_layout.setSpacing(2)
    rename_widget.layout().addLayout(rename_options_layout)

    self.alpha_case_group = QtWidgets.QButtonGroup()
    self.lower_radio = QtWidgets.QRadioButton('Lowercase')
    self.upper_radio = QtWidgets.QRadioButton('Uppercase')
    self.alpha_case_group.addButton(self.lower_radio)
    self.alpha_case_group.addButton(self.upper_radio)
    self.lower_radio.setVisible(False)
    self.upper_radio.setVisible(False)
    self.lower_radio.setFixedHeight(19)
    self.upper_radio.setFixedHeight(19)
    self.upper_radio.setChecked(True)

    rename_options_layout.addWidget(self.lower_radio)
    rename_options_layout.addSpacerItem(
        QtWidgets.QSpacerItem(5, 5, QtWidgets.QSizePolicy.Expanding)
    )
    rename_options_layout.addWidget(self.upper_radio)

    # Starting Number
    rename_starting_number_layout = QtWidgets.QHBoxLayout()
    rename_starting_number_layout.setContentsMargins(4, 0, 4, 0)
    rename_starting_number_layout.setSpacing(2)
    rename_widget.layout().addLayout(rename_starting_number_layout)

    self.rename_start_label = QtWidgets.QLabel('Starting Number: ')
    self.rename_start_number = QtWidgets.QSpinBox()
    self.rename_start_number.setFixedWidth(50)
    self.rename_start_number.setMinimum(0)
    self.rename_start_number.setMaximum(999)

    rename_starting_number_layout.addWidget(self.rename_start_label)
    rename_starting_number_layout.addWidget(self.rename_start_number)

    rename_widget.layout().addLayout(Splitter.SplitterLayout())

    # Execute List Rename Button
    rename_button_layout = QtWidgets.QHBoxLayout()
    rename_button_layout.setContentsMargins(4, 0, 4, 0)
    rename_button_layout.setSpacing(0)
    rename_widget.layout().addLayout(rename_button_layout)

    self.rename_label = QtWidgets.QLabel('')
    rename_button = QtWidgets.QPushButton('Rename')
    rename_button.setFixedHeight(20)
    rename_button.setFixedWidth(55)

    rename_button_layout.addWidget(self.rename_label)
    rename_button_layout.addWidget(rename_button)

    # Replace Widget
    replace_widget = QtWidgets.QWidget()  # Widget holding lower name stuff
    replace_widget.setLayout(QtWidgets.QVBoxLayout())
    replace_widget.layout().setContentsMargins(0, 0, 0, 0)
    replace_widget.layout().setSpacing(2)
    replace_widget.setSizePolicy(QtWidgets.QSizePolicy.Minimum,
                                 QtWidgets.QSizePolicy.Fixed)
    self.layout().addWidget(replace_widget)

    replace_splitter = Splitter.Splitter(
        'Find & Replace')  # Custom splitter widget
    replace_widget.layout().addWidget(replace_splitter)

    find_label = QtWidgets.QLabel('Find: ')
    self.find_line_edit = QtWidgets.QLineEdit()
    replace_label = QtWidgets.QLabel('Replace: ')
    self.replace_line_edit = QtWidgets.QLineEdit()

    find_label.setFixedWidth(55)
    replace_label.setFixedWidth(55)

    # One validator shared by both find and replace inputs.
    # NOTE(review): it is parented to rename_line_edit -- confirm that
    # ownership is intended.
    reg_ex = QtCore.QRegExp('[a-zA-Z_]+')
    text_validator = QtGui.QRegExpValidator(reg_ex, self.rename_line_edit)
    self.find_line_edit.setValidator(text_validator)
    self.replace_line_edit.setValidator(text_validator)

    find_layout = QtWidgets.QHBoxLayout()
    find_layout.setContentsMargins(4, 0, 4, 0)
    find_layout.setSpacing(2)
    find_layout.addWidget(find_label)
    find_layout.addWidget(self.find_line_edit)
    replace_widget.layout().addLayout(find_layout)

    replace_layout = QtWidgets.QHBoxLayout()
    replace_layout.setContentsMargins(4, 0, 4, 0)
    replace_layout.setSpacing(2)
    replace_layout.addWidget(replace_label)
    replace_layout.addWidget(self.replace_line_edit)
    replace_widget.layout().addLayout(replace_layout)

    replace_widget.layout().addLayout(Splitter.SplitterLayout())

    selection_layout = QtWidgets.QHBoxLayout()
    selection_layout.setContentsMargins(4, 0, 4, 0)
    selection_layout.setSpacing(2)
    replace_widget.layout().addLayout(selection_layout)

    selection_mode_label = QtWidgets.QLabel('Selection Mode: ')
    self.selected_radio_button = QtWidgets.QRadioButton('Selected')
    self.selected_radio_button.setFixedHeight(19)
    self.selected_radio_button.setChecked(True)
    self.hierarchy_radio_button = QtWidgets.QRadioButton('Hierarchy')
    self.hierarchy_radio_button.setFixedHeight(19)

    selection_layout.addWidget(selection_mode_label)
    spacer_item = QtWidgets.QSpacerItem(5, 5,
                                        QtWidgets.QSizePolicy.Expanding)
    selection_layout.addSpacerItem(spacer_item)
    selection_layout.addWidget(self.selected_radio_button)
    selection_layout.addWidget(self.hierarchy_radio_button)

    replace_widget.layout().addLayout(Splitter.SplitterLayout())

    replace_button = QtWidgets.QPushButton('Replace')
    replace_button.setFixedHeight(20)
    replace_button.setFixedWidth(55)

    replace_button_layout = QtWidgets.QHBoxLayout()
    replace_button_layout.setContentsMargins(4, 0, 4, 0)
    replace_button_layout.setSpacing(0)
    replace_button_layout.setAlignment(QtCore.Qt.AlignRight)
    replace_button_layout.addWidget(replace_button)
    replace_widget.layout().addLayout(replace_button_layout)

    # State Change modifiers
    # Need to set the changed status of the alphanumeric radio buttons to
    # influence the _toggle_rename_vis() function
    self.rename_alpha_radio.clicked.connect(self._toggle_rename_vis)
    self.rename_number_radio.clicked.connect(self._toggle_rename_vis)

    self.lower_radio.clicked.connect(self._update_example)
    self.upper_radio.clicked.connect(self._update_example)
    self.rename_start_number.valueChanged.connect(self._update_example)

    self.rename_line_edit.textChanged.connect(self._update_example)

    rename_button.clicked.connect(self.list_rename)
    replace_button.clicked.connect(self.replace_text)

    self._update_example()
def __init__(self):
    """Build the node tool frame.

    Three stacked sections separated by splitter layouts: create a
    regular node, duplicate a node network (find/replace), and create a
    plugin node.  Signals are connected once all widgets exist and the
    node-name example is refreshed at the end.
    """
    QtWidgets.QFrame.__init__(self)
    self.setWindowFlags(QtCore.Qt.WindowStaysOnTopHint)

    outer_layout = QtWidgets.QVBoxLayout()
    self.setLayout(outer_layout)
    outer_layout.setContentsMargins(1, 1, 1, 1)
    outer_layout.setSpacing(0)
    outer_layout.setAlignment(QtCore.Qt.AlignTop)

    node_widget = QtWidgets.QWidget()
    node_layout = QtWidgets.QVBoxLayout()
    node_widget.setLayout(node_layout)
    node_layout.setContentsMargins(2, 2, 2, 2)
    node_layout.setSpacing(5)
    node_widget.setSizePolicy(QtWidgets.QSizePolicy.Minimum,
                              QtWidgets.QSizePolicy.Fixed)
    outer_layout.addWidget(node_widget)

    # One horizontal row per line of the form, grouped by section.
    # Regular node layouts
    name_layout, type_layout, button_layout = (
        QtWidgets.QHBoxLayout() for _ in range(3))
    # Duplicate node layouts
    find_layout, replace_layout, dup_button_layout = (
        QtWidgets.QHBoxLayout() for _ in range(3))
    # Plugin node layouts
    plugin_name_layout, plugin_type_layout, plugin_button_layout = (
        QtWidgets.QHBoxLayout() for _ in range(3))

    for row in (name_layout, type_layout, button_layout):
        node_layout.addLayout(row)
    node_layout.addLayout(Splitter.SplitterLayout())
    for row in (find_layout, replace_layout, dup_button_layout):
        node_layout.addLayout(row)
    node_layout.addLayout(Splitter.SplitterLayout())
    for row in (plugin_name_layout, plugin_type_layout,
                plugin_button_layout):
        node_layout.addLayout(row)

    # Create Nodes widgets --------------------------------------------- #
    name_caption = QtWidgets.QLabel('Node Name:')
    self.input_node_name = QtWidgets.QLineEdit()
    self.input_node_name.setPlaceholderText('prefix_nodeName')  # Grey text

    name_pattern = QtCore.QRegExp('^(?!@$^_)[a-zA-Z_#_0-9]+')
    name_validator = QtGui.QRegExpValidator(name_pattern,
                                            self.input_node_name)
    self.input_node_name.setValidator(name_validator)

    name_layout.addWidget(name_caption)
    name_layout.addWidget(self.input_node_name)

    type_caption = QtWidgets.QLabel('Node Type:')
    self.node_type_combo = QtWidgets.QComboBox()
    # Only keys whose first character is unchanged by lower() are offered.
    regular_names = sorted(node_data.NODE_NAME_DICTIONARY.keys())
    for entry in regular_names:
        first_char = entry[0]
        if first_char != first_char.lower():
            continue
        self.node_type_combo.addItem(entry)

    type_layout.addWidget(type_caption)
    type_layout.addWidget(self.node_type_combo)

    # Show Node name example
    self.node_display_example = QtWidgets.QLabel('')
    create_node_button = QtWidgets.QPushButton('Create Node')
    button_layout.addWidget(self.node_display_example)
    button_layout.addWidget(create_node_button)

    # Duplicate Node Network widgets (find row) -------------------------- #
    find_caption = QtWidgets.QLabel('Find:')
    self.find_name = QtWidgets.QLineEdit()
    self.find_name.setPlaceholderText('L_')  # Grey text
    find_layout.addWidget(find_caption)
    find_layout.addWidget(self.find_name)

    # Create Plugin Node widgets ---------------------------------------- #
    plugin_caption = QtWidgets.QLabel('Custom Node Name:')
    self.input_plugin_node_name = QtWidgets.QLineEdit()
    self.input_plugin_node_name.setPlaceholderText(
        'prefix_nodeName')  # Grey text
    # Same pattern as the regular node name, separate validator object.
    plugin_validator = QtGui.QRegExpValidator(
        name_pattern, self.input_plugin_node_name)
    self.input_plugin_node_name.setValidator(plugin_validator)

    plugin_name_layout.addWidget(plugin_caption)
    plugin_name_layout.addWidget(self.input_plugin_node_name)

    plugin_type_caption = QtWidgets.QLabel('Plugin Node Type:')
    self.plugin_node_type_combo = QtWidgets.QComboBox()
    plugin_names = sorted(plugin_node_name_dictionary.keys())
    for entry in plugin_names:
        first_char = entry[0]
        if first_char != first_char.lower():
            continue
        self.plugin_node_type_combo.addItem(entry)

    plugin_type_layout.addWidget(plugin_type_caption)
    plugin_type_layout.addWidget(self.plugin_node_type_combo)

    # Show Node name example
    self.plugin_node_display_example = QtWidgets.QLabel('')
    create_plugin_node_button = QtWidgets.QPushButton('Create Node')
    plugin_button_layout.addWidget(self.plugin_node_display_example)
    plugin_button_layout.addWidget(create_plugin_node_button)

    # Duplicate Node Network widgets (replace row) ----------------------- #
    replace_caption = QtWidgets.QLabel('Replace:')
    self.replace_name = QtWidgets.QLineEdit()
    self.replace_name.setPlaceholderText('R_')  # Grey text
    replace_layout.addWidget(replace_caption)
    replace_layout.addWidget(self.replace_name)

    duplicate_network_button = QtWidgets.QPushButton(
        'Duplicate Node Network')
    dup_button_layout.addWidget(duplicate_network_button)

    # Signal wiring ------------------------------------------------------ #
    self.node_type_combo.currentIndexChanged.connect(
        self._update_node_name)
    self.plugin_node_type_combo.currentIndexChanged.connect(
        self._update_plugin_node_name)
    self.input_node_name.textChanged.connect(self._update_node_name)
    self.input_plugin_node_name.textChanged.connect(
        self._update_plugin_node_name)

    create_node_button.clicked.connect(self._get_node_settings)
    duplicate_network_button.clicked.connect(self._duplicate_node_network)
    create_plugin_node_button.clicked.connect(
        self._get_plugin_node_settings)

    self._update_node_name()
import SIC_Compiler # INPUTPATH should be the name of the csv containing raw extractor output. # INPUTPATH = "SP_v6_raw.csv" INPUTPATH = "dow_v5_raw.csv" # For each model that you want to make, put a tuple in MODELS_TO_TEST (tags, kind, threshold) # MODELS_TO_TEST = [('all', 'boolean', 0.2),('all', 'boolean', 1.0),('all', 'freq', 0.2),('all', 'freq', 1.0),('all', 'tfidf', 0.2),('all', 'tfidf', 1.0),('nouns', 'boolean', 0.2),('nouns', 'boolean', 1.0),('nouns', 'freq', 0.2),('nouns', 'freq', 1.0),('nouns', 'tfidf', 0.2),('nouns', 'tfidf', 1.0)] # MODELS_TO_TEST = [('sic', 'tfidf', 0.2),('sic', 'tfidf', 1.0),('sic', 'boolean', 0.2),('sic', 'boolean', 1.0),('sic', 'freq', 0.2),('sic', 'freq', 1.0)] # MODELS_TO_TEST = [('nouns', 'tfidf', 1.0)] MODELS_TO_TEST = [('sic', 'freq', 1.0)] PLOT_HEAT = True PLOT_SHUFFLE = False for filename in os.listdir('test_data'): os.unlink('test_data/' + filename) Splitter.splitfile(INPUTPATH) # for i in range(0, 10): # SIC_Compiler.compile_sic_groups("sic_descriptions", runID = str(i)) for modelspecs in MODELS_TO_TEST: modelname = modelspecs[0] + "_" + modelspecs[1] + "_" + str( modelspecs[2]) + "_built_on_" + INPUTPATH[:-8] # modelname = modelspecs[0] + "_" + modelspecs[1] + "_" + str(modelspecs[2]) + "_" + INPUTPATH[:-8] outputpath = INPUTPATH[:-8] + "_WV_model" # outputpath = "dow_v5_12_models" if not os.path.exists(outputpath): os.mkdir(outputpath) PLOTPATHS = [] if PLOT_HEAT: PLOTPATHS.append(outputpath + "/" + modelname + "_similarities.csv")
def __init__(self):
    """Build the 'Name Tool' frame.

    The frame stacks four sections, each in its own widget: list rename,
    find & replace, prefix & suffix editing, and name cleanup.  Widgets
    that other methods need are stored as attributes.  All signals are
    connected at the end, after which the example label is refreshed
    through ``self._update_example()``.
    """
    QtWidgets.QFrame.__init__(self)

    self.setWindowFlags(QtCore.Qt.WindowStaysOnTopHint)
    self.setWindowTitle('Name Tool')
    self.setMinimumHeight(285)
    self.setMinimumWidth(320)

    self.setLayout(QtWidgets.QVBoxLayout())
    self.layout().setContentsMargins(5, 5, 5, 5)
    self.layout().setSpacing(0)
    self.layout().setAlignment(QtCore.Qt.AlignTop)

    # Rename Widget
    rename_widget = QtWidgets.QWidget()  # Widget holding upper name stuff
    rename_widget.setLayout(QtWidgets.QVBoxLayout())
    rename_widget.layout().setContentsMargins(0, 0, 0, 0)
    rename_widget.layout().setSpacing(2)
    rename_widget.setSizePolicy(QtWidgets.QSizePolicy.Minimum,
                                QtWidgets.QSizePolicy.Fixed)
    self.layout().addWidget(rename_widget)

    # Label Splitter
    rename_splitter = Splitter.Splitter('Rename')  # Custom splitter widget
    rename_widget.layout().addWidget(rename_splitter)

    # Rename Input
    rename_text_layout = QtWidgets.QHBoxLayout()
    rename_text_layout.setContentsMargins(4, 0, 4, 0)
    rename_text_layout.setSpacing(2)
    rename_widget.layout().addLayout(rename_text_layout)

    rename_text_label = QtWidgets.QLabel('Rename: ')
    self.rename_line_edit = QtWidgets.QLineEdit()
    self.rename_line_edit.setPlaceholderText(
        'Component_Side_objectName_##_CTL')  # Grey text
    rename_text_layout.addWidget(rename_text_label)
    rename_text_layout.addWidget(self.rename_line_edit)

    # Regular Expression
    # [] lists the accepted characters.
    # NOTE(review): `(?!...)` is a negative lookahead rather than a
    # character exclusion -- confirm the pattern does what is intended.
    reg_ex = QtCore.QRegExp('^(?!@$^_)[0-9a-zA-Z_#]+')
    text_validator = QtGui.QRegExpValidator(reg_ex, self.rename_line_edit)
    self.rename_line_edit.setValidator(text_validator)

    rename_widget.layout().addLayout(Splitter.SplitterLayout())

    # AlphaNumeric Options
    rename_alphanumberic_layout = QtWidgets.QHBoxLayout()
    rename_alphanumberic_layout.setContentsMargins(4, 0, 4, 0)
    rename_alphanumberic_layout.setSpacing(2)
    rename_widget.layout().addLayout(rename_alphanumberic_layout)

    rename_alphanumberic_label = QtWidgets.QLabel('Name List Method: ')
    self.rename_alpha_radio = QtWidgets.QRadioButton('Alpha')
    self.rename_number_radio = QtWidgets.QRadioButton('Numbers')
    # Numbering is the default list method.
    self.rename_number_radio.setChecked(True)
    self.rename_alpha_radio.setFixedHeight(19)

    rename_alphanumberic_layout.addWidget(rename_alphanumberic_label)
    rename_alphanumberic_layout.addSpacerItem(
        QtWidgets.QSpacerItem(5, 5, QtWidgets.QSizePolicy.Expanding)
    )
    rename_alphanumberic_layout.addWidget(self.rename_alpha_radio)
    rename_alphanumberic_layout.addWidget(self.rename_number_radio)

    # Hidden Upper/Lower Case buttons
    rename_options_layout = QtWidgets.QHBoxLayout()
    rename_options_layout.setContentsMargins(4, 0, 4, 0)
    rename_options_layout.setSpacing(2)
    rename_widget.layout().addLayout(rename_options_layout)

    self.alpha_case_group = QtWidgets.QButtonGroup()
    self.lower_radio = QtWidgets.QRadioButton('Lowercase')
    self.upper_radio = QtWidgets.QRadioButton('Uppercase')
    self.alpha_case_group.addButton(self.lower_radio)
    self.alpha_case_group.addButton(self.upper_radio)
    # Both case buttons start hidden.
    self.lower_radio.setVisible(False)
    self.upper_radio.setVisible(False)
    self.lower_radio.setFixedHeight(19)
    self.upper_radio.setFixedHeight(19)
    self.upper_radio.setChecked(True)

    rename_options_layout.addWidget(self.lower_radio)
    rename_options_layout.addSpacerItem(
        QtWidgets.QSpacerItem(5, 5, QtWidgets.QSizePolicy.Expanding)
    )
    rename_options_layout.addWidget(self.upper_radio)

    # Starting Number
    rename_starting_number_layout = QtWidgets.QHBoxLayout()
    rename_starting_number_layout.setContentsMargins(4, 0, 4, 0)
    rename_starting_number_layout.setSpacing(2)
    rename_widget.layout().addLayout(rename_starting_number_layout)

    self.rename_start_label = QtWidgets.QLabel('Starting Number: ')
    self.rename_start_number = QtWidgets.QSpinBox()
    self.rename_start_number.setFixedWidth(50)
    self.rename_start_number.setMinimum(0)
    self.rename_start_number.setMaximum(999)

    self.list_end_condition_label = QtWidgets.QLabel('End with "END":')
    self.list_end_condition_checkbox = QtWidgets.QCheckBox()

    rename_starting_number_layout.addWidget(self.rename_start_label)
    rename_starting_number_layout.addWidget(self.rename_start_number)
    rename_starting_number_layout.addSpacerItem(
        QtWidgets.QSpacerItem(5, 5, QtWidgets.QSizePolicy.Expanding)
    )
    rename_starting_number_layout.addWidget(self.list_end_condition_label)
    rename_starting_number_layout.addWidget(
        self.list_end_condition_checkbox)

    rename_widget.layout().addLayout(Splitter.SplitterLayout())

    # Execute List Rename Button
    rename_button_layout = QtWidgets.QHBoxLayout()
    rename_button_layout.setContentsMargins(4, 0, 4, 0)
    rename_button_layout.setSpacing(0)
    rename_widget.layout().addLayout(rename_button_layout)

    # Starts empty; sits to the left of the Rename button.
    self.rename_label = QtWidgets.QLabel('')
    rename_button = QtWidgets.QPushButton('Rename')
    rename_button.setFixedHeight(20)
    rename_button.setFixedWidth(55)

    rename_button_layout.addWidget(self.rename_label)
    rename_button_layout.addWidget(rename_button)

    # Replace Widget
    replace_widget = QtWidgets.QWidget()  # Widget holding lower name stuff
    replace_widget.setLayout(QtWidgets.QVBoxLayout())
    replace_widget.layout().setContentsMargins(0, 0, 0, 0)
    replace_widget.layout().setSpacing(2)
    replace_widget.setSizePolicy(QtWidgets.QSizePolicy.Minimum,
                                 QtWidgets.QSizePolicy.Fixed)
    self.layout().addWidget(replace_widget)

    replace_splitter = Splitter.Splitter('Find & Replace')
    replace_widget.layout().addWidget(replace_splitter)

    find_label = QtWidgets.QLabel('Find: ')
    self.find_line_edit = QtWidgets.QLineEdit()
    replace_label = QtWidgets.QLabel('Replace: ')
    self.replace_line_edit = QtWidgets.QLineEdit()

    find_label.setFixedWidth(55)
    replace_label.setFixedWidth(55)

    # One validator shared by both the find and the replace input.
    # NOTE(review): it is parented to rename_line_edit -- confirm that
    # ownership is intended.
    reg_ex = QtCore.QRegExp('[0-9a-zA-Z_]+')
    text_validator = QtGui.QRegExpValidator(reg_ex, self.rename_line_edit)
    self.find_line_edit.setValidator(text_validator)
    self.replace_line_edit.setValidator(text_validator)

    find_layout = QtWidgets.QHBoxLayout()
    find_layout.setContentsMargins(4, 0, 4, 0)
    find_layout.setSpacing(2)
    find_layout.addWidget(find_label)
    find_layout.addWidget(self.find_line_edit)
    replace_widget.layout().addLayout(find_layout)

    replace_layout = QtWidgets.QHBoxLayout()
    replace_layout.setContentsMargins(4, 0, 4, 0)
    replace_layout.setSpacing(2)
    replace_layout.addWidget(replace_label)
    replace_layout.addWidget(self.replace_line_edit)
    replace_widget.layout().addLayout(replace_layout)

    replace_widget.layout().addLayout(Splitter.SplitterLayout())

    selection_layout = QtWidgets.QHBoxLayout()
    selection_layout.setContentsMargins(4, 0, 4, 0)
    selection_layout.setSpacing(2)
    replace_widget.layout().addLayout(selection_layout)

    selection_mode_label = QtWidgets.QLabel('Selection Mode: ')
    self.selected_radio_button = QtWidgets.QRadioButton('Selected')
    self.selected_radio_button.setFixedHeight(19)
    # 'Selected' is the default selection mode.
    self.selected_radio_button.setChecked(True)
    self.hierarchy_radio_button = QtWidgets.QRadioButton('Hierarchy')
    self.hierarchy_radio_button.setFixedHeight(19)

    selection_layout.addWidget(selection_mode_label)
    spacer_item = QtWidgets.QSpacerItem(5, 5,
                                        QtWidgets.QSizePolicy.Expanding)
    selection_layout.addSpacerItem(spacer_item)
    selection_layout.addWidget(self.selected_radio_button)
    selection_layout.addWidget(self.hierarchy_radio_button)

    replace_widget.layout().addLayout(Splitter.SplitterLayout())

    replace_button = QtWidgets.QPushButton('Replace')
    replace_button.setFixedHeight(20)
    replace_button.setFixedWidth(55)

    replace_button_layout = QtWidgets.QHBoxLayout()
    replace_button_layout.setContentsMargins(4, 0, 4, 0)
    replace_button_layout.setSpacing(0)
    replace_button_layout.setAlignment(QtCore.Qt.AlignRight)
    replace_button_layout.addWidget(replace_button)
    replace_widget.layout().addLayout(replace_button_layout)

    # Prefix and Suffix
    additions_widget = QtWidgets.QWidget()
    additions_widget.setLayout(QtWidgets.QVBoxLayout())
    additions_widget.layout().setContentsMargins(0, 0, 0, 0)
    additions_widget.layout().setSpacing(2)
    additions_widget.setSizePolicy(QtWidgets.QSizePolicy.Minimum,
                                   QtWidgets.QSizePolicy.Fixed)
    self.layout().addWidget(additions_widget)

    # Label Splitter
    additions_splitter = Splitter.Splitter('Prefix & Suffix')
    additions_widget.layout().addWidget(additions_splitter)

    prefix_layout = QtWidgets.QHBoxLayout()
    prefix_layout.setContentsMargins(4, 0, 4, 0)
    prefix_layout.setSpacing(2)
    additions_widget.layout().addLayout(prefix_layout)

    suffix_layout = QtWidgets.QHBoxLayout()
    suffix_layout.setContentsMargins(4, 0, 4, 0)
    suffix_layout.setSpacing(2)
    additions_widget.layout().addLayout(suffix_layout)

    # Each row: label, text input, then add / remove / replace buttons.
    prefix_label = QtWidgets.QLabel('Prefix:')
    self.prefix_line_edit = QtWidgets.QLineEdit()
    self.prefix_add_button = QtWidgets.QPushButton('+')
    self.prefix_remove_button = QtWidgets.QPushButton('-')
    self.prefix_replace_button = QtWidgets.QPushButton('><')  # Change later

    prefix_layout.addWidget(prefix_label)
    prefix_layout.addWidget(self.prefix_line_edit)
    prefix_layout.addWidget(self.prefix_add_button)
    prefix_layout.addWidget(self.prefix_remove_button)
    prefix_layout.addWidget(self.prefix_replace_button)

    suffix_label = QtWidgets.QLabel('Suffix:')
    self.suffix_line_edit = QtWidgets.QLineEdit()
    self.suffix_add_button = QtWidgets.QPushButton('+')
    self.suffix_remove_button = QtWidgets.QPushButton('-')
    self.suffix_replace_button = QtWidgets.QPushButton('><')  # Change later

    suffix_layout.addWidget(suffix_label)
    suffix_layout.addWidget(self.suffix_line_edit)
    suffix_layout.addWidget(self.suffix_add_button)
    suffix_layout.addWidget(self.suffix_remove_button)
    suffix_layout.addWidget(self.suffix_replace_button)

    prefix_label.setFixedWidth(55)
    suffix_label.setFixedWidth(55)
    self.prefix_add_button.setFixedWidth(25)
    self.prefix_remove_button.setFixedWidth(25)
    self.prefix_replace_button.setFixedWidth(25)
    self.suffix_add_button.setFixedWidth(25)
    self.suffix_remove_button.setFixedWidth(25)
    self.suffix_replace_button.setFixedWidth(25)

    additions_widget.layout().addLayout(Splitter.SplitterLayout())

    # Name Cleanup
    cleanup_widget = QtWidgets.QWidget()
    cleanup_widget.setLayout(QtWidgets.QVBoxLayout())
    cleanup_widget.layout().setContentsMargins(0, 0, 0, 0)
    cleanup_widget.layout().setSpacing(2)
    cleanup_widget.setSizePolicy(QtWidgets.QSizePolicy.Minimum,
                                 QtWidgets.QSizePolicy.Fixed)
    self.layout().addWidget(cleanup_widget)

    # Label Splitter
    cleanup_splitter = Splitter.Splitter('Cleanup')
    cleanup_widget.layout().addWidget(cleanup_splitter)

    cleanup_layout = QtWidgets.QHBoxLayout()
    cleanup_layout.setContentsMargins(4, 0, 4, 0)
    cleanup_layout.setSpacing(2)
    cleanup_widget.layout().addLayout(cleanup_layout)

    self.end_digits_button = QtWidgets.QPushButton('End Digits')
    # Below buttons need proper functions written...-----------
    # (neither of them is connected to a slot in this constructor)
    self.shape_name_button = QtWidgets.QPushButton('Shape Names')
    self.deformer_name_button = QtWidgets.QPushButton('Deformer Names')

    cleanup_layout.addWidget(self.end_digits_button)
    cleanup_layout.addWidget(self.shape_name_button)
    cleanup_layout.addWidget(self.deformer_name_button)

    # State Change modifiers
    # Need to set the changed status of the alphanumeric radio buttons to
    # influence the _toggle_rename_vis() function
    self.rename_alpha_radio.clicked.connect(self._toggle_rename_vis)
    self.rename_number_radio.clicked.connect(self._toggle_rename_vis)

    self.lower_radio.clicked.connect(self._update_example)
    self.upper_radio.clicked.connect(self._update_example)
    self.rename_start_number.valueChanged.connect(self._update_example)

    self.rename_line_edit.textChanged.connect(self._update_example)

    rename_button.clicked.connect(self.list_rename)
    replace_button.clicked.connect(self.replace_text)

    # Exactly one of the three positional flags is True per button.
    # NOTE(review): from the button pairing the flags look like
    # (add, replace, remove) -- confirm against edit_prefix/edit_suffix.
    self.prefix_add_button.clicked.connect(
        partial(self.edit_prefix, True, False, False))
    self.prefix_remove_button.clicked.connect(
        partial(self.edit_prefix, False, False, True))
    self.prefix_replace_button.clicked.connect(
        partial(self.edit_prefix, False, True, False))

    self.suffix_add_button.clicked.connect(
        partial(self.edit_suffix, True, False, False))
    self.suffix_remove_button.clicked.connect(
        partial(self.edit_suffix, False, False, True))
    self.suffix_replace_button.clicked.connect(
        partial(self.edit_suffix, False, True, False))

    self.end_digits_button.clicked.connect(clear_end_digits)

    self._update_example()
# Demo script: hash the split input, load it into a DistributedHashTable and
# print the intermediate results.
import DHT
import Splitter
from DHT import DistributedHashTable

md5hashstring = ''
count = 1

# Splitter.split() supplies the value that is stringified and distributed.
md5hashstring = str(Splitter.split())
distribute = DistributedHashTable(md5hashstring)

# print(...) with a single argument behaves the same on Python 2 and
# Python 3; the former print statements were a SyntaxError on Python 3.
distribute.tochararray()
print(distribute.chararray)

distribute.createkeyvaluepairs()
print(distribute.pieces)

peerlist = [1, 2, 3, 4]
print(md5hashstring)
def train(self, filenames):
    """Update the context counters and the dictionary from the given files.

    Every file is read, split into words with ``Splitter`` and scanned with
    a sliding window of ``self.window_len`` entries. For each window
    position the counter cell addressed by the current word followed by the
    other window entries is incremented; words missing from
    ``self.dictionary`` are replaced by ``self.unknown`` when building that
    address. A word that is not yet in the dictionary is queued and added to
    it once its delay (``self.dict_delay`` steps) has run out.

    Args:
        filenames: iterable of paths of the files to train on.
    """

    class Delayed:
        """A word waiting to enter the dictionary and its remaining delay."""

        def __init__(self, word, delay):
            self.word = word
            self.delay = delay

    for each in filenames:
        # Context manager: the handle is closed as soon as the text is read
        # (the bare open() used before leaked one handle per file).
        with open(each, 'r') as file:
            string = file.read()
        words = Splitter.imp_split(Splitter.detect_const(string))

        # initialize window: newline placeholders, then the first words
        window = ['\n'] * (self.window_left + 1)
        window.extend(words[:self.window_right])
        cur_word_index = self.window_left

        # Queue of words waiting to enter the dictionary. Each delay is
        # stored relative to the sum of the delays queued ahead of it, so
        # only the head of the queue has to be counted down.
        dict_delayed = deque()

        # process training for each word
        for new_word in words[self.window_right:]:
            # shift window one position to the left and append the new word
            for i in range(0, self.window_len - 1):
                window[i] = window[i + 1]
            window[self.window_len - 1] = new_word

            # debug print
            print('Window: ' + repr(window))

            # make indexes (current is first)
            current = window[cur_word_index]
            indexes = [current if current in self.dictionary
                       else self.unknown]
            # NOTE(review): this is an identity test, so any other window
            # entry that is the same string object as the current word is
            # dropped too and `indexes` can come out shorter than the
            # window. Kept unchanged because check() builds its indexes the
            # same way; change both together.
            for word in window:
                if word is not current:
                    indexes.append(
                        word if word in self.dictionary else self.unknown)

            # debug print
            print('Indexes: ' + repr(indexes))

            # increment to cell pointed by list of indexes
            multidict_operation(self.counters, indexes, inc)

            # debug print
            print('Counter: '
                  + repr(multidict_operation(self.counters, indexes, id)))

            # adding current word to delayed dict if necessary
            if current not in self.dictionary:
                cur_word_delay = self.dict_delay - sum(
                    delayed.delay for delayed in dict_delayed)
                dict_delayed.append(Delayed(current, cur_word_delay))

            # move first object to dictionary if delay is expired
            if dict_delayed and dict_delayed[0].delay == 0:
                self.dictionary.add(dict_delayed.popleft().word)
            # count down the (possibly new) head of the queue
            if dict_delayed:
                dict_delayed[0].delay -= 1

            # debug print
            print('Dict: ' + repr(self.dictionary))
            print('Delayed: '
                  + repr([delayed.word for delayed in dict_delayed]))
            print('\n')

    # NOTE(review): the original indentation of the two calls below was
    # lost; they are assumed to run once, after all files have been
    # processed -- confirm against the intended training procedure.

    # decrement each cell in counters[self.unknown]
    multidict_cycle(self.counters[self.unknown], self.window_len - 1, dec)

    # debug print
    multidict_cycle_indexes(self.counters, self.window_len, print_cell)
# Smoke script: split one training example into words and print the result.
import Splitter

with open('./Examples_train/Bayes_Classifier.py') as source_file:
    string = source_file.read()

# Constants are detected first, then the result is split into words.
with_constants = Splitter.detect_const(string)
words = Splitter.imp_split(with_constants)
print(words)
def train(self, filenames):
    """Update the context counters and the dictionary from the given files.

    Every file is read, split into words with ``Splitter`` and scanned with
    a sliding window of ``self.window_len`` entries. For each window
    position the counter cell addressed by the current word followed by the
    other window entries is incremented; words missing from
    ``self.dictionary`` are replaced by ``self.unknown`` when building that
    address. A word that is not yet in the dictionary is queued and added to
    it once its delay (``self.dict_delay`` steps) has run out.

    Args:
        filenames: iterable of paths of the files to train on.
    """

    class Delayed:
        """A word waiting to enter the dictionary and its remaining delay."""

        def __init__(self, word, delay):
            self.word = word
            self.delay = delay

    for each in filenames:
        # Context manager: the handle is closed as soon as the text is read
        # (the bare open() used before leaked one handle per file).
        with open(each, 'r') as file:
            string = file.read()
        words = Splitter.imp_split(Splitter.detect_const(string))

        # initialize window: newline placeholders, then the first words
        window = ['\n'] * (self.window_left + 1)
        window.extend(words[:self.window_right])
        cur_word_index = self.window_left

        # Queue of words waiting to enter the dictionary. Each delay is
        # stored relative to the sum of the delays queued ahead of it, so
        # only the head of the queue has to be counted down.
        dict_delayed = deque()

        # process training for each word
        for new_word in words[self.window_right:]:
            # shift window one position to the left and append the new word
            for i in range(0, self.window_len - 1):
                window[i] = window[i + 1]
            window[self.window_len - 1] = new_word

            # debug print
            print('Window: ' + repr(window))

            # make indexes (current is first)
            current = window[cur_word_index]
            indexes = [current if current in self.dictionary
                       else self.unknown]
            # NOTE(review): this is an identity test, so any other window
            # entry that is the same string object as the current word is
            # dropped too and `indexes` can come out shorter than the
            # window. Kept unchanged because check() builds its indexes the
            # same way; change both together.
            for word in window:
                if word is not current:
                    indexes.append(
                        word if word in self.dictionary else self.unknown)

            # debug print
            print('Indexes: ' + repr(indexes))

            # increment to cell pointed by list of indexes
            multidict_operation(self.counters, indexes, inc)

            # debug print
            print('Counter: '
                  + repr(multidict_operation(self.counters, indexes, id)))

            # adding current word to delayed dict if necessary
            if current not in self.dictionary:
                cur_word_delay = self.dict_delay - sum(
                    delayed.delay for delayed in dict_delayed)
                dict_delayed.append(Delayed(current, cur_word_delay))

            # move first object to dictionary if delay is expired
            if dict_delayed and dict_delayed[0].delay == 0:
                self.dictionary.add(dict_delayed.popleft().word)
            # count down the (possibly new) head of the queue
            if dict_delayed:
                dict_delayed[0].delay -= 1

            # debug print
            print('Dict: ' + repr(self.dictionary))
            print('Delayed: '
                  + repr([delayed.word for delayed in dict_delayed]))
            print('\n')

    # NOTE(review): the original indentation of the two calls below was
    # lost; they are assumed to run once, after all files have been
    # processed -- confirm against the intended training procedure.

    # decrement each cell in counters[self.unknown]
    multidict_cycle(self.counters[self.unknown], self.window_len - 1, dec)

    # debug print
    multidict_cycle_indexes(self.counters, self.window_len, print_cell)