def run(self):
    """
    Creates a pool of threads, starts them, and waits for 'in_queue' to be
    empty before joining them. Results, if any, will be available in
    'out_queue'.

    The pool size is (re)computed by '_set_pool_size' and every worker is
    built as 'thread_class(in_queue, out_queue, **_thread_args)'. When all
    the workers have been joined, 'finished' is set to True.
    """
    # Local import so this method does not depend on the module header.
    import time

    self._set_pool_size()
    log.debug('Active threads: %d' % threading.active_count())  #@UndefinedVariable

    # Create threads and add them to the pool
    for i in range(self.pool_size):
        thread = self.thread_class(self.in_queue, self.out_queue,
                                   **self._thread_args)
        thread.name = 'Worker-%02d' % i
        self._thread_pool.append(thread)
        thread.start()

    log.debug('Active threads: %d' % threading.active_count())  #@UndefinedVariable

    # Wait for the threads to take every item out of the queue. Sleep
    # between checks instead of spinning: a tight 'pass' loop keeps a core
    # busy and competes with the workers for the interpreter lock.
    while not self.in_queue.empty():
        time.sleep(0.05)

    # Ask threads to stop
    # NOTE(review): join() only waits by default; presumably
    # thread_class.join() also signals the worker to stop -- confirm.
    for thread in self._thread_pool:
        thread.join()
    self.finished = True
def run(self):
    """
    Runs until 'stop_event' is set. Takes files from 'in_queue', hands each
    one to a new 'ReferenceMaker' and puts whatever 'make_reference'
    returns in 'out_queue'.

    If the extraction raises, the error is logged and an empty
    'Extraction()' is put in 'out_queue' instead, so that every processed
    file yields exactly one output item. Falsy items taken from the queue
    are discarded.
    """
    log.debug("Running thread", extra={'threadname': self.getName()})  #@UndefinedVariable
    while not self.stop_event.isSet():
        try:
            # Block for a short while instead of polling empty()/get(False)
            # in a tight loop, which burned CPU while the queue was idle.
            # The timeout keeps 'stop_event' checked regularly.
            file_path = self.in_queue.get(True, 0.1)
        except Queue.Empty:
            continue

        if not file_path:
            continue

        log.debug("Processing file %s" % file_path)  #@UndefinedVariable
        try:
            reference = ReferenceMaker().make_reference(file_path,
                                                        self.target_format)
            self.out_queue.put(reference)
        except Exception as e:
            log.error('Unexpected exception while extracting reference'  #@UndefinedVariable
                      ' for file %s: %s' % (file_path, str(e)))
            self.out_queue.put(Extraction())
def run(self):
    """
    Runs until 'stop_event' is set. Takes files from 'in_queue', hands each
    one to a new 'ReferenceMaker' and puts whatever 'make_reference'
    returns in 'out_queue'.

    If the extraction raises, the error is logged and an empty
    'Extraction()' is put in 'out_queue' instead. Falsy items taken from
    the queue are discarded.
    """
    log.debug("Running thread", extra={'threadname': self.getName()})  #@UndefinedVariable

    # Seconds to wait for an item before re-checking 'stop_event'.
    poll_timeout = 0.1

    while not self.stop_event.isSet():
        try:
            # A blocking get with a timeout replaces the empty()/get(False)
            # busy loop, which kept a core spinning while the queue was
            # idle.
            file_path = self.in_queue.get(True, poll_timeout)
        except Queue.Empty:
            continue

        if not file_path:
            continue

        log.debug("Processing file %s" % file_path)  #@UndefinedVariable
        try:
            reference = ReferenceMaker().make_reference(
                file_path, self.target_format)
            self.out_queue.put(reference)
        except Exception as e:
            log.error(
                'Unexpected exception while extracting reference'  #@UndefinedVariable
                ' for file %s: %s' % (file_path, str(e)))
            self.out_queue.put(Extraction())
def _create_new_collection(self):
    """
    Shows a 'WrapperCollectionBox' dialog and, unless it is rejected,
    creates a new wrapper collection with the url and field typed in it.
    The new collection is added to the collections tree, its parent item is
    expanded and the new item is selected.
    """
    dialog = WrapperCollectionBox(self)
    if dialog.exec_() == QtGui.QDialog.Rejected:
        log.debug('Collection creation aborted')  #@UndefinedVariable
        return

    url_line = dialog.ui.urlLine
    field_line = dialog.ui.fieldLine
    log.debug('Creating new collection %s %s' % (url_line.text(),  #@UndefinedVariable
                                                 field_line.text()))

    url = unicode(url_line.text())
    field = unicode(field_line.text())

    new_collection = self.parent.wrapper_gw.new_wrapper_collection()
    new_collection.url = url
    new_collection.field = field

    tree = self.ui.collections
    new_item = self._add_collection(new_collection)
    tree.setItemExpanded(new_item.parent(), True)
    tree.setItemSelected(new_item, True)
def _rule_example(self, example): log.debug('Ruling example with MultiValuePathRuler') #@UndefinedVariable rule_example = super(MultiValuePathRuler, self)._rule_example values = list(example.value) count = len(values) example_rules = [] if not count: return [] # If there's only one value first_rules = rule_example(Example(values[0], example.content)) if count == 1: for rule in first_rules: #example_rules.append(MultiValuePathRule(rule.pattern)) example_rules.append(PathRule(rule.pattern)) return example_rules more_rules = rule_example(Example(values[1], example.content)) for f_rule in first_rules: f_rule_pattern = list(f_rule.pattern) if f_rule in more_rules: #example_rules.append(MultiValuePathRule(f_rule_pattern)) example_rules.append(PathRule(f_rule_pattern)) continue for s_rule in more_rules: if self._should_merge(f_rule, s_rule): f_rule_pattern = self._merge_patterns(f_rule.pattern, s_rule.pattern) example_rules.append(PathRule(f_rule_pattern)) return example_rules
def _update_fields(self, reference):
    """
    Updates the fields of a reference from the rows of the fields tree.

    Each row holds name (column 0), value (column 1) and validity
    (column 2). A row backed by an existing field is removed from the
    reference when any of its columns is empty. Rows whose validity is not
    'True' or 'False' are skipped. The remaining rows overwrite the field
    they are backed by, or are appended with 'add_field' when they are
    beyond the existing fields.

    The number of removed fields is tracked so that, after a removal, the
    following rows keep pointing at their own field instead of at the one
    that slid into the freed position.
    """
    log.debug('Updating reference')  #@UndefinedVariable
    tree = self.editor.fields
    removed = 0
    for index in range(tree.topLevelItemCount()):
        item = tree.topLevelItem(index)
        # Position of this row's field once earlier removals are accounted
        # for.
        position = index - removed

        # Remove empty items
        if ((len(reference.fields) > position) and
                not (item.text(0) and item.text(1) and item.text(2))):
            reference.fields.pop(position)
            removed += 1
            # The row is gone; do not let it overwrite the next field.
            continue

        # Skip non-empty items that have an invalid status
        if not (item.text(2) == 'True' or item.text(2) == 'False'):
            continue

        log.debug('Index: %d Number of fields %d' % (index,  #@UndefinedVariable
                                                     len(reference.fields)))
        try:
            name = unicode(item.text(0))
            value = unicode(item.text(1))
            valid = str(item.text(2)) == "True"
        except TypeError as e:
            log.error('Type error when casting to store to database %s' %  #@UndefinedVariable
                      str(e))
            continue

        if len(reference.fields) > position:
            field = reference.fields[position]
            field.name = name
            field.value = value
            field.valid = valid
        else:
            reference.add_field(name, value, valid)
def _update_people(self, tree, people, add_method):
    """
    Updates the list of authors or editors of a reference from the rows of
    'tree'. Which list gets updated depends on the 'people' and
    'add_method' parameters.

    A row with its three columns empty removes the person it is backed by
    (if any) and is skipped. Other rows overwrite the person they are
    backed by, or are added through 'add_method(first, middle, last)' when
    they are beyond the existing people.

    The number of removed people is tracked so that, after a removal, the
    following rows keep pointing at their own entry instead of at the one
    that slid into the freed position.
    """
    removed = 0
    for index in range(tree.topLevelItemCount()):
        item = tree.topLevelItem(index)
        # Position of this row's person once earlier removals are accounted
        # for.
        position = index - removed

        if not (item.text(0) or item.text(1) or item.text(2)):
            if len(people) > position:
                people.pop(position)
                removed += 1
            continue

        log.debug("Index: %d Number of fields %d" % (index, len(people)))  #@UndefinedVariable
        try:
            first_name = unicode(item.text(0))
            middle_name = unicode(item.text(1))
            last_name = unicode(item.text(2))
        except TypeError as e:
            log.error("Type error when casting to store to database %s" %  #@UndefinedVariable
                      str(e))
            continue

        if len(people) > position:
            # NOTE(review): 'name'/'value'/'valid' look copied from the
            # field update code; a person presumably exposes first, middle
            # and last name attributes instead -- confirm against the
            # person class before renaming.
            person = people[position]
            person.name = first_name
            person.value = middle_name
            person.valid = last_name
        else:
            add_method(first_name, middle_name, last_name)
def populate(self, extraction):
    """
    Fills the editor widgets with the data of 'extraction'.

    Does nothing for a falsy extraction. Otherwise the editor is cleared,
    the file path, query string and result url are shown and, if the
    extraction has references, the validity, fields, authors and editors of
    the first one are loaded.

    The editor stays in "populating" mode while the widgets are filled;
    that mode is always left on the way out, even if loading raises, so a
    failure cannot leave the flag stuck.
    """
    if not extraction:
        return

    self.enter_populating()
    try:
        self.clear()
        self.extraction = extraction
        log.debug("Loading extraction for: %s" % extraction.file_path)  #@UndefinedVariable
        self.editor.filePathLine.setText(extraction.file_path)
        self.editor.queryLine.setText(extraction.query_string)
        self.editor.resultLine.setText(extraction.result_url)

        if not extraction.references:
            return

        # Only the first reference of the extraction is shown
        reference = extraction.references[0]
        self.editor.validitySpin.setValue(reference.validity)

        # Add fields
        for field in reference.fields:
            item = QtGui.QTreeWidgetItem(self.editor.fields)
            item.setFlags(QtCore.Qt.ItemIsEditable | QtCore.Qt.ItemIsEnabled)
            item.setText(0, field.name)
            item.setText(1, field.value)
            item.setText(2, repr(field.valid))

        self.populate_people(self.editor.authors, reference.authors)
        self.populate_people(self.editor.editors, reference.editors)
    finally:
        self.exit_populating()
def find_reference_by_id(self, id):
    """
    Looks up the stored reference with the given id and converts it to a
    'Reference' object.

    Fields are copied one by one; authors and editors are converted with
    'to_name_dict' and stored as the 'author' and 'editor' fields when
    there are any.

    Returns the 'Reference', or None when no row has that id.
    Raises ValueError when 'id' is falsy.
    """
    if not id:
        raise ValueError('A reference id is required')

    log.debug('Querying the database. Reference with id %s' % str(id))  #@UndefinedVariable
    # first() gives None when nothing matches, which is what the check
    # below expects. one() raises on a missing row instead, which made the
    # 'return None' branch unreachable.
    m_reference = (self.session.query(mappers.Reference).
                   filter(mappers.Reference.id == id).first())
    if not m_reference:
        return None

    log.debug('Creating new reference')  #@UndefinedVariable
    reference = Reference()
    reference.id = m_reference.id
    reference.validity = m_reference.validity

    log.debug('Adding fields')  #@UndefinedVariable
    for m_field in m_reference.fields:
        reference.set_field(m_field.name, m_field.value, m_field.valid)

    log.debug('Adding authors')  #@UndefinedVariable
    authors = [m_author.to_name_dict() for m_author in m_reference.authors]
    if authors:
        reference.set_field(u'author', authors, True)

    log.debug('Adding editors')  #@UndefinedVariable
    editors = [m_editor.to_name_dict() for m_editor in m_reference.editors]
    if editors:
        reference.set_field(u'editor', editors, True)

    return reference
def _evaluate_wrapper(self, wrapper, examples):
    """
    Runs the wrapper against every example and votes on the outcome: the
    wrapper gets an upvote when the extracted information passes the
    evaluation for that example and a downvote otherwise. Extracting
    nothing counts as a failure.
    """
    for example in examples:
        log.debug('Evaluating wrapper with example value %s ' %  #@UndefinedVariable
                  str(example.value))
        extracted = wrapper.extract_info(example.content)

        # Lists are evaluated as multi-value results, anything else as a
        # single value.
        if not extracted:
            passed = False
        else:
            if type(extracted) is list:
                evaluate = self._evaluate_multi_value_wrapper
            else:
                evaluate = self._evaluate_single_value_wrapper
            passed = evaluate(extracted, example.value)

        if passed:
            wrapper.upvotes += 1
        else:
            wrapper.downvotes += 1
        log.debug('Result of evaluation: %s' % str(passed))  #@UndefinedVariable
def _mark_for_update(self):
    """
    Flags the current reference as needing an update. Nothing is flagged
    while the editor is being populated.
    """
    if self.populating:
        return
    self.for_update = True
    log.debug("Reference marked for update")  #@UndefinedVariable
def _change_show_string(self, new):
    """
    Recomputes the show string for 'new' and applies it to the last
    selected item, both as its 'show_path' and as the text of column 0.
    Does nothing when no item is selected.
    """
    log.debug('Changing show string for current item')  #@UndefinedVariable
    if not self.last_selected:
        return
    label = self._get_show_string(str(new))
    self.last_selected.show_path = label
    self.last_selected.setText(0, label)
def initializePage(self):
    """
    Adds every extraction returned by the extraction gateway to the page
    and sorts the references widget by column 0 in ascending order.
    """
    log.debug("Initializing references page.")  #@UndefinedVariable
    for found in self.parent.extraction_gw.find_extractions():
        self._add_extraction(found)
    self.ui.references.sortItems(0, QtCore.Qt.AscendingOrder)
def _update_rules(self, wrapper):
    """
    Updates the rules of 'wrapper' from the rows of the rules tree.

    Each row holds the rule type (column 0), the pattern (column 1) and the
    order (column 2). A row backed by an existing rule is removed from the
    wrapper when any of its columns is empty. Rows that cannot be cast, or
    whose pattern is not valid JSON, are skipped. The remaining rows
    overwrite the rule they are backed by, or are appended with
    'add_rule_by_info' when they are beyond the existing rules.

    The number of removed rules is tracked so that, after a removal, the
    following rows keep pointing at their own rule instead of at the one
    that slid into the freed position.
    """
    tree = self.ui.rules
    removed = 0
    # NOTE(review): the last top-level item is left out on purpose by the
    # range below; presumably it is a placeholder row -- confirm.
    for index in range(tree.topLevelItemCount() - 1):
        item = tree.topLevelItem(index)
        # Position of this row's rule once earlier removals are accounted
        # for.
        position = index - removed
        log.debug('Updating rule %d' % index)  #@UndefinedVariable

        # Remove empty items
        if ((len(wrapper.rules) > position) and
                not (item.text(0) and item.text(1) and item.text(2))):
            wrapper.rules.pop(position)
            removed += 1
            continue

        # Skip items whose values cannot be converted
        try:
            rule_type = str(item.text(0))
            pattern = str(item.text(1))
            order = int(str(item.text(2)))
        except (TypeError, ValueError):
            log.error('Error when casting to store to database')  #@UndefinedVariable
            continue

        # Check that the pattern can be converted to a python object; the
        # decoded value itself is not needed.
        try:
            simplejson.loads(pattern)
        except ValueError:
            log.debug('Cannot convert pattern %s to Python objects' %  #@UndefinedVariable
                      pattern)
            continue

        # Update or append the rules
        if len(wrapper.rules) > position:
            rule = wrapper.rules[position]
            rule.rule_type = rule_type
            rule.pattern = pattern
            rule.order = order
        else:
            wrapper.add_rule_by_info(rule_type, pattern, order)
def apply(self, input):
    """
    Applies the rule to 'input'. The first element of the pattern is stored
    as 'value_guide', the second as 'context', and the rest is used as the
    path to locate the candidate elements, among which one is chosen and
    returned.
    """
    log.debug('Applying PathRule')  #@UndefinedVariable
    steps = list(self.pattern)
    self.value_guide = steps[0]
    self.context = steps[1]
    candidates = self._get_path_element(steps[2:], input)
    return self._choose_element(candidates)
def _mark_wrapper_for_update(self):
    """
    Flags the wrapper as changed so that it gets updated later. Changes
    that happen while the widgets are being populated are ignored.
    """
    if self.populating:
        return
    self.wrapper_for_update = True
    log.debug('Marked wrapper for update')  #@UndefinedVariable
def run(self):
    """
    Reports extraction progress: every half second the number of processed
    items of the index maker is emitted through the 'output(int)' signal,
    until that number reaches 'maximum'. Then the thread exits with code 0.
    """
    log.debug("Reference extraction thread running")  #@UndefinedVariable
    progress_signal = QtCore.SIGNAL("output(int)")
    processed_count = 0
    while processed_count < self.maximum:
        processed_count = len(self.index_maker.processed)
        self.emit(progress_signal, processed_count)
        time.sleep(0.5)
    log.debug("Exiting extraction thread")  #@UndefinedVariable
    self.exit(0)
def _update_export_edit(self):
    """
    Shows the references formatted by the worker thread in the entries
    editor, each one followed by a blank line.
    """
    formatted_references = self.thread.formatted_references
    log.debug('Finished formatting: %d formatted references' %  #@UndefinedVariable
              len(formatted_references))

    # Collect the pieces and join once; rebuilding the whole text for every
    # reference was quadratic in the number of references.
    pieces = []
    for reference in formatted_references:
        pieces.append(reference)
        pieces.append('\n\n')
    self.ui.entriesEdit.setText(''.join(pieces))
def _populate_collections(self):
    """
    Adds all the collections returned by the wrapper gateway to the
    collections list.

    The widget is in "populating" mode while the collections are added;
    that mode is always left on the way out, even if loading or adding a
    collection raises.
    """
    log.debug("Populating collections list")  #@UndefinedVariable
    self.enter_populating()
    try:
        for collection in self.parent.wrapper_gw.find_wrapper_collections():
            self._add_collection(collection)
    finally:
        self.exit_populating()
def extract_info(self, source, page):
    """
    Extracts a reference from the given page.

    The wrapper for 'source' is looked up in '_available_wrappers' and the
    method named '_do_<wrapper>' is called with (source, page); its result
    is returned unchanged. When there is no wrapper for the source,
    (None, None) is returned.
    """
    # Membership is tested on the dict itself; '.keys()' built a throwaway
    # list and scanned it linearly.
    if source not in self._available_wrappers:
        log.debug('No reference wrapper available for source %s' % source)  #@UndefinedVariable
        return (None, None)

    wrapper_method = getattr(self, '_do_' + self._available_wrappers[source])
    return wrapper_method(source, page)
def _create_new_reference(self):
    """
    Creates a new extraction through the extraction gateway, adds it to the
    references list and moves the selection to the new item.

    Unselecting the previously selected item is best effort: a failure
    there is only logged.
    """
    extraction = self.parent.extraction_gw.new_extraction()
    item = self._add_extraction(extraction)

    log.debug('Changing selection to the new item')  #@UndefinedVariable
    # Change selection to the new item
    try:
        self.ui.references.setItemSelected(self.last_selected, False)
    except Exception:
        # 'Exception' rather than a bare except, so that SystemExit and
        # KeyboardInterrupt are not swallowed here.
        log.debug('Error unselecting extraction')  #@UndefinedVariable
    self.ui.references.setItemSelected(item, True)
def initializePage(self):
    """
    Attaches the GUI log handler and starts a 'ReferenceImporterThread' for
    the path stored in the 'filePath' field. 'finish' is connected to both
    the 'finished()' and 'terminated()' signals of the thread.
    """
    log.addHandler(self.guihandler)  #@UndefinedVariable
    source_path = self.field('filePath').toPyObject()
    log.debug("Starting importing references from: %s" % source_path)  #@UndefinedVariable
    self.thread = ReferenceImporterThread(str(source_path), self)

    # Connect thread signals
    for signal_name in ("finished()", "terminated()"):
        self.connect(self.thread, QtCore.SIGNAL(signal_name), self.finish)

    self.thread.start()
def _apply_single_input(self, input): log.debug('Applying RegexRule with pattern %s' % self.pattern) #@UndefinedVariable try: regex = re.compile(self.pattern) input = input.strip() matches = re.search(regex, input) except Exception, e: log.error('Exception applying RegexRule with pattern %s: %s' #@UndefinedVariable % (self.pattern, e)) return ''
def _prune_wrappers(self, wrappers):
    """
    Returns the wrappers worth keeping, in their original order: the first
    'max_wrappers' of them, plus any later one whose score is above half of
    'min_validity'.
    """
    log.debug('Prunning %d wrappers.' % len(wrappers))  #@UndefinedVariable
    quota = self.max_wrappers
    score_floor = self.min_validity / 2.0
    survivors = [
        candidate
        for rank, candidate in enumerate(wrappers, 1)
        if candidate.score > score_floor or rank <= quota
    ]
    log.debug('After prunning: %d wrappers' % len(survivors))  #@UndefinedVariable
    return survivors
def done(self, status):
    """
    Asks the user for a destination file and saves the text of the entries
    editor of the first page to it, encoded as UTF-8.

    Nothing is written when the dialog is cancelled. Errors while saving
    are logged, not raised.

    NOTE(review): 'status' is not used and the base class 'done' is not
    invoked from here -- confirm that this is intended.
    """
    # Local import so this method does not depend on the module header.
    import io

    path = QtGui.QFileDialog.getSaveFileName(
        self, caption='Save references to file', filter='BibTeX (*.bib)')
    if not path:
        return

    log.debug('Saving to file: %s' % path)  #@UndefinedVariable
    try:
        text = unicode(self.page01.ui.entriesEdit.toPlainText())
        # An explicit encoding lets non-ASCII entries be written, and the
        # context manager closes the file even when the write fails.
        with io.open(unicode(path), 'w', encoding='utf-8') as out_file:
            out_file.write(text)
    except Exception as e:
        log.error('Error saving references to %s' % e)  #@UndefinedVariable
def _update_wrapper(self):
    """
    Writes the pending changes (score and rules) to the last selected
    wrapper. Does nothing unless a wrapper is selected and it has been
    marked for update.
    """
    if not self.wrapper_for_update or not self.last_selected_wrapper:
        return
    log.debug('Updating last selected wrapper')  #@UndefinedVariable
    current = self.last_selected_wrapper.wrapper
    self._update_score(current)
    self._update_rules(current)
def extract_info(self, input):
    """
    Feeds 'input' through the chain of rules, each rule receiving the
    output of the previous one. The chain stops as soon as an intermediate
    result is falsy; the last result obtained is returned.
    """
    log.debug('Applying ruled wrapper')  #@UndefinedVariable
    current = input
    for rule in self.rules:
        if not current:
            break
        current = rule.apply(current)
    return current
def _rule_example(self, example):
    """
    Creates a rule for every content element related to the example's
    value, drops the elements that yield no rule and merges what is left
    into the list of rules that is returned.
    """
    log.debug('Ruling example with PathRuler. Value %s' %  #@UndefinedVariable
              str(example.value))
    elements = self._get_content_elements(example.value, example.content)
    attempts = (self._rule_element(example, element) for element in elements)
    element_rules = [rule for rule in attempts if rule]

    merged = []
    self._merge_rules(merged, element_rules)
    return merged
def _rule_example(self, example):
    """
    Creates a rule for each piece of the example's content and returns the
    ones that could be created. Content given as a single string is first
    wrapped in a list (the example itself is modified).
    """
    log.debug('Ruling example with RegexRuler')  #@UndefinedVariable
    if type(example.content) in (str, unicode):
        example.content = [example.content]

    attempts = (self._rule_example_content(example.value, piece)
                for piece in example.content)
    return [rule for rule in attempts if rule]
def _format_references(self):
    """
    Reacts to an item being checked or unchecked: shows a placeholder text
    in the entries editor and starts the formatting thread with the items
    that are currently checked. Ignored while the page is being populated.
    """
    if self.populating:
        return

    log.debug('Item checked/unchecked')  #@UndefinedVariable
    self.ui.entriesEdit.setText('Updating...')

    checked = self._get_checked_items()
    log.debug('Items selected: %d' % len(checked))  #@UndefinedVariable

    worker = self.thread
    worker.items = checked
    worker.start()
def apply(self, input):
    """
    Matches the rule's pattern against the beginning of every string of
    'input' and returns the first group of each match. Strings longer than
    MAX_ELEM_CONTENT_LEN, strings that do not match and matches without
    groups are left out.
    """
    log.debug('Applying MultiValueRegexRule with pattern %s' %  #@UndefinedVariable
              (str(self.pattern)[:30]))
    compiled = re.compile(self.pattern)
    captured = []
    for candidate in input:
        if len(candidate) > MAX_ELEM_CONTENT_LEN:
            continue
        found = compiled.match(candidate)
        if found and found.groups():
            captured.append(found.group(1))
    return captured
def _validate_reference_fields(self, reference, raw_text):
    """
    Complement for _use_reference_wrappers: sets the 'valid' flag of every
    field of the reference. A field is checked with the validator stored at
    position 1 of its 'field_validation' entry; fields with no entry, or
    with a falsy validator, are considered valid.
    """
    log.debug('Validating reference fields')  #@UndefinedVariable
    for field_name in reference.fields:
        field = reference.get_field(field_name)
        try:
            checker = self.field_validation[field_name][1]
        except KeyError:
            checker = None

        if checker:
            field.valid = checker.validate(field.value, raw_text)
        else:
            field.valid = True
def apply(self, input):
    """
    Turns a list of person strings into a list of split names. Digits are
    removed from every string and the result is stripped; strings left
    empty and names that cannot be split are dropped. Anything other than
    a list yields an empty list.
    """
    log.debug('Applying PersonRule')  #@UndefinedVariable
    if type(input) != list:
        return []

    names = []
    for raw_person in input:
        cleaned = re.sub(r'\d', '', raw_person).strip()
        if not cleaned:
            continue
        parsed = split_name(cleaned)
        if parsed:
            names.append(parsed)
    return names