def test_validate_incorrect_reference(self):
    """A reference with an invalid title must get a validity below 0.5."""
    incorrect_ref = Reference()
    incorrect_ref.set_field('title', 'some arbitrary text', False)
    incorrect_ref.set_field('author', [{
        'first_name': 'Jose-Luis',
        'last_name': 'Sancho',
        'middle_name': '',
    }], True)

    self.rv.validate(incorrect_ref)

    # assertTrue replaces the deprecated failUnless alias, which newer
    # unittest versions no longer provide.
    self.assertTrue(incorrect_ref.validity < 0.5)
def test_validate_reference_fields(self):
    """Check the per-field validity flags set by field validation.

    The title appears verbatim in the raw text and is expected to be
    flagged valid; the year value '32' is expected to be flagged invalid.
    """
    ref = Reference()
    ref.set_field('title', 'Some article title')
    ref.set_field('year', '32')
    raw_text = "Some article title and something else"

    self.iec._validate_reference_fields(ref, raw_text)

    # assertEqual keeps the original ``==`` comparison while avoiding the
    # deprecated failUnless alias.
    self.assertEqual(ref.get_field('title').valid, True)
    self.assertEqual(ref.get_field('year').valid, False)
def test_validate_reference_fields(self):
    """Check the per-field validity flags set by field validation.

    The title appears verbatim in the raw text and is expected to be
    flagged valid; the year value '32' is expected to be flagged invalid.
    """
    ref = Reference()
    ref.set_field('title', 'Some article title')
    ref.set_field('year', '32')
    raw_text = "Some article title and something else"

    self.iec._validate_reference_fields(ref, raw_text)

    # assertEqual keeps the original ``==`` comparison while avoiding the
    # deprecated failUnless alias.
    self.assertEqual(ref.get_field('title').valid, True)
    self.assertEqual(ref.get_field('year').valid, False)
def test_format_reference_different_format(self):
    """Formatting must produce an @article entry and record the format."""
    ref = Reference()
    ref.set_field('reference_id', 'Lmadsen99')
    ref.set_field('title', 'Some article title')

    self.iec._format_reference(ref)

    # The deprecated failUnless alias is replaced by the supported
    # assertTrue/assertEqual methods; the checks themselves are unchanged.
    self.assertTrue(ref.get_entry().startswith('@article{Lmadsen99,'))
    self.assertEqual(ref.get_format(), self.iec.format)
def test_format_reference_different_format(self):
    """Formatting must produce an @article entry and record the format."""
    ref = Reference()
    ref.set_field('reference_id', 'Lmadsen99')
    ref.set_field('title', 'Some article title')

    self.iec._format_reference(ref)

    # The deprecated failUnless alias is replaced by the supported
    # assertTrue/assertEqual methods; the checks themselves are unchanged.
    self.assertTrue(ref.get_entry().startswith('@article{Lmadsen99,'))
    self.assertEqual(ref.get_format(), self.iec.format)
def _use_rule_wrappers(self, source, page, raw_text):
    """
    Try to extract a reference from the page using stored rule wrappers.

    The wrapper collections available for ``source`` are fetched through a
    WrapperGateway. For every collection (a url/field pair) its wrappers
    are applied in the order returned by the gateway until one of them
    yields a value that passes validation. Each wrapper that produces a
    usable value is upvoted or downvoted according to the validation
    result, and the updated wrapper is handed back to the gateway.

    Args:
        source: key used to look up the wrapper collections.
        page: content passed to every wrapper's ``extract_info``.
        raw_text: text passed to the field validator together with the
            extracted value.

    Returns:
        A list containing the single extracted Reference, or an empty
        list if no field could be set on it.
    """
    log.info('Attempting to extract reference with ruled wrappers') #@UndefinedVariable
    reference = Reference()
    wrapper_manager = WrapperGateway(max_wrappers=self.max_wrappers)
    wrapper_field_collections = wrapper_manager.find_wrapper_collections(
        source)

    for collection in wrapper_field_collections:
        # Get the wrappers for the current collection
        url, field = collection.url, collection.field
        wrappers = wrapper_manager.get_wrappers(url, field)
        log.debug('Collection %s:%s has %d wrappers' % (url, field, len(wrappers))) #@UndefinedVariable

        # Fields without a registered validator are accepted as they come
        try:
            validator = self.field_validation[field][1]
        except KeyError:
            validator = None

        # Only these fields are allowed to carry a list value
        accepts_list = field in ('author', 'editor')

        for wrapper in wrappers:
            info = wrapper.extract_info(page)

            # For any other field we expect 'info' to be a string
            if isinstance(info, list) and not accepts_list:
                continue

            log.debug('Info extracted by wrapper: %s' % info) #@UndefinedVariable
            valid = validator.validate(info, raw_text) if validator else True

            # Save the extracted info even if it's not correct. It will
            # be overwritten afterwards if necessary
            reference.set_field(field, info, valid)

            if valid:
                log.debug('The extracted information is valid. ' #@UndefinedVariable
                          'Upvoting wrapper')
                wrapper.upvotes += 1
            else:
                log.debug('The extracted information is not valid. ' #@UndefinedVariable
                          'Downvoting wrapper.')
                wrapper.downvotes += 1
            wrapper_manager.update_wrapper(wrapper)

            # The first valid value wins; remaining wrappers are skipped
            if valid:
                break

    if len(reference.fields) > 0:
        log.info('Extracted reference') #@UndefinedVariable
        return [reference]

    log.info('Could not extract reference using ruled wrappers') #@UndefinedVariable
    return []
def test_validate_correct_reference(self):
    """A reference whose fields are all valid must get validity 1.0."""
    correct_ref = Reference()
    correct_ref.set_field('author', [{
        'first_name': 'Jose-Luis',
        'last_name': 'Sancho',
        'middle_name': '',
    }], True)
    correct_ref.set_field('title', ('Class separability estimation and '
                                    'incremental learning using boundary methods'), True)

    self.rv.validate(correct_ref)

    # assertEqual keeps the original ``==`` comparison while avoiding the
    # deprecated failUnless alias.
    self.assertEqual(correct_ref.validity, 1.0)
def test_validate_incorrect_reference(self):
    """A reference with an invalid title must get a validity below 0.5."""
    incorrect_ref = Reference()
    incorrect_ref.set_field('title', 'some arbitrary text', False)
    incorrect_ref.set_field('author', [{
        'first_name': 'Jose-Luis',
        'last_name': 'Sancho',
        'middle_name': '',
    }], True)

    self.rv.validate(incorrect_ref)

    # assertTrue replaces the deprecated failUnless alias, which newer
    # unittest versions no longer provide.
    self.assertTrue(incorrect_ref.validity < 0.5)
class TestReference(unittest.TestCase):
    """Unit tests for setting and reading fields and entries on Reference."""

    def setUp(self):
        # Every test starts from a fresh, empty reference
        self.ref = Reference()

    def tearDown(self):
        pass

    def test_set_and_get_field(self):
        """A field that was set can be read back with the same value."""
        self.ref.set_field('random_field', 'random_value')
        self.assertEqual(self.ref.get_field('random_field').value,
                         'random_value')

    def test_get_fields(self):
        """get_fields lists the names of all set fields, in insertion order."""
        self.ref.set_field('rf01', 'rv01')
        self.ref.set_field('rf02', 'rv02')
        self.ref.set_field('rf03', 'rv04')
        self.assertEqual(len(self.ref.get_fields()), 3)
        self.assertEqual(self.ref.get_fields(), ['rf01', 'rf02', 'rf03'])

    def test_set_field_to_none(self):
        """A field set to None must be flagged as not valid."""
        self.ref.set_field('some_field', None)
        field = self.ref.get_field('some_field')
        # Compared with ``==`` (not assertFalse) to keep the original check
        self.assertEqual(field.valid, False)

    def test_set_and_get_entry(self):
        """An entry that was set can be read back unchanged."""
        self.ref.set_entry('This is an entry')
        self.assertEqual(self.ref.get_entry(), 'This is an entry')
def test_validate_correct_reference(self):
    """A reference whose fields are all valid must get validity 1.0."""
    correct_ref = Reference()
    correct_ref.set_field('author', [{
        'first_name': 'Jose-Luis',
        'last_name': 'Sancho',
        'middle_name': '',
    }], True)
    correct_ref.set_field('title', ('Class separability estimation and '
                                    'incremental learning using boundary methods'), True)

    self.rv.validate(correct_ref)

    # assertEqual keeps the original ``==`` comparison while avoiding the
    # deprecated failUnless alias.
    self.assertEqual(correct_ref.validity, 1.0)
def find_reference_by_id(self, id):
    """
    Load the stored reference with the given id and rebuild it as a
    Reference object.

    The id, validity and plain fields are copied from the mapped row.
    Authors and editors are converted with ``to_name_dict`` and stored
    under the 'author' and 'editor' fields, but only when there is at
    least one of them.

    Args:
        id: identifier of the reference to look up; must be truthy.

    Returns:
        The rebuilt Reference, or None if the query result is falsy.

    Raises:
        ValueError: if ``id`` is falsy.
    """
    if not id:
        raise ValueError

    log.debug('Querying the database. Reference with id %s' % str(id)) #@UndefinedVariable
    query = self.session.query(mappers.Reference)
    m_reference = query.filter(mappers.Reference.id == id).one()

    # NOTE(review): if this is SQLAlchemy's Query.one(), a missing row
    # raises instead of yielding a falsy value, which would make this
    # guard unreachable -- confirm against the session implementation.
    if not m_reference:
        return None

    log.debug('Creating new reference') #@UndefinedVariable
    reference = Reference()
    reference.id = m_reference.id
    reference.validity = m_reference.validity

    log.debug('Adding fields') #@UndefinedVariable
    for stored_field in m_reference.fields:
        reference.set_field(stored_field.name, stored_field.value,
                            stored_field.valid)

    log.debug('Adding authors') #@UndefinedVariable
    authors = [person.to_name_dict() for person in m_reference.authors]
    if authors:
        reference.set_field(u'author', authors, True)

    log.debug('Adding editors') #@UndefinedVariable
    editors = [person.to_name_dict() for person in m_reference.editors]
    if editors:
        reference.set_field(u'editor', editors, True)

    return reference
def _use_rule_wrappers(self, source, page, raw_text):
    """
    Try to extract a reference from the page using stored rule wrappers.

    The wrapper collections available for ``source`` are fetched through a
    WrapperGateway. For every collection (a url/field pair) its wrappers
    are applied in the order returned by the gateway until one of them
    yields a value that passes validation. Each wrapper that produces a
    usable value is upvoted or downvoted according to the validation
    result, and the updated wrapper is handed back to the gateway.

    Args:
        source: key used to look up the wrapper collections.
        page: content passed to every wrapper's ``extract_info``.
        raw_text: text passed to the field validator together with the
            extracted value.

    Returns:
        A list containing the single extracted Reference, or an empty
        list if no field could be set on it.
    """
    log.info('Attempting to extract reference with ruled wrappers') #@UndefinedVariable
    reference = Reference()
    wrapper_manager = WrapperGateway(max_wrappers=self.max_wrappers)
    wrapper_field_collections = wrapper_manager.find_wrapper_collections(
        source)

    for collection in wrapper_field_collections:
        # Get the wrappers for the current collection
        url, field = collection.url, collection.field
        wrappers = wrapper_manager.get_wrappers(url, field)
        log.debug('Collection %s:%s has %d wrappers' % (url, field, len(wrappers))) #@UndefinedVariable

        # Fields without a registered validator are accepted as they come
        try:
            validator = self.field_validation[field][1]
        except KeyError:
            validator = None

        # Only these fields are allowed to carry a list value
        accepts_list = field in ('author', 'editor')

        for wrapper in wrappers:
            info = wrapper.extract_info(page)

            # For any other field we expect 'info' to be a string
            if isinstance(info, list) and not accepts_list:
                continue

            log.debug('Info extracted by wrapper: %s' % info) #@UndefinedVariable
            valid = validator.validate(info, raw_text) if validator else True

            # Save the extracted info even if it's not correct. It will
            # be overwritten afterwards if necessary
            reference.set_field(field, info, valid)

            if valid:
                log.debug('The extracted information is valid. ' #@UndefinedVariable
                          'Upvoting wrapper')
                wrapper.upvotes += 1
            else:
                log.debug('The extracted information is not valid. ' #@UndefinedVariable
                          'Downvoting wrapper.')
                wrapper.downvotes += 1
            wrapper_manager.update_wrapper(wrapper)

            # The first valid value wins; remaining wrappers are skipped
            if valid:
                break

    if len(reference.fields) > 0:
        log.info('Extracted reference') #@UndefinedVariable
        return [reference]

    log.info('Could not extract reference using ruled wrappers') #@UndefinedVariable
    return []
class TestReferenceFormatter(unittest.TestCase):
    """Unit test for formatting a Reference with the BibTeX generator."""

    def setUp(self):
        # Sample article reference shared by the tests
        self.ref = Reference()
        self.ref.set_field('reference_id', 'Lmadsen99')
        self.ref.set_field('author', [{
            'first_name': 'Lars',
            'last_name': 'Madsen',
            'middle_name': 'Lithen',
        }])
        self.ref.set_field('title', 'Some article title')
        self.ref.set_field('pages', '133--144')
        self.ref.set_field('journal', 'Some journal')
        self.ref.set_field('year', '1999')

        self.ref_formatter = ReferenceFormatter()
        self.format_generator = BibtexGenerator()

    def tearDown(self):
        pass

    def test_formatter(self):
        """Formatting stores the expected entry and format on the reference."""
        self.ref_formatter.format_reference(self.ref, self.format_generator)
        entry = self.ref.get_entry()

        # The trailing empty item makes the joined text end with a line
        # separator, as the expected entry does.
        expected_entry = os.linesep.join([
            '@article{Lmadsen99,',
            'title = {Some article title},',
            'author = {Madsen, Lithen, Lars},',
            'year = 1999,',
            'journal = {Some journal},',
            'pages = {133--144}',
            '}',
            '',
        ])

        # assertEqual replaces the deprecated failUnless alias and reports
        # both values when the comparison fails.
        self.assertEqual(entry, expected_entry)
        self.assertEqual(self.ref.format, self.format_generator.format)