class TestWorkClassifier(unittest.TestCase):
    """Unit tests for WorkClassifier and its MARC21 tokenizer.

    Every test runs against a fresh classifier named "Test" that uses
    TEST_REDIS as its datastore; the datastore is flushed after each test.
    """

    def setUp(self):
        """Build the classifier under test."""
        self.classifier = WorkClassifier(name="Test",
                                         datastore=TEST_REDIS)

    def tearDown(self):
        """Flush TEST_REDIS so no state carries over to the next test."""
        TEST_REDIS.flushdb()

    def test_init(self):
        """The classifier created in setUp is truthy."""
        # assertTrue replaces the deprecated alias assert_.
        self.assertTrue(self.classifier)

    def test_tokenize_marc21_one(self):
        """Tokenize a record that has an author (100) and a title (245)."""
        marc_record = pymarc.Record()
        # Overwrite leader position 6 with 'a', keeping the rest intact.
        marc_record.leader = (marc_record.leader[0:6] + 'a' +
                              marc_record.leader[7:])
        marc_record.add_field(
            pymarc.Field(tag='100',
                         indicators=['1', ' '],
                         subfields=['a', 'Naslund, Sena Jeter.']))
        marc_record.add_field(
            pymarc.Field(tag='245',
                         indicators=['1', '0'],
                         subfields=['a', "Ahab's wife, or, The star-gazer :",
                                    'b', "a novel /"]))
        # NOTE(review): 'book' is in no field above; presumably the
        # tokenizer derives it from the leader -- confirm in WorkClassifier.
        self.assertEqual(
            sorted(self.classifier.__tokenize_marc21__(marc_record)),
            ['ahab',
             'book',
             'gazer',
             'jeter',
             'naslund',
             'novel',
             'sena',
             'star',
             'wife'])

    def test_tokenize_marc21_title_only(self):
        """Tokenize a record that carries only a title (245) field."""
        marc_record = pymarc.Record()
        marc_record.add_field(
            pymarc.Field(tag='245',
                         indicators=['1', '0'],
                         subfields=['a', "Ahab's wife, or, The star-gazer :",
                                    'b', "a novel /"]))
        # Compare sorted tokens, as test_tokenize_marc21_one does, so the
        # test does not depend on the order the tokenizer emits them in.
        self.assertEqual(
            sorted(self.classifier.__tokenize_marc21__(marc_record)),
            ['ahab', 'gazer', 'novel', 'star', 'wife', 'work'])
def create_row_from_marc(marc_record,
                         is_work=False,
                         reason=None):
    """Function creates a table row from a MARC21 record

    The row holds five cells, in order: record ID, title, author,
    tokenized terms, and the is-work verdict.

    Keywords:
    marc_record -- pymarc MARC21 record
    is_work -- only the literal True renders as "True"; every other
               value renders as "False"
    reason -- optional text appended to the is-work cell after ", "

    Returns the <tr> element serialized with etree.tostring.
    """
    tr = etree.Element("tr")
    # Record ID
    bib_id_td = etree.SubElement(tr, "td")
    field907 = marc_record['907']
    # Leave the ID cell empty when the record has no 907 field, the same
    # way a missing title or author leaves its cell empty.
    # NOTE(review): assumes the lookup yields None for an absent tag --
    # confirm against the pymarc version in use.
    if field907 is not None and field907.subfields.count('a') > 0:
        # The stored value is trimmed by one character at each end.
        bib_id_td.text = field907['a'][1:-1]
    # Title
    title = marc_record.title()
    title_td = etree.SubElement(tr, "td")
    if title is not None:
        title_td.text = title
    # Author
    author = marc_record.author()
    author_td = etree.SubElement(tr, "td")
    if author is not None:
        author_td.text = author
    # Tokenized terms; the classifier is built without a datastore and is
    # used here only for its tokenizer.
    classifier = WorkClassifier(name=title, datastore=None)
    terms = classifier.__tokenize_marc21__(marc_record)
    terms_td = etree.SubElement(tr, "td")
    if terms is not None:
        terms_td.text = str(sorted(terms))
    # Is work?
    is_work_td = etree.SubElement(tr, "td")
    is_work_td.text = "True" if is_work is True else "False"
    if reason is not None:
        is_work_td.text += ', {0}'.format(reason)
    return etree.tostring(tr)
# Example #3
0
def create_row_from_marc(marc_record, is_work=False, reason=None):
    """Function creates a table row from a MARC21 record

    Cells are emitted in this order: record ID, title, author,
    tokenized terms, is-work verdict.

    Keywords:
    marc_record -- pymarc MARC21 record
    is_work -- rendered as "True" only when it is the literal True,
               otherwise as "False"
    reason -- optional text appended to the is-work cell after ", "

    Returns the output of etree.tostring for the <tr> element.
    """
    tr = etree.Element("tr")
    # Record ID
    bib_id_td = etree.SubElement(tr, "td")
    field907 = marc_record['907']
    # A record without a 907 field gets an empty ID cell rather than an
    # AttributeError on None.
    # NOTE(review): assumes an absent tag is reported as None -- verify
    # for the installed pymarc release.
    if field907 is not None and field907.subfields.count('a') > 0:
        # First and last characters of the stored value are dropped.
        bib_id_td.text = field907['a'][1:-1]
    # Title
    title = marc_record.title()
    title_td = etree.SubElement(tr, "td")
    if title is not None:
        title_td.text = title
    # Author
    author = marc_record.author()
    author_td = etree.SubElement(tr, "td")
    if author is not None:
        author_td.text = author
    # Tokenized terms; no datastore is supplied because only the
    # tokenizer is called on this classifier.
    classifier = WorkClassifier(name=title, datastore=None)
    terms = classifier.__tokenize_marc21__(marc_record)
    terms_td = etree.SubElement(tr, "td")
    if terms is not None:
        terms_td.text = str(sorted(terms))
    # Is work?
    is_work_td = etree.SubElement(tr, "td")
    is_work_td.text = "True" if is_work is True else "False"
    if reason is not None:
        is_work_td.text += ', {0}'.format(reason)
    return etree.tostring(tr)
 def setUp(self):
     """Create the classifier under test, named "Test", on TEST_REDIS."""
     classifier_options = {"name": "Test", "datastore": TEST_REDIS}
     self.classifier = WorkClassifier(**classifier_options)