Exemplo n.º 1
0
    def test_bibtex2source(self):
        from clld.scripts.util import bibtex2source

        bibtex2source(Record('book', 'id', author='M, R and G, H and Z, U'))
        bibtex2source(Record('book', 'id', editor='M, R and G, H'))
        bibtex2source(
            Record('book', 'id', title='tb', customfield='cf', year="1920}"))
Exemplo n.º 2
0
def test_ContextObject():
    from clld.lib.coins import ContextObject

    c = ContextObject('sid', 'journal', ('jtitle', '\xe2'))
    assert '%C3%A2' in c.span_attrs()['title']
    c = ContextObject('sid', 'journal',
                      ('jtitle', binary_type('ä'.encode('utf8'))))
    assert '%C3%A4' in c.span_attrs()['title']

    bib = Record('book', '1', title='The Title', author='L, F')
    ContextObject('sid', 'book', ('btitle', 'the title'))
    ContextObject.from_bibtex('sid', bib)
    bib = Record('article',
                 '1',
                 title='The Title',
                 author='The One and The Other',
                 journal='J')
    ContextObject.from_bibtex('sid', bib)
    bib = Record('phdthesis', '1', title='The Title')
    ContextObject.from_bibtex('sid', bib)
    bib = Record('conference', '1', title='The Title', booktitle='something')
    co = ContextObject.from_bibtex('sid', bib)
    assert isinstance(co.span_attrs(), dict)

    assert ContextObject('äöü', 'äöü').span_attrs()
    assert ContextObject('äöü'.encode('latin1'), None).span_attrs()
Exemplo n.º 3
0
def add_sources(args, data):
    bib = Database.from_file(args.data_file('phoible-references.bib'),
                             lowercase=True)
    ext = [
        Record.from_string('@' + s, lowercase=True)
        for s in nfilter(BIB.split('@'))
    ]

    for rec in chain(ext, bib):
        if rec.id not in data['Source']:
            data.add(Source, rec.id, _obj=bibtex2source(rec))

    #
    # add aliases to lookup records with bibtex keys with numeric prefixes without
    # specifying the prefix
    #
    for key in list(data['Source'].keys()):
        if '_' in key:
            no, rem = key.split('_', 1)
            try:
                int(no)
                if rem not in data['Source']:
                    data['Source'][rem] = data['Source'][key]
            except (ValueError, TypeError):
                pass
Exemplo n.º 4
0
    def test_linearization(self):
        from clld.lib.bibtex import Record

        for bib, txt in [
            (
                """@book{Dayley-1985,
  address    = {Berkeley},
  author     = {Dayley, Jon P.},
  iso_code   = {tzt; tzj},
  olac_field = {general_linguistics; semantics; morphology; typology; syntax},
  publisher  = {University of California Press},
  series     = {University of California Publications in Linguistics},
  title      = {Tzutujil Grammar},
  volume     = {107},
  wals_code  = {tzu},
  year       = {1985}
}
                """,
                "Dayley, Jon P. 1985. Tzutujil Grammar. (University of California "
                "Publications in Linguistics, 107.) Berkeley: University of California "
                "Press."),
            (
                """@book{318762,
  address    = {Vancouver},
  author     = {Cook, Eung-Do},
  pages      = {670},
  publisher  = {UBC Press},
  series     = {First Nations Languages Series},
  title      = {A Tsilhqút'ín Grammar},
  year       = {2013}
}
                """,
                "Cook, Eung-Do. 2013. A Tsilhqút'ín Grammar. (First Nations Languages "
                "Series.) Vancouver: UBC Press. 670pp."),
            (
                """@inbook{316361,
  author     = {Healey, Alan},
  booktitle  = {New Guinea area languages and language study},
  pages      = {223-232},
  title      = {History of research in Austronesian languages: Admiralty Islands area},
  volume     = {2}
}
                """,
                "Healey, Alan. n.d. History of research in Austronesian languages: "
                "Admiralty Islands area. 2. 223-232."),
            (
                """@inproceedings{moisikesling2011,
  author    = {Moisik, Scott R. and Esling, John H.},
  booktitle = {Proceedings of the Congress of Phonetic Sciences (ICPhS XVII)},
  pages     = {1406-1409},
  title     = {The 'whole larynx' approach to laryngeal features},
  year      = {2011}
}""",
                "Moisik, Scott R. and Esling, John H. 2011. The 'whole larynx' approach "
                "to laryngeal features. In Proceedings of the Congress of "
                "Phonetic Sciences (ICPhS XVII), 1406-1409.")
        ]:
            rec = Record.from_string(bib)
            self.assertEqual(rec.text(), txt)
Exemplo n.º 5
0
    def test_Record(self):
        from clld.lib.bibtex import Record

        rec = Record(
            'book', '1',
            title='The Title', editor='ed', booktitle='bt', school='s', issue='i',
            pages='1-4', publisher='M')
        self.assertTrue('@book' in rec.__unicode__())
        self.assertTrue('@book' in rec.__str__())
        self.assertTrue('The Title' in rec.text())

        for fmt in ['txt', 'en', 'ris', 'mods']:
            rec.format(fmt)

        rec = Record.from_string(rec.__unicode__())
        rec = Record.from_object(Mock())
Exemplo n.º 6
0
    def test_linearization(self):
        from clld.lib.bibtex import Record

        for bib, txt in [
            ("""@book{Dayley-1985,
  address    = {Berkeley},
  author     = {Dayley, Jon P.},
  iso_code   = {tzt; tzj},
  olac_field = {general_linguistics; semantics; morphology; typology; syntax},
  publisher  = {University of California Press},
  series     = {University of California Publications in Linguistics},
  title      = {Tzutujil Grammar},
  volume     = {107},
  wals_code  = {tzu},
  year       = {1985}
}
                """,
             "Dayley, Jon P. 1985. Tzutujil Grammar. (University of California "
             "Publications in Linguistics, 107.) Berkeley: University of California "
             "Press."),
            ("""@book{318762,
  address    = {Vancouver},
  author     = {Cook, Eung-Do},
  pages      = {670},
  publisher  = {UBC Press},
  series     = {First Nations Languages Series},
  title      = {A Tsilhqút'ín Grammar},
  year       = {2013}
}
                """,
             "Cook, Eung-Do. 2013. A Tsilhqút'ín Grammar. (First Nations Languages "
             "Series.) Vancouver: UBC Press. 670pp."),
            ("""@inbook{316361,
  author     = {Healey, Alan},
  booktitle  = {New Guinea area languages and language study},
  pages      = {223-232},
  title      = {History of research in Austronesian languages: Admiralty Islands area},
  volume     = {2}
}
                """,
             "Healey, Alan. n.d. History of research in Austronesian languages: "
             "Admiralty Islands area. 2. 223-232."),
            ("""@inproceedings{moisikesling2011,
  author    = {Moisik, Scott R. and Esling, John H.},
  booktitle = {Proceedings of the Congress of Phonetic Sciences (ICPhS XVII)},
  pages     = {1406-1409},
  title     = {The 'whole larynx' approach to laryngeal features},
  year      = {2011}
}""", "Moisik, Scott R. and Esling, John H. 2011. The 'whole larynx' approach "
             "to laryngeal features. In Proceedings of the Congress of "
             "Phonetic Sciences (ICPhS XVII), 1406-1409.")
        ]:
            rec = Record.from_string(bib)
            self.assertEqual(rec.text(), txt)
Exemplo n.º 7
0
    def test_ContextObject(self):
        from clld.lib.coins import ContextObject

        bib = Record('book', '1', title='The Title', author='L, F')
        co = ContextObject('sid', 'book', ('btitle', 'the title'))
        co = ContextObject.from_bibtex('sid', bib)
        bib = Record('article',
                     '1',
                     title='The Title',
                     author='The One and The Other',
                     journal='J')
        co = ContextObject.from_bibtex('sid', bib)
        bib = Record('phdthesis', '1', title='The Title')
        co = ContextObject.from_bibtex('sid', bib)
        bib = Record('conference',
                     '1',
                     title='The Title',
                     booktitle='something')
        co = ContextObject.from_bibtex('sid', bib)
        self.assertTrue(isinstance(co.span_attrs(), dict))
Exemplo n.º 8
0
    def test_Record(self):
        from clld.lib.bibtex import Record

        rec = Record('book',
                     '1',
                     title='The Title',
                     editor='ed',
                     booktitle='bt',
                     school='s',
                     issue='i',
                     pages='1-4',
                     publisher='M')
        self.assertTrue('@book' in rec.__unicode__())
        self.assertTrue('@book' in rec.__str__())
        self.assertTrue('The Title' in rec.text())

        for fmt in ['txt', 'en', 'ris', 'mods']:
            rec.format(fmt)

        rec = Record.from_string(rec.__unicode__())
        rec = Record.from_object(Mock())
Exemplo n.º 9
0
    def test_Database(self):
        from clld.lib.bibtex import Record, Database

        db = Database([])
        self.assertEqual(len(db), 0)
        db = Database([Record('book', 'id')])
        self.assertEqual(db[0], db['id'])
        assert unicode(db)
        db = Database.from_file('notexisting.bib')
        self.assertEqual(len(db), 0)
        db = Database.from_file(TESTS_DIR.joinpath('test.bib'))
        self.assertEqual(len(db), 1)
Exemplo n.º 10
0
def _get_bibtex(refs):
    for ref in refs:
        genre = 'misc'
        id = ref['id']
        attrs = dict(all=ref['text'])
        t = ref['text']
        match = YEAR.search(t)
        if match:
            authors = 'editor' if match.group('ed') else 'author'
            attrs['key'], attrs[authors] = normalized_author(t[:match.start()].strip())
            attrs['title'], rem = [s.strip() for s in re.split('\.|\?', t[match.end():], 1)]
            attrs['year'] = match.group('year')
            attrs['key'] = '%(key)s %(year)s' % attrs
            m = EDS.match(rem)
            if m:
                assert 'editor' not in attrs
                attrs['editor'] = normalized_author(m.group('eds').strip())[1]
                genre = 'incollection'
                rem = rem[m.end():].strip()
                mm = BTITLE_PAGES.match(rem)
                if mm:
                    attrs['booktitle'] = mm.group('btitle').strip()
                    attrs['pages'] = mm.group('pages').strip()
                    rem = rem[mm.end():].strip()
            else:
                mm = JOURNAL.match(rem)
                if mm:
                    genre = 'article'
                    attrs['journal'] = mm.group('journal').strip()
                    attrs['volume'] = mm.group('volume').strip()
                    if mm.group('number'):
                        attrs['number'] = mm.group('number').strip()
                    attrs['pages'] = mm.group('pages').strip()
                    rem = rem[mm.end():].strip()
            m = PUBLISHER.match(rem)
            if m:
                if genre == 'misc':
                    genre = 'book'
                attrs['place'] = m.group('place').strip()
                attrs['publisher'] = m.group('publisher').strip()
                rem = rem[m.end():].strip()
            _rem = []
            for piece in [p.strip() for p in re.split('\.(?:\s+|$)', rem) if p.strip()]:
                if piece.startswith('http') and not re.search('\s+', piece):
                    attrs['url'] = piece
                elif piece.startswith('(') and piece.endswith(')'):
                    attrs['note'] = piece[1:-1].strip()
                else:
                    _rem.append(piece)
            rem = '. '.join(_rem)
            if not slug(unicode(rem)):
                del attrs['all']
        yield Record(genre, id, **attrs)
Exemplo n.º 11
0
    def test_Database(self):
        from clld.lib.bibtex import Record, Database

        db = Database([])
        self.assertEqual(len(db), 0)
        db = Database([Record('book', 'id')])
        self.assertEqual(db[0], db['id'])
        assert text_type(db)
        db = Database.from_file('notexisting.bib')
        self.assertEqual(len(db), 0)
        db = Database.from_file(TESTS_DIR.joinpath('test.bib'))
        self.assertEqual(len(db), 1)
        assert '@' in db[0]['title']
        assert [r for r in db]
        self.assertRaises(NotImplementedError, db.format, 'txt')
Exemplo n.º 12
0
def add_sources(args, data):
    bib = Database.from_file(args.data_file('phoible-references.bib'), lowercase=True)
    ext = [Record.from_string('@' + s, lowercase=True) for s in nfilter(BIB.split('@'))]

    for rec in chain(ext, bib):
        if rec.id not in data['Source']:
            data.add(Source, rec.id, _obj=bibtex2source(rec))

    #
    # add aliases to lookup records with bibtex keys with numeric prefixes without
    # specifying the prefix
    #
    for key in list(data['Source'].keys()):
        if '_' in key:
            no, rem = key.split('_', 1)
            try:
                int(no)
                if rem not in data['Source']:
                    data['Source'][rem] = data['Source'][key]
            except (ValueError, TypeError):
                pass
Exemplo n.º 13
0
def main(args):  # pragma: no cover
    ds = StructureDataset.from_metadata(DS)
    data = Data()
    for source in ds.sources:
        data.add(common.Source, source.id, _obj=bibtex2source(source))

    ext = [
        Record.from_string('@' + s, lowercase=True)
        for s in nfilter(BIB.split('@'))
    ]
    for rec in ext:
        if rec.id not in data['Source']:
            data.add(common.Source, rec.id, _obj=bibtex2source(rec))

    for contrib in ds['contributors.csv']:
        o = data.add(
            common.Contributor,
            contrib['ID'],
            id=contrib['ID'].upper(),
            name=contrib['Name'],
            description=contrib['Description'],
            url=contrib['URL'],
            jsondata={
                'readme': contrib['Readme'],
                'contents': contrib['Contents']
            },
        )
        for src in contrib['Source']:
            DBSession.add(
                models.ContributorReference(source=data['Source'][src],
                                            contributor=o))

    dataset = data.add(
        common.Dataset,
        'phoible',
        id='phoible',
        name='PHOIBLE 2.0',
        description='PHOIBLE 2.0',
        publisher_name="Max Planck Institute for the Science of Human History",
        publisher_place="Jena",
        publisher_url="https://www.shh.mpg.de",
        domain='phoible.org',
        license='https://creativecommons.org/licenses/by-sa/3.0/',
        contact='*****@*****.**',
        jsondata={
            'license_icon':
            'https://i.creativecommons.org/l/by-sa/3.0/88x31.png',
            'license_name':
            'Creative Commons Attribution-ShareAlike 3.0 Unported License'
        })

    for i, (cid, name) in enumerate([
        ('UZ', "Steven Moran"),
        ('mccloy', "Daniel McCloy"),
    ],
                                    start=1):
        contrib = data['Contributor'].get(cid)
        if not contrib:
            contrib = common.Contributor(id=cid, name=name)
        DBSession.add(
            common.Editor(dataset=dataset, ord=i, contributor=contrib))

    glottolog = Glottolog(
        Path(phoible.__file__).parent.parent.parent.parent.joinpath(
            'glottolog', 'glottolog'))

    for lang in ds['LanguageTable']:
        l = data.add(
            models.Variety,
            lang['ID'],
            id=lang['ID'],
            name=lang['Name'],
        )

    load_families(data, [(l.id, l)
                         for l in data['Variety'].values() if len(l.id) == 8],
                  glottolog.repos)
    DBSession.flush()

    # assign color codes:
    families = defaultdict(list)
    for l in data['Variety'].values():
        families[l.family_pk].append(l)

    colors = color.qualitative_colors(len(families))
    for i, langs in enumerate(sorted(families.values(),
                                     key=lambda v: -len(v))):
        for l in langs:
            l.jsondata = {'color': colors[i]}

    for segment in ds['ParameterTable']:
        equivalence_class = ''.join([
            t[0] for t in [(c, unicodedata.name(c)) for c in segment['Name']]
            if t[1].split()[0] not in ['COMBINING', 'MODIFIER']
        ]),
        data.add(models.Segment,
                 segment['ID'],
                 id=segment['ID'],
                 name=segment['Name'],
                 description=segment['Description'],
                 segment_class=segment['SegmentClass'],
                 equivalence_class=equivalence_class)
    DBSession.flush()

    # Add redirects for old language pages! get relevant ISO codes and map to Glottocode!
    for model, repls in load(
            Path(phoible.__file__).parent.parent /
            'replacements.json').items():
        if model == 'Language':
            languoids = {l.id: l for l in glottolog.languoids()}
            iso_languoids = {l.iso: l for l in languoids.values() if l.iso}
            gl_in_phoible = set(data['Variety'].keys())
            for oid, nid in repls.items():
                gls = descendants_from_nodemap(
                    iso_languoids.get(oid),
                    languoids).intersection(gl_in_phoible)
                if gls:
                    nid = gls.pop()
                    if len(gls) > 1:
                        print('+++', oid, gls)
                else:
                    print('---', oid)
                common.Config.add_replacement(oid, nid, common.Language)
        elif model == 'Parameter':
            segments_in_phoible = set(data['Segment'].keys())
            for oid, nid in repls.items():
                id_ = nid if nid in segments_in_phoible else None
                common.Config.add_replacement(oid, id_, common.Parameter)

    for segment in ds['ParameterTable']:
        for i, (k, v) in enumerate(sorted(segment.items())):
            if k not in ['ID', 'Name', 'Description', 'SegmentClass']:
                DBSession.add(
                    common.Parameter_data(
                        key=feature_name(k),
                        value=v,
                        ord=i,
                        object_pk=data['Segment'][segment['ID']].pk))

    for inventory in ds['contributions.csv']:
        inv = data.add(
            models.Inventory,
            inventory['ID'],
            id=inventory['ID'],
            name='{0} ({1} {2})'.format(
                inventory['Name'],
                inventory['Contributor_ID'].upper(),
                inventory['ID'],
            ),
            source_url=inventory['URL'],
            count_tone=inventory['count_tones'],
            count_vowel=inventory['count_vowels'],
            count_consonant=inventory['count_consonants'],
        )
        DBSession.add(
            common.ContributionContributor(
                contribution=inv,
                contributor=data['Contributor'][
                    inventory['Contributor_ID'].upper()]))
        for src in inventory['Source']:
            DBSession.add(
                common.ContributionReference(contribution=inv,
                                             source=data['Source'][src]))

    for phoneme in ds['ValueTable']:
        lang = data['Variety'][phoneme['Language_ID']]
        inv = data['Inventory'][phoneme['Contribution_ID']]
        if not inv.language:
            inv.language = lang
        vs = common.ValueSet(
            id=phoneme['ID'],
            contribution=inv,
            language=lang,
            parameter=data['Segment'][phoneme['Parameter_ID']])

        for ref in phoneme['Source']:
            DBSession.add(
                common.ValueSetReference(source=data['Source'][ref],
                                         valueset=vs))

        DBSession.add(
            models.Phoneme(
                id=phoneme['ID'],
                name='%s %s' %
                (phoneme['Value'],
                 data['Inventory'][phoneme['Contribution_ID']].name),
                allophones=' '.join(phoneme['Allophones']),
                marginal=phoneme['Marginal'],
                valueset=vs))

    return
Exemplo n.º 14
0
    def test_Record(self):
        from clld.lib.bibtex import Record, EntryType

        rec = Record('article', '1', author=['a', 'b'], editor='a and b')
        self.assertEqual(rec['author'], 'a and b')
        self.assertEqual(rec.get('author'), rec.getall('author'))
        self.assertEqual(rec['editor'], rec.get('editor'))
        self.assertEqual(rec.getall('editor'), ['a', 'b'])

        rec = Record(
            'book', '1',
            title='The Title',
            author='author',
            editor='ed',
            booktitle='bt',
            school='s',
            issue='i',
            pages='1-4',
            publisher='M',
            note="Revised edition")
        self.assertIn('@book', rec.__unicode__())
        self.assertIn('@book', rec.__str__())
        self.assertIn('bt', rec.text())

        for fmt in ['txt', 'en', 'ris', 'mods']:
            rec.format(fmt)

        Record.from_string(rec.__unicode__(), lowercase=True)
        Record.from_object(Mock())

        rec = Record(
            'incollection', '1',
            title='The Title', editor='ed', booktitle='bt', school='s', issue='i',
            pages='1-4', publisher='M', note="Revised edition")
        self.assertIn('In ', rec.text())

        rec = Record(
            'article', '1',
            title='The Title', journal='The Journal', volume="The volume", issue='issue')
        self.assertTrue('The Journal' in rec.text())

        rec = Record('xmisc', '1', note='Something')
        self.assertEqual(rec.genre, EntryType.misc)
        self.assertIn('Something', rec.text())
Exemplo n.º 15
0
    def test_bibtex2source(self):
        from clld.scripts.util import bibtex2source

        bibtex2source(Record('book', 'id', title='tb', customfield='cf', year="1920}"))
Exemplo n.º 16
0
# -*- coding: utf-8 -*-
from clld.lib.bibtex import Record
from clldutils.misc import slug

CFG = {
    'EXPORTS': [],
    'PUBLICATIONS': [
        Record(
            'ARTICLE',
            'HammarstroemEtAl2011Oslo',
            ('author', u'Harald Hammarström and Sebastian Nordhoff'),
            ('year', '2011'),
            ('title',
             'LangDoc: Bibliographic Infrastructure for Linguistic Typology'),
            ('journal', 'Oslo Studies in Language'),
            ('volume', '3'),
            ('number', '2'),
            ('pages', '31-43'),
            ('url',
             'https://www.journals.uio.no/index.php/osla/article/view/75/199'),
        ),
        Record(
            'UNPUBLISHED',
            'HammarstroemEtAl2011Howmany',
            ('author', u"Hammarström, Harald and Nordhoff, Sebastian"),
            ('year', u"2011"),
            ('title', u"How many languages have so far been described?"),
            ('howpublished',
             u"Paper presented at NWO Endangered Languages Programme Conference, Leiden, April 2011"
             ),
        ),
Exemplo n.º 17
0
    def test_Record(self):
        from clld.lib.bibtex import Record, EntryType

        rec = Record('article', '1', author=['a', 'b'], editor='a and b')
        self.assertEqual(rec['author'], 'a and b')
        self.assertEqual(rec.get('author'), rec.getall('author'))
        self.assertEqual(rec['editor'], rec.get('editor'))
        self.assertEqual(rec.getall('editor'), ['a', 'b'])

        rec = Record('book',
                     '1',
                     title='The Title',
                     author='author',
                     editor='ed',
                     booktitle='bt',
                     school='s',
                     issue='i',
                     pages='1-4',
                     publisher='M',
                     note="Revised edition")
        self.assertIn('@book', rec.__unicode__())
        self.assertIn('@book', rec.__str__())
        self.assertIn('bt', rec.text())

        for fmt in ['txt', 'en', 'ris', 'mods']:
            rec.format(fmt)

        Record.from_string(rec.__unicode__(), lowercase=True)
        Record.from_object(Mock())

        rec = Record('incollection',
                     '1',
                     title='The Title',
                     editor='ed',
                     booktitle='bt',
                     school='s',
                     issue='i',
                     pages='1-4',
                     publisher='M',
                     note="Revised edition")
        self.assertIn('In ', rec.text())

        rec = Record('article',
                     '1',
                     title='The Title',
                     journal='The Journal',
                     volume="The volume",
                     issue='issue')
        self.assertTrue('The Journal' in rec.text())

        rec = Record('xmisc', '1', note='Something')
        self.assertEqual(rec.genre, EntryType.misc)
        self.assertIn('Something', rec.text())