Пример #1
0
class Cache(object):
    """Directory-backed key/value store; every value is pickled into its own file.

    Keys are turned into file names via ``path_component``.

    NOTE(review): values are read back with ``pickle.load`` - only point this at
    directories whose content is trusted.
    """

    def __init__(self, dir_=None):
        """Use ``dir_`` (or ``CACHE_DIR`` if falsy) as storage directory, creating it if needed."""
        cache_dir = Path(dir_ or CACHE_DIR)
        if not cache_dir.exists():
            cache_dir.mkdir(parents=True)  # pragma: no cover
        self._dir = cache_dir

    def _path(self, key):
        """Return the path of the file holding the value for ``key``."""
        name = path_component(key)
        return self._dir.joinpath(name)

    def __len__(self):
        """Return the number of entries in the storage directory."""
        return sum(1 for _ in self.keys())

    def __getitem__(self, item):
        """Unpickle and return the value stored for ``item``."""
        with self._path(item).open('rb') as stream:
            value = pickle.load(stream)
        return value

    def __setitem__(self, key, value):
        """Pickle ``value`` into the file for ``key``, replacing any previous content."""
        with self._path(key).open('wb') as stream:
            pickle.dump(value, stream)

    def __delitem__(self, key):
        """Remove the file for ``key``."""
        target = self._path(key)
        remove(target)

    def __contains__(self, item):
        """Return whether a file for ``item`` exists."""
        return self._path(item).exists()

    def keys(self):
        """Yield the names of all entries of the storage directory as unicode."""
        for entry in self._dir.iterdir():
            yield as_unicode(entry.name)

    def clear(self):
        """Remove the files of all keys."""
        for name in self.keys():
            remove(self._path(name))
Пример #2
0
def llod_func(args):  # pragma: no cover
    """Create an RDF dump and compute some statistics about it.

    Objects of every resource type in ``RESOURCES`` are serialized as N3 into a file
    in a fresh temporary directory. Counts and link statistics are written to
    ``rdf-metadata.json``; the dump is then copied into the ``static/download``
    directory of the app and gzipped there.

    :param args: Object providing ``log``, ``env``, ``data_file`` and ``module_dir``.
    """
    tmp = Path(mkdtemp())
    count_rsc = 0
    count_triples = 0

    tmp_dump = tmp.joinpath('rdf.n3')
    with open(as_posix(tmp_dump), 'w') as fp:
        for rsc in RESOURCES:
            args.log.info('Resource type %s ...' % rsc.name)
            try:
                q = DBSession.query(rsc.model)
            except InvalidRequestError:
                args.log.info('... skipping')
                continue
            for obj in page_query(q.order_by(rsc.model.pk), n=10000, verbose=True):
                graph = get_graph(obj, args.env['request'], rsc.name)
                count_triples += len(graph)
                count_rsc += 1
                # Only the very first graph is serialized with a head.
                fp.write(n3(graph, with_head=count_rsc == 1))
            args.log.info('... finished')

    # put in args.data_file('..', 'static', 'download')?
    md = {'path': as_posix(tmp), 'resources': count_rsc, 'triples': count_triples}
    md.update(count_links(as_posix(tmp_dump)))
    jsonlib.dump(md, args.data_file('rdf-metadata.json'))
    print(md)

    dataset = Dataset.first()
    rdf_dump = args.module_dir.joinpath(
        'static', 'download', '%s-dataset.n3' % dataset.id)
    tmp_dump.copy(rdf_dump)
    # Pass an argument list instead of a shell string, so paths containing spaces or
    # shell metacharacters are handed to gzip verbatim.
    check_call(['gzip', '-f', str(rdf_dump)])
    print(str(rdf_dump))
Пример #3
0
 def write_info(self, outdir=None):
     outdir = outdir or self.dir
     if not isinstance(outdir, Path):
         outdir = Path(outdir)
     if outdir.name != self.id:
         outdir = outdir.joinpath(self.id)
     if not outdir.exists():
         outdir.mkdir()
     fname = outdir.joinpath(INFO_FILENAME)
     self.cfg.write(fname)
     if os.linesep == '\n':
         with fname.open(encoding='utf8') as fp:
             text = fp.read()
         with fname.open('w', encoding='utf8') as fp:
             fp.write(text.replace('\n', '\r\n'))
     return fname
Пример #4
0
    def __init__(self, name, dir_=None, default=None, **kw):
        """Read the named config file, or create it if it does not exist yet.

        :param name: Basename for the config file (suffix .ini will be appended).
        :param dir_: Directory of the config file; ``CONFIG_DIR`` is used if falsy.
        :param default: Default content of the config file.
        :param kw: Passed on as one positional argument to ``INI.__init__``.
        """
        INI.__init__(self, kw, allow_no_value=True)
        self.name = name
        config_dir = Path(dir_ or CONFIG_DIR)

        # Only textual default content is read; defaults of other types are ignored.
        if default and isinstance(default, text_type):
            self.read_string(default)

        ini_path = config_dir.joinpath(name + '.ini')
        if not ini_path.exists():
            if not config_dir.exists():
                try:
                    config_dir.mkdir()
                except OSError:  # pragma: no cover
                    # this happens when run on travis-ci, by a system user.
                    pass
            # The file is only written if the directory could be created.
            if config_dir.exists():
                self.write(ini_path.as_posix())
        else:
            assert ini_path.is_file()
            self.read(ini_path.as_posix())
        self.path = ini_path
Пример #5
0
    def write(self, outdir='.', suffix='.csv', cited_sources_only=False, archive=False):
        outdir = Path(outdir)
        if not outdir.exists():
            raise ValueError(outdir.as_posix())

        close = False
        if archive:
            if isinstance(archive, Archive):
                container = archive
            else:
                container = Archive(outdir.joinpath(self.name + '.zip'), mode='w')
                close = True
        else:
            container = outdir

        fname = Path(outdir).joinpath(self.name + suffix)
        if fname.suffix in TAB_SUFFIXES:
            self.table.dialect.delimiter = '\t'

        with UnicodeWriter(
                None if isinstance(container, Archive) else fname,
                delimiter=self.table.dialect.delimiter) as writer:
            writer.writerow(self.fields)
            for row in self.rows:
                writer.writerow(row.to_list())

        if isinstance(container, Archive):
            container.write_text(writer.read(), fname.name)
        self.table.url = fname.name

        self.metadata.write(Dataset.filename(fname, 'metadata'), container)
        ids = self._cited_sources if cited_sources_only else None
        self.sources.write(Dataset.filename(fname, 'sources'), container, ids=ids)
        if close:
            container.close()
Пример #6
0
    def __init__(self, name, default=None, **kw):
        """Initialization.

        :param name: Basename for the config file (suffix .ini will be appended).
        :param default: Default content of the config file.
        """
        self.name = name
        self.default = default
        config_dir = Path(kw.pop('config_dir', None) or DIR)
        RawConfigParser.__init__(self, kw, allow_no_value=True)
        if self.default:
            if PY3:
                fp = io.StringIO(self.default)
            else:
                fp = io.BytesIO(self.default.encode('utf8'))
            self.readfp(fp)

        cfg_path = config_dir.joinpath(name + '.ini')
        if cfg_path.exists():
            assert cfg_path.is_file()
            self.read(cfg_path.as_posix())
        else:
            if not config_dir.exists():
                try:
                    config_dir.mkdir()
                except OSError:  # pragma: no cover
                    # this happens when run on travis-ci, by a system user.
                    pass
            if config_dir.exists():
                with open(cfg_path.as_posix(), 'w') as fp:
                    self.write(fp)
        self.path = cfg_path
Пример #7
0
    def test_freeze(self):
        """Freeze the database, unfreeze it into a fresh engine and compare both."""
        from clld.scripts.freeze import freeze_func, unfreeze_func

        tmp = Path(mkdtemp())
        # Remove the temporary directory even when an assertion below fails.
        try:
            tmp.joinpath('data').mkdir()
            tmp.joinpath('appname').mkdir()

            class Args(object):
                env = self.env
                module_dir = tmp.joinpath('appname').resolve()
                module = Mock(__name__='appname')

                def data_file(self, *comps):
                    return tmp.resolve().joinpath('data', *comps)

            DBSession.flush()
            args = Args()
            freeze_func(args, dataset=Dataset.first(), with_history=False)
            # ``assert_`` is a deprecated alias which was removed in Python 3.12.
            self.assertTrue(tmp.joinpath('data.zip').exists())

            engine = create_engine('sqlite://')
            Base.metadata.create_all(engine)
            self.assertEqual(
                engine.execute('select count(*) from language').fetchone()[0], 0)
            unfreeze_func(args, engine=engine)

            s1 = DBSession
            s2 = sessionmaker(bind=engine)()
            self.assertEqual(
                s1.query(Language).count(),
                s2.query(Language).count())

            l1 = s1.query(Language).filter(Language.latitude != null()).first()
            l2 = s2.query(Language).filter(Language.pk == l1.pk).first()
            self.assertEqual(l1.created, l2.created)
            self.assertEqual(l1.latitude, l2.latitude)
            self.assertEqual(l1.description, l2.description)

            contrib = s2.query(Contribution).filter(
                Contribution.id == 'contribution').one()
            self.assertTrue(contrib.primary_contributors)
            self.assertTrue(contrib.secondary_contributors)
        finally:
            rmtree(tmp, ignore_errors=True)
Пример #8
0
    def test_freeze(self):
        """Check that data survives a ``freeze_func``/``unfreeze_func`` round trip."""
        from clld.scripts.freeze import freeze_func, unfreeze_func

        tmp = Path(mkdtemp())
        # Clean up the temporary directory regardless of the test outcome.
        try:
            tmp.joinpath('data').mkdir()
            tmp.joinpath('appname').mkdir()

            class Args(object):
                env = self.env
                module_dir = tmp.joinpath('appname').resolve()
                module = Mock(__name__='appname')

                def data_file(self, *comps):
                    return tmp.resolve().joinpath('data', *comps)

            DBSession.flush()
            args = Args()
            freeze_func(args, dataset=Dataset.first(), with_history=False)
            # ``assertTrue`` replaces ``assert_``, which is gone as of Python 3.12.
            self.assertTrue(tmp.joinpath('data.zip').exists())

            # The target database starts out with an empty language table.
            engine = create_engine('sqlite://')
            Base.metadata.create_all(engine)
            self.assertEqual(
                engine.execute('select count(*) from language').fetchone()[0], 0)
            unfreeze_func(args, engine=engine)

            s1 = DBSession
            s2 = sessionmaker(bind=engine)()
            self.assertEqual(s1.query(Language).count(), s2.query(Language).count())

            l1 = s1.query(Language).filter(Language.latitude != null()).first()
            l2 = s2.query(Language).filter(Language.pk == l1.pk).first()
            self.assertEqual(l1.created, l2.created)
            self.assertEqual(l1.latitude, l2.latitude)
            self.assertEqual(l1.description, l2.description)

            contrib = s2.query(Contribution).filter(Contribution.id == 'contribution').one()
            self.assertTrue(contrib.primary_contributors)
            self.assertTrue(contrib.secondary_contributors)
        finally:
            rmtree(tmp, ignore_errors=True)
Пример #9
0
    def create(self, dir_, content):
        """Store ``content`` in the file ``self.relpath`` below the directory ``dir_``.

        Missing parent directories of the file are created.

        :param dir_: Base directory, as ``Path`` or anything ``Path`` accepts.
        :param content: Data to write; the file is opened in binary mode.
        :return: File-system path (posix style) of the file that was created.
        """
        base = dir_ if isinstance(dir_, Path) else Path(dir_)
        target = base.joinpath(self.relpath)
        folder = target.parent
        if not folder.exists():
            folder.mkdir(parents=True)
        posix_path = target.as_posix()
        with open(posix_path, 'wb') as out:
            out.write(content)
        return posix_path
Пример #10
0
    def create(self, dir_, content):
        """Write ``content`` to the file at ``self.relpath`` relative to ``dir_``.

        :param dir_: Directory the relative path is resolved against.
        :param content: Data for the file, which is opened in binary mode.
        :return: File-system path of the file that was created.
        """
        destination = Path(dir_).joinpath(self.relpath) \
            if not isinstance(dir_, Path) else dir_.joinpath(self.relpath)
        # Make sure the directory the file goes into is there.
        if not destination.parent.exists():
            destination.parent.mkdir(parents=True)
        created = destination.as_posix()
        with open(created, 'wb') as stream:
            stream.write(content)
        return created
Пример #11
0
def main(args):
    """Populate the database from the lexirumah data repository.

    Creates the collation index and the ``Dataset`` object, imports every dataset
    directory that provides ``cldf/metadata.json`` and finally loads the language
    families.

    :param args: Not used by this function.
    """
    Index('ducet', collkey(common.Value.name)).create(DBSession.bind)
    repos = Path(
        os.path.expanduser('~')).joinpath('venvs/lexirumah/lexirumah-data')

    with transaction.manager:
        dataset = common.Dataset(
            id=lexirumah.__name__,
            name="lexirumah",
            publisher_name="Max Planck Institute for the Science of Human History",
            publisher_place="Jena",
            publisher_url="http://shh.mpg.de",
            license="http://creativecommons.org/licenses/by/4.0/",
            domain='lexirumah.model-ling.eu',
            contact='*****@*****.**',
            jsondata={
                'license_icon':
                'cc-by.png',
                'license_name':
                'Creative Commons Attribution 4.0 International License'
            })
        DBSession.add(dataset)

    # Both reference catalogs are looked up four directory levels above the module
    # file of the lexirumah package.
    base_dir = Path(lexirumah.__file__).parent.parent.parent.parent
    glottolog_repos = base_dir.joinpath('glottolog3', 'glottolog')
    languoids = {l.id: l for l in Glottolog(glottolog_repos).languoids()}
    concepticon = Concepticon(
        base_dir.joinpath('concepticon', 'concepticon-data'))
    conceptsets = {c.id: c for c in concepticon.conceptsets.values()}

    for dname in sorted(repos.joinpath('datasets').iterdir(),
                        key=lambda p: p.name):
        if dname.is_dir() and dname.name != '_template':
            mdpath = dname.joinpath('cldf', 'metadata.json')
            if mdpath.exists():
                print(dname.name)
                import_cldf(dname, load(mdpath), languoids, conceptsets)

    with transaction.manager:
        load_families(Data(),
                      DBSession.query(LexiRumahLanguage),
                      glottolog_repos=glottolog_repos,
                      isolates_icon='tcccccc')
Пример #12
0
 def write_info(self, outdir=None):
     outdir = outdir or self.id
     if not isinstance(outdir, Path):
         outdir = Path(outdir)
     if not outdir.exists():
         outdir.mkdir()
     fname = outdir.joinpath(self.fname('.ini'))
     self.cfg.write(fname)
     if os.linesep == '\n':
         with fname.open(encoding='utf8') as fp:
             text = fp.read()
         with fname.open('w', encoding='utf8') as fp:
             fp.write(text.replace('\n', '\r\n'))
     return fname
Пример #13
0
    def languoids(self, ids=None, maxlevel=models.Level.dialect):
        nodes = {}

        for dirpath, dirnames, filenames in os.walk(as_posix(self.tree)):
            dp = Path(dirpath)
            if dp.name in nodes and nodes[dp.name][2] > maxlevel:
                del dirnames[:]

            for dirname in dirnames:
                if ids is None or dirname in ids:
                    lang = languoids.Languoid.from_dir(dp.joinpath(dirname),
                                                       nodes=nodes)
                    if lang.level <= maxlevel:
                        yield lang
Пример #14
0
class WithTempDirMixin(object):
    """Test fixture mixin which creates a scratch directory for each test.

    Written for cooperative multiple inheritance, see
    http://nedbatchelder.com/blog/201210/multiple_inheritance_is_hard.html
    """

    def setUp(self):
        """Run the next ``setUp`` in the MRO, then create the temporary directory."""
        super(WithTempDirMixin, self).setUp()
        scratch = tempfile.mkdtemp()
        self.tmp = Path(scratch)

    def tearDown(self):
        """Delete the temporary directory, then run the next ``tearDown`` in the MRO."""
        rmtree(self.tmp, ignore_errors=True)
        super(WithTempDirMixin, self).tearDown()

    def tmp_path(self, *comps):
        """Return the path built from ``comps`` below the temporary directory."""
        base = self.tmp
        return base.joinpath(*comps)
Пример #15
0
    def from_metadata(cls, fname):
        fname = Path(fname)
        if fname.is_dir():
            name = '{0}{1}'.format(cls.__name__, MD_SUFFIX)
            tablegroup = TableGroup.from_file(pkg_path('modules', name))
            # adapt the path of the metadata file such that paths to tables are resolved
            # correctly:
            tablegroup._fname = fname.joinpath(name)
        else:
            tablegroup = TableGroup.from_file(fname)

        for mod in get_modules():
            if mod.match(tablegroup):
                return mod.cls(tablegroup)
        return cls(tablegroup)
Пример #16
0
class WithTempDirMixin(object):
    """
    Mixin for test cases which need a temporary directory, available as ``self.tmp``.

    Cooperates with other fixtures via ``super``, see
    http://nedbatchelder.com/blog/201210/multiple_inheritance_is_hard.html
    """

    def setUp(self):
        """Call the next ``setUp`` in the MRO and create the temporary directory."""
        super(WithTempDirMixin, self).setUp()
        created = mkdtemp()
        self.tmp = Path(created)

    def tearDown(self):
        """Remove the temporary directory and call the next ``tearDown`` in the MRO."""
        rmtree(self.tmp, ignore_errors=True)
        super(WithTempDirMixin, self).tearDown()

    def tmp_path(self, *comps):
        """Return the path given by ``comps``, relative to the temporary directory."""
        root = self.tmp
        return root.joinpath(*comps)
Пример #17
0
def main(args):
    """Populate the database from the lexirumah data repository.

    Creates the collation index and the ``Dataset`` object, imports every dataset
    directory that provides ``cldf/metadata.json`` and finally loads the language
    families.

    :param args: Not used by this function.
    """
    Index('ducet', collkey(common.Value.name)).create(DBSession.bind)
    repos = Path(os.path.expanduser('~')).joinpath('venvs/lexirumah/lexirumah-data')

    with transaction.manager:
        dataset = common.Dataset(
            id=lexirumah.__name__,
            name="lexirumah",
            publisher_name="Max Planck Institute for the Science of Human History",
            publisher_place="Jena",
            publisher_url="http://shh.mpg.de",
            license="http://creativecommons.org/licenses/by/4.0/",
            domain='lexirumah.model-ling.eu',
            contact='*****@*****.**',
            jsondata={
                'license_icon': 'cc-by.png',
                'license_name': 'Creative Commons Attribution 4.0 International License'})
        DBSession.add(dataset)

    # Both reference catalogs are looked up four directory levels above the module
    # file of the lexirumah package.
    base_dir = Path(lexirumah.__file__).parent.parent.parent.parent
    glottolog_repos = base_dir.joinpath('glottolog3', 'glottolog')
    languoids = {l.id: l for l in Glottolog(glottolog_repos).languoids()}
    concepticon = Concepticon(base_dir.joinpath('concepticon', 'concepticon-data'))
    conceptsets = {c.id: c for c in concepticon.conceptsets.values()}

    for dname in sorted(repos.joinpath('datasets').iterdir(), key=lambda p: p.name):
        if dname.is_dir() and dname.name != '_template':
            mdpath = dname.joinpath('cldf', 'metadata.json')
            if mdpath.exists():
                print(dname.name)
                import_cldf(dname, load(mdpath), languoids, conceptsets)

    with transaction.manager:
        load_families(
            Data(),
            DBSession.query(LexiRumahLanguage),
            glottolog_repos=glottolog_repos,
            isolates_icon='tcccccc')
Пример #18
0
def main(args):
    """Populate the database from the lexibank data repository.

    Creates the collation index and the ``Dataset`` object, imports every dataset
    directory that provides ``metadata.json`` and finally loads the language families.

    :param args: Not used by this function.
    """
    Index('ducet', collkey(common.Value.name)).create(DBSession.bind)
    home = Path(os.path.expanduser('~'))
    repos = home.joinpath('venvs/lexibank/lexibank-data')

    with transaction.manager:
        jsondata = {
            'license_icon': 'cc-by.png',
            'license_name': 'Creative Commons Attribution 4.0 International License'}
        dataset = common.Dataset(
            id=lexibank.__name__,
            name="lexibank",
            publisher_name="Max Planck Institute for the Science of Human History",
            publisher_place="Jena",
            publisher_url="http://shh.mpg.de",
            license="http://creativecommons.org/licenses/by/4.0/",
            domain='lexibank.clld.org',
            contact='*****@*****.**',
            jsondata=jsondata)
        DBSession.add(dataset)

    # Both reference catalogs are looked up four directory levels above the module
    # file of the lexibank package.
    base_dir = Path(lexibank.__file__).parent.parent.parent.parent
    glottolog = Glottolog(base_dir.joinpath('glottolog3', 'glottolog'))
    languoids = {l.id: l for l in glottolog.languoids()}
    concepticon = Concepticon(base_dir.joinpath('concepticon', 'concepticon-data'))
    conceptsets = {c['ID']: c for c in concepticon.conceptsets()}

    for dataset_dir in repos.joinpath('datasets').iterdir():
        if not dataset_dir.is_dir() or dataset_dir.name == '_template':
            continue
        metadata_path = dataset_dir.joinpath('metadata.json')
        if not metadata_path.exists():
            continue
        print(dataset_dir.name)
        import_cldf(dataset_dir, load(metadata_path), languoids, conceptsets)

    with transaction.manager:
        load_families(
            Data(),
            DBSession.query(LexibankLanguage),
            glottolog=languoids,
            isolates_icon='tcccccc')
Пример #19
0
class API(UnicodeMixin):
    """Base class for APIs giving programmatic access to data in a git repository."""

    # A light-weight way to specify a default repository location (without having to
    # overwrite __init__)
    __repos_path__ = None

    def __init__(self, repos=None):
        """Use ``repos``, or ``__repos_path__`` if ``repos`` is falsy, as repository path."""
        self.repos = Path(repos or self.__repos_path__)

    def __unicode__(self):
        """Describe the repository by name, ``git_describe`` output and path."""
        if self.repos.exists():
            name = self.repos.resolve().name
        else:
            name = self.repos.name
        description = git_describe(self.repos)
        return '<{0} repository {1} at {2}>'.format(name, description, self.repos)

    def path(self, *comps):
        """Return the path given by ``comps`` below the repository directory."""
        return self.repos.joinpath(*comps)

    @property
    def appdir(self):
        """The ``app`` directory of the repository."""
        return self.path('app')

    @property
    def appdatadir(self):
        """The ``data`` directory below ``appdir``."""
        return self.appdir.joinpath('data')

    @classmethod
    def app_wrapper(cls, func):
        """Wrap ``func`` such that it runs on demand and the app is opened afterwards.

        The wrapper calls ``func`` - with an API instance set as ``args.api`` - only if
        the app data directory is missing or ``--recreate`` is in ``args.args``. If
        ``index.html`` exists in the app directory, it is opened in a web browser.
        """
        @wraps(func)
        def wrapper(args):
            api = cls(args.repos)
            data_dir = api.appdatadir
            must_build = not data_dir.exists() or '--recreate' in args.args
            if must_build:
                data_dir.mkdir(exist_ok=True)
                args.api = api
                func(args)
            index = api.appdir.joinpath('index.html')
            if index.exists():
                webbrowser.open(index.resolve().as_uri())

        return wrapper
Пример #20
0
def unfreeze_func(args, engine=None):
    """Load the tables of a ``data.zip`` dump into a database.

    The archive is expected next to the directory ``args.module_dir``. It is extracted
    into a temporary directory, which is removed again even if loading fails.

    :param args: Object providing ``module`` and ``module_dir``.
    :param engine: Database engine to load into; defaults to the bind of ``DBSession``.
    """
    try:
        # presumably imported for its side effects (e.g. registering the app's
        # models) - TODO confirm
        importlib.import_module(args.module.__name__)
    except ImportError:
        pass  # pragma: no cover
    engine = engine or DBSession.get_bind()
    data_dir = Path(mkdtemp())

    try:
        with ZipFile(as_posix(args.module_dir.joinpath('..', 'data.zip'))) as fp:
            fp.extractall(as_posix(data_dir))

        db_version = None
        for table in Base.metadata.sorted_tables:
            csv = data_dir.joinpath('%s.csv' % table.name)
            if csv.exists():
                db_version = load(table, csv, engine)

        if db_version:
            set_alembic_version(engine, db_version)  # pragma: no cover
    finally:
        # Do not leak the extracted data when extraction or loading raises.
        rmtree(data_dir)
Пример #21
0
def unfreeze_func(args, engine=None):
    """Populate a database from the CSV files contained in a ``data.zip`` dump.

    The archive is looked up next to the directory ``args.module_dir`` and unpacked
    into a temporary directory; that directory is deleted afterwards, also when an
    error occurs.

    :param args: Object providing ``module`` and ``module_dir``.
    :param engine: Database engine to load into; defaults to the bind of ``DBSession``.
    """
    try:
        # presumably imported for its side effects (e.g. registering the app's
        # models) - TODO confirm
        importlib.import_module(args.module.__name__)
    except ImportError:
        pass  # pragma: no cover
    engine = engine or DBSession.get_bind()
    data_dir = Path(mkdtemp())

    try:
        with ZipFile(as_posix(args.module_dir.joinpath('..', 'data.zip'))) as fp:
            fp.extractall(as_posix(data_dir))

        db_version = None
        for table in Base.metadata.sorted_tables:
            csv = data_dir.joinpath('%s.csv' % table.name)
            if csv.exists():
                db_version = load(table, csv, engine)

        if db_version:
            set_alembic_version(engine, db_version)
    finally:
        # Remove the unpacked data even if extraction or loading failed.
        rmtree(data_dir)
Пример #22
0
def llod_func(args):  # pragma: no cover
    """Create an RDF dump and compute some statistics about it.

    Objects of every resource type in ``RESOURCES`` are serialized as N3 into a file
    in a fresh temporary directory. Counts and link statistics are written to
    ``rdf-metadata.json``; the dump is then copied into the ``static/download``
    directory of the app and gzipped there.

    :param args: Object providing ``log``, ``env``, ``data_file`` and ``module_dir``.
    """
    tmp = Path(mkdtemp())
    count_rsc = 0
    count_triples = 0

    tmp_dump = tmp.joinpath('rdf.n3')
    with open(as_posix(tmp_dump), 'w') as fp:
        for rsc in RESOURCES:
            args.log.info('Resource type %s ...' % rsc.name)
            try:
                q = DBSession.query(rsc.model)
            except InvalidRequestError:
                args.log.info('... skipping')
                continue
            for obj in page_query(q.order_by(rsc.model.pk),
                                  n=10000,
                                  verbose=True):
                graph = get_graph(obj, args.env['request'], rsc.name)
                count_triples += len(graph)
                count_rsc += 1
                # Only the very first graph is serialized with a head.
                fp.write(n3(graph, with_head=count_rsc == 1))
            args.log.info('... finished')

    # put in args.data_file('..', 'static', 'download')?
    md = {
        'path': as_posix(tmp),
        'resources': count_rsc,
        'triples': count_triples
    }
    md.update(count_links(as_posix(tmp_dump)))
    jsonlib.dump(md, args.data_file('rdf-metadata.json'))
    print(md)

    dataset = Dataset.first()
    rdf_dump = args.module_dir.joinpath('static', 'download',
                                        '%s-dataset.n3' % dataset.id)
    tmp_dump.copy(rdf_dump)
    # Pass an argument list instead of a shell string, so paths containing spaces or
    # shell metacharacters are handed to gzip verbatim.
    check_call(['gzip', '-f', str(rdf_dump)])
    print(str(rdf_dump))
Пример #23
0
    def from_metadata(cls, fname):
        fname = Path(fname)
        if fname.is_dir():
            name = '{0}{1}'.format(cls.__name__, MD_SUFFIX)
            tablegroup = TableGroup.from_file(pkg_path('modules', name))
            # adapt the path of the metadata file such that paths to tables are resolved
            # correctly:
            tablegroup._fname = fname.joinpath(name)
        else:
            tablegroup = TableGroup.from_file(fname)

        comps = Counter()
        for table in tablegroup.tables:
            try:
                comps.update([Dataset.get_tabletype(table)])
            except ValueError:
                pass
        if comps and comps.most_common(1)[0][1] > 1:
            raise ValueError('{0}: duplicate components!'.format(fname))

        for mod in get_modules():
            if mod.match(tablegroup):
                return mod.cls(tablegroup)
        return cls(tablegroup)
Пример #24
0
    def from_metadata(cls, fname):
        fname = Path(fname)
        if fname.is_dir():
            name = '{0}{1}'.format(cls.__name__, MD_SUFFIX)
            tablegroup = TableGroup.from_file(pkg_path('modules', name))
            # adapt the path of the metadata file such that paths to tables are resolved
            # correctly:
            tablegroup._fname = fname.joinpath(name)
        else:
            tablegroup = TableGroup.from_file(fname)

        comps = Counter()
        for table in tablegroup.tables:
            try:
                comps.update([Dataset.get_tabletype(table)])
            except ValueError:
                pass
        if comps and comps.most_common(1)[0][1] > 1:
            raise ValueError('{0}: duplicate components!'.format(fname))

        for mod in get_modules():
            if mod.match(tablegroup):
                return mod.cls(tablegroup)
        return cls(tablegroup)
Пример #25
0
class Repos(UnicodeMixin):
    """Access to the datasets, phylogenies and sources of a D-PLACE data repository."""

    def __init__(self, dir_):
        """Read the dataset and phylogeny indexes of the repository at ``dir_``.

        :param dir_: Path of the repository directory.
        """
        self.dir = Path(dir_)
        self.datasets = [
            Dataset(base_dir=self.dir.joinpath('datasets'), **r) for r in
            reader(self.dir.joinpath('datasets', 'index.csv'), dicts=True)]
        self.phylogenies = [
            Phylogeny(base_dir=self.dir.joinpath('phylogenies'), **r) for r in
            reader(self.dir.joinpath('phylogenies', 'index.csv'), dicts=True)]
        # Lookups by id across all datasets.
        self.societies = {
            s.id: s for s in chain.from_iterable(d.societies for d in self.datasets)
        }
        # NOTE(review): this used to iterate ``d.societies`` again, so the mapping
        # held societies rather than variables; assumes datasets expose
        # ``variables`` - confirm against ``Dataset``.
        self.variables = {
            v.id: v for v in chain.from_iterable(d.variables for d in self.datasets)
        }
        self.sources = BibFile(self.dir.joinpath('datasets', 'sources.bib'))

    def __unicode__(self):
        return '<D-PLACE data repos {0} at {1}>'.format(git_describe(self.dir), self.dir)

    def path(self, *comps):
        """Return the path given by ``comps`` below the repository directory."""
        return self.dir.joinpath(*comps)

    def read_csv(self, *comps, **kw):
        """Return the rows of the CSV file at ``comps`` as list; ``kw`` is passed to ``reader``."""
        return list(reader(self.path(*comps), **kw))

    def read_json(self, *comps):
        """Return the content of the JSON file at ``comps``."""
        return jsonlib.load(self.path(*comps))

    def iter_data(self, datasets=None, variables=None, societies=None):
        """Yield data records, optionally restricted by the given filters.

        A falsy filter means "no restriction". Previously a falsy ``datasets`` filter
        made this method yield nothing at all, unlike the other two filters.

        :param datasets: Container of dataset ids to include.
        :param variables: Container of variable ids to include.
        :param societies: Container of society ids to include.
        """
        for ds in self.datasets:
            if datasets and ds.id not in datasets:
                continue
            for record in ds.data:
                if variables and record.var_id not in variables:
                    continue
                if societies and record.soc_id not in societies:
                    continue
                yield record
Пример #26
0
def includeme(config):
    """Configure a pyramid application as CLLD app.

    Sets up renderers, utilities, the db connection, event subscribers, configurator
    directives, routes and views, maps, icons, home pages, the favicon, translation
    directories, the menu and the CLDF download on ``config``. Functionality provided
    by the app's root package in conventionally named modules, directories and files
    is picked up as well.

    Upgrading:

    - register utilities "by hand", after config.include('clld.web.app')
    - add routes by hand (and remove these from the **kw passed to Configurator)

    :param config: The configurator; ``root_package`` and ``registry`` are read from it.
    :return: None
    """
    #
    # now we exploit the default package layout as created via the CLLD scaffold:
    #
    # note: the following exploits the import time side effect of modifying the webassets
    # environment!
    root_package = config.root_package.__name__
    pkg_dir = Path(config.root_package.__file__).parent.resolve()
    maybe_import('%s.assets' % root_package, pkg_dir=pkg_dir)

    # Both JSON renderers serialize dates and datetimes in ISO format.
    json_renderer = JSON()
    json_renderer.add_adapter(datetime.datetime,
                              lambda obj, req: obj.isoformat())
    json_renderer.add_adapter(datetime.date, lambda obj, req: obj.isoformat())
    config.add_renderer('json', json_renderer)

    jsonp_renderer = JSONP(param_name='callback')
    jsonp_renderer.add_adapter(datetime.datetime,
                               lambda obj, req: obj.isoformat())
    jsonp_renderer.add_adapter(datetime.date, lambda obj, req: obj.isoformat())
    config.add_renderer('jsonp', jsonp_renderer)

    config.set_request_factory(ClldRequest)
    config.registry.registerUtility(CtxFactoryQuery(),
                                    interfaces.ICtxFactoryQuery)
    config.registry.registerUtility(OlacConfig(), interfaces.IOlacConfig)
    config.registry.registerUtility(CldfConfig(), interfaces.ICldfConfig)

    # initialize the db connection
    engine = engine_from_config(config.registry.settings, 'sqlalchemy.')
    DBSession.configure(bind=engine)
    Base.metadata.bind = engine

    # The git description of the directory containing the package directory is
    # stored in the settings, or None if it cannot be determined.
    try:
        git_tag = git_describe(Path(pkg_dir).parent)
    except ValueError:  # pragma: no cover
        git_tag = None

    config.add_settings({
        'pyramid.default_locale_name': 'en',
        'clld.pkg': root_package,
        'clld.git_tag': git_tag,
        'clld.parameters': {}
    })
    if 'clld.files' in config.registry.settings:
        # deployment-specific location of static data files
        abspath = Path(config.registry.settings['clld.files']).resolve()
        config.add_settings({'clld.files': abspath})
        config.add_static_view('files', str(abspath))

    # event subscribers:
    config.add_subscriber(add_localizer, events.NewRequest)
    config.add_subscriber(init_map, events.ContextFound)
    # The app's ``util`` module (if it can be imported) is passed to
    # ``add_renderer_globals``.
    config.add_subscriber(
        partial(add_renderer_globals,
                maybe_import('%s.util' % root_package, pkg_dir=pkg_dir)),
        events.BeforeRender)

    #
    # make it easy to register custom functionality
    #
    for name, func in {
            'register_utility': register_utility,
            'register_datatable': partial(register_cls, interfaces.IDataTable),
            'register_map': partial(register_cls, interfaces.IMap),
            'register_menu': register_menu,
            'register_resource': register_resource,
            'register_adapter': register_adapter,
            'register_adapters': register_adapters,
            'register_download': register_download,
            'register_staticresource': register_staticresource,
            'add_route_and_view': add_route_and_view,
            'add_settings_from_file': add_settings_from_file,
            'add_301': add_301,
            'add_410': add_410,
            'add_page': add_page,
            'register_resource_routes_and_views':
            register_resource_routes_and_views,
    }.items():
        config.add_directive(name, func)

    #
    # routes and views
    #
    # NOTE: the directives registered above are used from here on.
    config.add_static_view('clld-static', 'clld:web/static')
    config.add_static_view('static', '%s:static' % root_package)

    config.add_route_and_view('_js', '/_js', js, http_cache=3600)

    # add some maintenance hatches
    config.add_route_and_view('_raise', '/_raise', _raise)
    config.add_route_and_view('_ping', '/_ping', _ping, renderer='json')

    # sitemap support:
    config.add_route_and_view('robots', '/robots.txt', robots)
    config.add_route_and_view('sitemapindex', '/sitemap.xml', sitemapindex)
    config.add_route_and_view('sitemap', '/sitemap.{rsc}.{n}.xml', sitemap)
    config.add_route('resourcemap', '/resourcemap.json')
    config.add_view(resourcemap, route_name='resourcemap', renderer='jsonp')
    config.add_route_and_view('select_combination', '/_select_combination',
                              select_combination)

    config.add_route_and_view('unapi', '/unapi', unapi)
    config.add_route_and_view('olac', '/olac', olac)

    config.add_settings_from_file(pkg_dir.joinpath('appconf.ini'))
    # Fall back to the templates shipped with clld if no mako directories are
    # configured.
    if not config.registry.settings.get('mako.directories'):
        config.add_settings({'mako.directories': ['clld:web/templates']})

    # Routes, views, a datatable and adapters for each resource type; the datatable
    # class is looked up in ``datatables`` by capitalized plural resource name, with
    # ``DataTable`` as default.
    for rsc in RESOURCES:
        config.register_resource_routes_and_views(rsc)
        config.register_datatable(
            rsc.plural, getattr(datatables, rsc.plural.capitalize(),
                                DataTable))
        register_resource_adapters(config, rsc)

    # maps
    config.register_map('languages', Map)
    config.register_map('language', LanguageMap)
    config.register_map('parameter', ParameterMap)
    config.register_map('combination', CombinationMap)

    config.include('clld.web.adapters')

    for icon in ICONS:
        config.registry.registerUtility(icon, interfaces.IIcon, name=icon.name)
    config.registry.registerUtility(ORDERED_ICONS, interfaces.IIconList)
    config.registry.registerUtility(MapMarker(), interfaces.IMapMarker)

    #
    # inspect default locations for views and templates:
    #
    # Maps page names to a flag telling whether the page is added; pages flagged
    # False are only added if the app provides a template for them (see below).
    home_comp = OrderedDict()
    for name, template in [
        ('introduction', False),
        ('about', False),
        ('terms', False),
        ('glossary', False),
        ('history', False),
        ('changes', False),
        ('credits', False),
        ('legal', True),
        ('download', True),
        ('contact', True),
        ('help', False),
    ]:
        home_comp[name] = template

    # A mako template named like a page in the app's templates directory enables
    # that page.
    if pkg_dir.joinpath('templates').exists():
        for p in pkg_dir.joinpath('templates').iterdir():
            if p.stem in home_comp and p.suffix == '.mako':
                home_comp[p.stem] = True

    for name, template in home_comp.items():
        if template:
            config.add_page(name)

    config.add_settings(
        {'home_comp': [k for k in home_comp.keys() if home_comp[k]]})

    if 'clld.favicon' not in config.registry.settings:
        favicon = {'clld.favicon': 'clld:web/static/images/favicon.ico'}
        # hard to test (in particular on travis) and without too much consequence
        # (and the consequences faced are easy to spot).
        if pkg_dir.joinpath('static',
                            'favicon.ico').exists():  # pragma: no cover
            favicon['clld.favicon'] = root_package + ':static/favicon.ico'
        config.add_settings(favicon)

    # The md5 hash is computed for the favicon file which the asset spec resolves to.
    config.add_settings({
        'clld.favicon_hash':
        md5(abspath_from_asset_spec(config.registry.settings['clld.favicon']))
    })

    translation_dirs = ['clld:locale']
    if pkg_dir.joinpath('locale').exists():
        translation_dirs.append('%s:locale' % root_package)  # pragma: no cover
    config.add_translation_dirs(*translation_dirs)

    if pkg_dir.joinpath(
            'static/publisher_logo.png').exists():  # pragma: no cover
        config.add_settings({
            'clld.publisher_logo':
            '%s:static/publisher_logo.png' % root_package
        })

    if asbool(config.registry.settings.get('clld.pacific_centered_maps')):
        geojson.pacific_centered()

    # Scan the app's ``views`` module, if it can be imported.
    v = maybe_import('%s.views' % root_package, pkg_dir=pkg_dir)
    if v:
        config.scan(v)  # pragma: no cover

    menuitems = config.registry.settings.get(
        'clld.menuitems_list',
        ['contributions', 'parameters', 'languages', 'contributors'])
    config.register_menu(('dataset', dict(label='Home')), *menuitems)

    config.include('pyramid_mako')

    # Include the app's ``adapters``, ``datatables`` and ``maps`` modules, provided
    # they can be imported and define ``includeme``.
    for name in ['adapters', 'datatables', 'maps']:
        mod = maybe_import('%s.%s' % (root_package, name), pkg_dir=pkg_dir)
        if mod and hasattr(mod, 'includeme'):
            config.include(mod)

    config.register_download(CldfDownload(common.Dataset, root_package))
Пример #27
0
def includeme(config):
    """Set up a pyramid configurator for a clld app.

    In order, this registers JSON/JSONP renderers, the request factory, core
    utilities, the database engine, default settings, event subscribers and the
    custom ``config`` directives; then it adds the built-in routes and views,
    the per-resource routes/datatables/adapters, maps and icons; finally it
    inspects the root package's directory (``templates``, ``static``,
    ``locale``) and its optional submodules for app-specific customizations.

    Upgrading:

    - register utilities "by hand", after config.include('clld.web.app')
    - add routes by hand (and remove these from the **kw passed to Configurator)

    :param config: The configurator; ``config.root_package`` and \
``config.registry.settings`` are read, and settings are added to it.
    :return: None
    """
    #
    # now we exploit the default package layout as created via the CLLD scaffold:
    #
    # note: the following exploits the import time side effect of modifying the webassets
    # environment!
    root_package = config.root_package.__name__
    maybe_import('%s.assets' % root_package)

    # Directory of the app's root package; probed below for templates, static files
    # and translations.
    pkg_dir = Path(config.root_package.__file__).parent.resolve()

    # Both renderers serialize dates and datetimes as ISO formatted strings.
    json_renderer = JSON()
    json_renderer.add_adapter(datetime.datetime, lambda obj, req: obj.isoformat())
    json_renderer.add_adapter(datetime.date, lambda obj, req: obj.isoformat())
    config.add_renderer('json', json_renderer)

    jsonp_renderer = JSONP(param_name='callback')
    jsonp_renderer.add_adapter(datetime.datetime, lambda obj, req: obj.isoformat())
    jsonp_renderer.add_adapter(datetime.date, lambda obj, req: obj.isoformat())
    config.add_renderer('jsonp', jsonp_renderer)

    config.set_request_factory(ClldRequest)
    config.registry.registerUtility(CtxFactoryQuery(), interfaces.ICtxFactoryQuery)
    config.registry.registerUtility(OlacConfig(), interfaces.IOlacConfig)

    # initialize the db connection
    # (the engine is built from the settings prefixed with 'sqlalchemy.')
    engine = engine_from_config(config.registry.settings, 'sqlalchemy.')
    DBSession.configure(bind=engine)
    Base.metadata.bind = engine

    config.add_settings({
        'pyramid.default_locale_name': 'en',
        'clld.pkg': root_package,
        'clld.parameters': {}})
    if 'clld.files' in config.registry.settings:
        # deployment-specific location of static data files
        # (the setting is replaced by its resolved path and served as 'files')
        abspath = Path(config.registry.settings['clld.files']).resolve()
        config.add_settings({'clld.files': abspath})
        config.add_static_view('files', abspath.as_posix())

    # event subscribers:
    config.add_subscriber(add_localizer, events.NewRequest)
    config.add_subscriber(init_map, events.ContextFound)
    # The app's optional ``util`` module is bound as first argument of
    # add_renderer_globals.
    config.add_subscriber(
        partial(add_renderer_globals, maybe_import('%s.util' % root_package)),
        events.BeforeRender)

    #
    # make it easy to register custom functionality
    #
    # Each entry becomes a ``config.<name>(...)`` directive; several of them are
    # already called further down in this function.
    for name, func in {
        'register_datatable': partial(register_cls, interfaces.IDataTable),
        'register_map': partial(register_cls, interfaces.IMap),
        'register_menu': register_menu,
        'register_resource': register_resource,
        'register_adapter': register_adapter,
        'register_adapters': register_adapters,
        'register_download': register_download,
        'register_staticresource': register_staticresource,
        'add_route_and_view': add_route_and_view,
        'add_settings_from_file': add_settings_from_file,
        'add_301': add_301,
        'add_410': add_410,
        'add_page': add_page,
        'register_resource_routes_and_views': register_resource_routes_and_views,
    }.items():
        config.add_directive(name, func)

    #
    # routes and views
    #
    config.add_static_view('clld-static', 'clld:web/static')
    config.add_static_view('static', '%s:static' % root_package)

    config.add_route_and_view('_js', '/_js', js, http_cache=3600)

    # add some maintenance hatches
    config.add_route_and_view('_raise', '/_raise', _raise)
    config.add_route_and_view('_ping', '/_ping', _ping, renderer='json')

    # sitemap support:
    config.add_route_and_view('robots', '/robots.txt', robots)
    config.add_route_and_view('sitemapindex', '/sitemap.xml', sitemapindex)
    config.add_route_and_view('sitemap', '/sitemap.{rsc}.{n}.xml', sitemap)
    config.add_route('resourcemap', '/resourcemap.json')
    config.add_view(resourcemap, route_name='resourcemap', renderer='jsonp')
    config.add_route_and_view(
        'select_combination', '/_select_combination', select_combination)

    config.add_route_and_view('unapi', '/unapi', unapi)
    config.add_route_and_view('olac', '/olac', olac)

    # App-level settings from the package's appconf.ini; the default template
    # directory is only set if 'mako.directories' is still empty afterwards.
    config.add_settings_from_file(pkg_dir.joinpath('appconf.ini'))
    if not config.registry.settings.get('mako.directories'):
        config.add_settings({'mako.directories': ['clld:web/templates']})

    for rsc in RESOURCES:
        config.register_resource_routes_and_views(rsc)
        # Use the datatable class named like the capitalized plural resource name,
        # falling back to the generic DataTable.
        config.register_datatable(
            rsc.plural, getattr(datatables, rsc.plural.capitalize(), DataTable))
        register_resource_adapters(config, rsc)

    # maps
    config.register_map('languages', Map)
    config.register_map('language', LanguageMap)
    config.register_map('parameter', ParameterMap)
    config.register_map('combination', CombinationMap)

    config.include('clld.web.adapters')

    for icon in ICONS:
        config.registry.registerUtility(icon, interfaces.IIcon, name=icon.name)
    config.registry.registerUtility(ORDERED_ICONS, interfaces.IIconList)
    config.registry.registerUtility(MapMarker(), interfaces.IMapMarker)

    #
    # inspect default locations for views and templates:
    #
    # Maps page name to whether a page is added for it; entries flagged True are
    # always added, the others only if the app ships a matching template.
    home_comp = OrderedDict()
    for name, template in [
        ('introduction', False),
        ('about', False),
        ('terms', False),
        ('glossary', False),
        ('history', False),
        ('changes', False),
        ('credits', False),
        ('legal', True),
        ('download', True),
        ('contact', True),
        ('help', False),
    ]:
        home_comp[name] = template

    if pkg_dir.joinpath('templates').exists():
        for p in pkg_dir.joinpath('templates').iterdir():
            # A file <name>.mako in the app's templates directory enables page <name>.
            if p.stem in home_comp and p.suffix == '.mako':
                home_comp[p.stem] = True

    for name, template in home_comp.items():
        if template:
            config.add_page(name)

    config.add_settings({'home_comp': [k for k in home_comp.keys() if home_comp[k]]})

    if 'clld.favicon' not in config.registry.settings:
        favicon = {'clld.favicon': 'clld:web/static/images/favicon.ico'}
        # hard to test (in particular on travis) and without too much consequence
        # (and the consequences faced are easy to spot).
        if pkg_dir.joinpath('static', 'favicon.ico').exists():  # pragma: no cover
            favicon['clld.favicon'] = root_package + ':static/favicon.ico'
        config.add_settings(favicon)

    # Store the hex digest of the favicon file's content as 'clld.favicon_hash'.
    with open(abspath_from_asset_spec(
            config.registry.settings['clld.favicon']), mode='rb') as fp:
        fh = md5()
        fh.update(fp.read())
        config.add_settings({'clld.favicon_hash': fh.hexdigest()})

    # clld's own translations, plus the app's if it has a locale directory.
    translation_dirs = ['clld:locale']
    if pkg_dir.joinpath('locale').exists():
        translation_dirs.append('%s:locale' % root_package)  # pragma: no cover
    config.add_translation_dirs(*translation_dirs)

    if pkg_dir.joinpath('static/publisher_logo.png').exists():  # pragma: no cover
        config.add_settings(
            {'clld.publisher_logo': '%s:static/publisher_logo.png' % root_package})

    if asbool(config.registry.settings.get('clld.pacific_centered_maps')):
        geojson.pacific_centered()

    # Scan the app's views module, if it has one.
    v = maybe_import('%s.views' % root_package)
    if v:
        config.scan(v)  # pragma: no cover

    # The menu always starts with the 'dataset' item labelled 'Home', followed by
    # the configured (or default) items.
    menuitems = config.registry.settings.get(
        'clld.menuitems_list',
        ['contributions', 'parameters', 'languages', 'contributors'])
    config.register_menu(('dataset', dict(label='Home')), *menuitems)

    config.include('pyramid_mako')

    # Include the app's optional customization modules if they define ``includeme``.
    for name in ['adapters', 'datatables', 'maps']:
        mod = maybe_import('%s.%s' % (root_package, name))
        if mod and hasattr(mod, 'includeme'):
            config.include(mod)

    config.register_download(CldfDownload(common.Dataset, root_package))
Пример #28
0
def reflexes(write_stats=True, path='concepticondata'):
    """
    Group the concepts of all concept lists by concept set and by label.

    Every ``*.tsv`` file in the ``conceptlists`` directory below the data
    directory is read; rows without a ``CONCEPTICON_ID`` are ignored. The label
    of a row is its ``GLOSS`` column if present, else its ``ENGLISH`` column.

    :param write_stats: If true, summary statistics and two "top twenty" tables \
    are written as markdown to ``README.md`` in the data directory.
    :param path: The data directory; ``PKG_PATH`` is used when this is falsy.
    :return: A pair ``(D, G)`` of dictionaries. ``D`` has concept set glosses as \
    keys and lists of ``(concept list file name, label)`` pairs as values. ``G`` has \
    labels as keys and lists of ``(concept set id, concept set gloss, concept list \
    file name)`` triples as values.
    """
    D, G = {}, {}
    cpl = 0  # number of mapped concepts, summed over all lists
    cln = 0  # number of concept lists
    clb = set()  # distinct concept labels

    dpath = Path(path) if path else PKG_PATH

    for cl in dpath.joinpath('conceptlists').glob('*.tsv'):
        name = cl.name
        concepts = list(reader(cl, namedtuples=True, delimiter="\t"))
        for concept in concepts:
            if not concept.CONCEPTICON_ID:
                continue
            label = concept.GLOSS if hasattr(concept, 'GLOSS') else concept.ENGLISH
            D.setdefault(concept.CONCEPTICON_GLOSS, []).append((name, label))
            G.setdefault(label, []).append(
                (concept.CONCEPTICON_ID, concept.CONCEPTICON_GLOSS, name))
            clb.add(label)
            cpl += 1
        cln += 1

    # write basic statistics and most frequent glosses
    if write_stats:
        def mean(total, count):
            # An empty data directory must not abort with ZeroDivisionError.
            return total / count if count else 0.0

        def distinct_labels(refs):
            return set(label for _, label in refs)

        def distinct_lists(refs):
            return set(clist for clist, _ in refs)

        def table(title, rank):
            # Markdown table of the twenty concept sets ranking highest by `rank`.
            lines = [
                '# {0}\n\n'.format(title),
                '| No. | concept set | distinct labels | concept lists | examples |\n',
                '| --- | --- | --- | --- | --- |\n',
            ]
            top = sorted(D.items(), key=lambda x: rank(x[1]), reverse=True)[:20]
            for i, (k, v) in enumerate(top):
                lines.append('| {0} | {1} | {2} | {3} | {4} |\n'.format(
                    i + 1,
                    k,
                    len(distinct_labels(v)),
                    len(distinct_lists(v)),
                    ', '.join(sorted(set(
                        '«{0}»'.format(label.replace('*', '`*`')) for _, label in v)))
                ))
            return ''.join(lines)

        txt = """# Concepticon Statistics
* concept sets (used): {0}
* concept lists: {1}
* concept labels: {2}
* concept labels (unique): {3}
* Ø concepts per list: {4:.2f}
* Ø concepts per concept set: {5:.2f}
* Ø unique concept labels per concept set: {6:.2f}

"""
        txt = txt.format(
            len(D),
            cln,
            cpl,
            len(clb),
            mean(cpl, cln),
            mean(sum(len(v) for v in D.values()), len(D)),
            mean(sum(len(distinct_labels(v)) for v in D.values()), len(D)),
        )

        txt += table(
            'Twenty Most Diverse Concept Sets', lambda v: len(distinct_labels(v)))
        txt += table(
            'Twenty Most Frequent Concept Sets', lambda v: len(distinct_lists(v)))

        with dpath.joinpath('README.md').open('w', encoding='utf8') as fp:
            fp.write(txt)

    return D, G
Пример #29
0
                                log.update(['subsubunit'])
                                res[fid]['subgroups'][sfid]['subgroups'][
                                    ssfid] = ddd
        else:
            # isolates:
            for l in langs:
                if l.med:
                    log.update(['isolate'])
                    res[l.id] = {
                        'name': l.name,
                        'doctype': l.med,
                        'macroareas': l.macroareas,
                        'extension': [l.id],
                    }

    outdir = Path(grambank.__file__).parent.joinpath('static')
    dump(res, outdir.joinpath('stats_by_classification.json'))

    stats = defaultdict(lambda: defaultdict(list))
    for fid, f in res.items():
        for maname, maid in f['macroareas']:
            stats[maid][f['doctype']].append(fid)

    dump(stats, outdir.joinpath('stats_by_macroarea.json'))

    macroareas = {ma.name: ma.value for ma in Macroarea}
    dump(macroareas, outdir.joinpath('stats_macroareas.json'))

    print(log)
    sys.exit(0)
Пример #30
0
 def write_info(self, outdir):
     """Write ``self.cfg`` to an ``.ini`` file inside ``outdir``.

     :param outdir: Target directory, as ``Path`` or anything ``Path`` accepts.

     The file name is whatever ``self.fname('.ini')`` returns.
     """
     target_dir = outdir if isinstance(outdir, Path) else Path(outdir)
     ini_path = target_dir.joinpath(self.fname('.ini'))
     self.cfg.write(ini_path)
Пример #31
0
def reflexes(write_stats=True, path='concepticondata'):
    """
    Collect, per concept set and per label, where concepts occur in concept lists.

    All ``*.tsv`` files in the ``conceptlists`` directory below the data
    directory are read; only rows with a non-empty ``CONCEPTICON_ID`` count. A
    row's label is taken from its ``GLOSS`` column, or from ``ENGLISH`` if there
    is no ``GLOSS`` column.

    :param write_stats: If true, markdown statistics are written to \
    ``README.md`` in the data directory.
    :param path: The data directory; when falsy, ``PKG_PATH`` is used instead.
    :return: A pair ``(D, G)``: ``D`` maps a concept set gloss (key) to a list of \
    ``(concept list file name, label)`` pairs, ``G`` maps a label (key) to a list \
    of ``(concept set id, concept set gloss, concept list file name)`` triples.
    """
    D, G = {}, {}
    cpl = 0  # mapped concepts over all lists
    cln = 0  # concept lists read
    clb = set()  # unique labels

    dpath = Path(path) if path else PKG_PATH

    for cl in dpath.joinpath('conceptlists').glob('*.tsv'):
        concepts = list(reader(cl, namedtuples=True, delimiter="\t"))
        name = cl.name
        for concept in (c for c in concepts if c.CONCEPTICON_ID):
            if hasattr(concept, 'GLOSS'):
                label = concept.GLOSS
            else:
                label = concept.ENGLISH
            D.setdefault(concept.CONCEPTICON_GLOSS, []).append((name, label))
            G.setdefault(label, []).append(
                (concept.CONCEPTICON_ID, concept.CONCEPTICON_GLOSS, name))
            clb.add(label)
            cpl += 1
        cln += 1

    if not write_stats:
        return D, G

    def average(total, count):
        # Without any concept list (or mapped concept) there is nothing to
        # divide by; report 0 rather than raising ZeroDivisionError.
        return total / count if count else 0.0

    def n_labels(refs):
        return len(set(label for _, label in refs))

    def n_lists(refs):
        return len(set(clist for clist, _ in refs))

    def top_twenty(heading, count):
        # Render the twenty concept sets with the highest `count` as a table.
        out = '# {0}\n\n'.format(heading)
        out += '| No. | concept set | distinct labels | concept lists | examples |\n'
        out += '| --- | --- | --- | --- | --- |\n'
        ranked = sorted(D.items(), key=lambda x: count(x[1]), reverse=True)[:20]
        for i, (k, v) in enumerate(ranked):
            examples = sorted(set(
                '«{0}»'.format(label.replace('*', '`*`')) for _, label in v))
            out += '| {0} | {1} | {2} | {3} | {4} |\n'.format(
                i + 1, k, n_labels(v), n_lists(v), ', '.join(examples))
        return out

    # write basic statistics and most frequent glosses
    txt = """# Concepticon Statistics
* concept sets (used): {0}
* concept lists: {1}
* concept labels: {2}
* concept labels (unique): {3}
* Ø concepts per list: {4:.2f}
* Ø concepts per concept set: {5:.2f}
* Ø unique concept labels per concept set: {6:.2f}

"""
    txt = txt.format(
        len(D), cln, cpl, len(clb),
        average(cpl, cln),
        average(sum(len(v) for v in D.values()), len(D)),
        average(sum(n_labels(v) for v in D.values()), len(D)))

    txt += top_twenty('Twenty Most Diverse Concept Sets', n_labels)
    txt += top_twenty('Twenty Most Frequent Concept Sets', n_lists)

    with dpath.joinpath('README.md').open('w', encoding='utf8') as fp:
        fp.write(txt)

    return D, G
Пример #32
0
class Dictionary(object):
    """A dictionary read from an SFM file, to be processed and loaded into the db."""

    def __init__(self, filename, **kw):
        """Read the SFM file.

        :param filename: Path of the SFM file; its parent directory becomes ``self.dir``.
        :param kw: Keyword arguments passed on to ``sfm.SFM.from_file``. ``entry_impl`` \
        defaults to ``Entry`` and ``marker_map`` to an empty dict. If the marker map \
        maps some marker onto ``lx``, that marker is used in the default ``entry_sep`` \
        and a default ``entry_prefix`` is set as well.
        """
        kw.setdefault('entry_impl', Entry)
        kw['marker_map'] = kw.get('marker_map') or {}
        lexeme_marker = 'lx'
        reverse_marker_map = {v: k for k, v in kw['marker_map'].items()}
        if lexeme_marker in reverse_marker_map:
            lexeme_marker = reverse_marker_map[lexeme_marker]
            kw.setdefault('entry_prefix', '\\lx ')
        kw.setdefault('entry_sep', '\\%s ' % lexeme_marker)
        self.sfm = sfm.SFM.from_file(filename, **kw)
        self.dir = Path(filename).parent

    def stats(self):
        """Run a ``Stats`` visitor over all entries and print what it collected."""
        stats = Stats()
        self.sfm.visit(stats)
        print(stats.count)
        print(stats._mult_markers)
        print(stats._implicit_mult_markers)

    def process(self, outfile):
        """extract examples, etc.

        The rearranged dictionary is written to ``outfile`` and the extracted
        examples to ``examples.sfm`` next to it; ``examples.log`` is written to
        ``self.dir``.

        :param outfile: Path object of the SFM file to write.
        """
        assert self.dir.name != 'processed'

        self.sfm.visit(Rearrange())

        with self.dir.joinpath('examples.log').open('w', encoding='utf8') as log:
            extractor = ExampleExtractor(Corpus(self.dir), log)
            self.sfm.visit(extractor)

        self.sfm.write(outfile)
        extractor.write_examples(outfile.parent.joinpath('examples.sfm'))

    def load(
            self,
            submission,
            did,
            lid,
            comparison_meanings,
            comparison_meanings_alt_labels,
            marker_map):
        """Create database objects for the examples and entries of the dictionary.

        :param submission: Object providing ``id`` and ``md['metalanguages']``.
        :param did: Key used to look up the ``models.Dictionary`` object.
        :param lid: Key used to look up the ``models.Variety`` object.
        :param comparison_meanings: Maps lowercased glosses/descriptions to the \
        value passed as ``parameter_pk`` of a value set.
        :param comparison_meanings_alt_labels: Fallback mapping of the same kind, \
        consulted when ``comparison_meanings`` has no match.
        :param marker_map: Maps keys of ``word.data`` to a label or to a \
        ``(label, converter)`` pair, for data stored as ``common.Unit_data``.
        """
        data = Data()
        # (source word id, relation, target word id) triples, resolved once all
        # words have been added.
        rel = []

        vocab = models.Dictionary.get(did)
        lang = models.Variety.get(lid)
        for ex in Examples.from_file(self.dir.joinpath('examples.sfm')):
            data.add(
                common.Sentence,
                ex.id,
                id=ex.id,
                name=ex.text,
                language=lang,
                analyzed=ex.morphemes,
                gloss=ex.gloss,
                description=ex.translation)

        for i, entry in enumerate(self.sfm):
            words = list(entry.get_words())
            headword = None

            for j, word in enumerate(words):
                if not word.meanings:
                    print('no meanings for word %s' % word.form)
                    continue

                # The first word with meanings is the headword; later words of
                # the entry are related to it as 'sub'.
                if not headword:
                    headword = word.id
                else:
                    rel.append((word.id, 'sub', headword))

                for tw in word.rel:
                    rel.append((word.id, tw[0], tw[1]))

                w = data.add(
                    models.Word,
                    word.id,
                    id='%s-%s-%s' % (submission.id, i + 1, j + 1),
                    name=word.form,
                    number=int(word.hm) if word.hm else 0,
                    phonetic=word.ph,
                    pos=word.ps,
                    dictionary=vocab,
                    language=lang)
                DBSession.flush()

                # Concepts already linked to this word; each gets one counterpart.
                concepts = []

                for k, meaning in enumerate(word.meanings):
                    if not (meaning.ge or meaning.de):
                        print('meaning without description for word %s' % w.name)
                        continue

                    if meaning.ge:
                        meaning.ge = meaning.ge.replace('.', ' ')

                    m = models.Meaning(
                        id='%s-%s' % (w.id, k + 1),
                        name=meaning.de or meaning.ge,
                        description=meaning.de,
                        gloss=meaning.ge,
                        word=w,
                        semantic_domain=', '.join(meaning.sd))

                    assert not meaning.x
                    for xref in meaning.xref:
                        s = data['Sentence'].get(xref)
                        assert s
                        models.MeaningSentence(meaning=m, sentence=s)

                    key = (meaning.ge or meaning.de).replace('.', ' ').lower()
                    concept = None
                    if key in comparison_meanings:
                        concept = comparison_meanings[key]
                    elif key in comparison_meanings_alt_labels:
                        concept = comparison_meanings_alt_labels[key]

                    if concept and concept not in concepts:
                        concepts.append(concept)
                        # A plain string key; a stray trailing comma used to turn
                        # this into a 1-tuple.
                        vsid = '%s-%s' % (key, submission.id)
                        if vsid in data['ValueSet']:
                            vs = data['ValueSet'][vsid]
                        else:
                            vs = data.add(
                                common.ValueSet, vsid,
                                id='%s-%s' % (submission.id, m.id),
                                language=lang,
                                contribution=vocab,
                                parameter_pk=concept)

                        DBSession.add(models.Counterpart(
                            id='%s-%s' % (w.id, k + 1),
                            name=w.name,
                            valueset=vs,
                            word=w))

                # `k` continues from the last index of the loop above (the word has
                # at least one meaning at this point), so meaning ids stay unique.
                for _lang, meanings in word.non_english_meanings.items():
                    assert _lang in submission.md['metalanguages']
                    for meaning in meanings:
                        k += 1
                        models.Meaning(
                            id='%s-%s' % (w.id, k + 1),
                            name=meaning,
                            gloss=meaning,
                            language=submission.md['metalanguages'][_lang],
                            word=w)

                for index, (key, values) in enumerate(word.data.items()):
                    if key in marker_map:
                        label = marker_map[key]
                        converter = default_value_converter
                        if isinstance(label, (list, tuple)):
                            label, converter = label
                        for value in values:
                            DBSession.add(common.Unit_data(
                                object_pk=w.pk,
                                key=label,
                                value=converter(value, word.data),
                                ord=index))

        # FIXME: vgroup words by description and add synonym relationships!

        for s, d, t in rel:
            if s in data['Word'] and t in data['Word']:
                DBSession.add(models.SeeAlso(
                    source_pk=data['Word'][s].pk,
                    target_pk=data['Word'][t].pk,
                    description=d))
            else:
                # Report whichever end of the relation is unknown.
                print('---m---', s if s not in data['Word'] else t)
Пример #33
0
def htmlmap(args, min_langs_for_legend_item=10):
    """
    glottolog --repos=. htmlmap [OUTDIR] [GLOTTOCODES]

    Writes ``glottolog_map.json`` and ``glottolog_map.html`` to OUTDIR (default:
    the current directory) and prints the URI of the HTML file. Only languoids
    with a latitude are mapped: those listed in the whitespace-separated file
    GLOTTOCODES if given, otherwise all languoids of level language. Families
    with fewer than ``min_langs_for_legend_item`` mapped languoids get no legend
    entry.
    """
    nodes = {n.id: n for n in args.repos.languoids()}
    legend = Counter()

    glottocodes = None
    if len(args.args) > 1:
        # A set, because membership is tested once per languoid below.
        glottocodes = set(read_text(args.args[1]).split())

    langs = []
    for n in nodes.values():
        if n.latitude is None:
            continue
        if glottocodes is None:
            selected = n.level == args.repos.languoid_levels.language
        else:
            selected = n.id in glottocodes
        if not selected:
            continue
        # Top of the lineage, or the languoid itself if it has no lineage.
        fid = n.lineage[0][1] if n.lineage else n.id
        if (not nodes[fid].category.startswith('Pseudo')) or fid == n.id:
            langs.append((n, fid))
            legend.update([fid])

    # Colors are assigned in order of decreasing family size.
    color_map = [fid for fid, _ in legend.most_common()]
    color_map = dict(zip(color_map, qualitative_colors(len(color_map))))
    print(color_map)

    def l2f(t):
        n, fid = t
        lon, lat = n.longitude, n.latitude
        if lon <= -26:
            lon += 360  # make the map pacific-centered.

        return {
            "geometry": {
                "coordinates": [lon, lat],
                "type": "Point"
            },
            "id": n.id,
            "properties": {
                "name": n.name,
                "color": color_map[fid],
                "family": nodes[fid].name,
                "family_id": fid,
            },
            "type": "Feature"
        }

    def legend_item(fid, c):
        return \
            '<span style="background-color: {0}; border: 1px solid black;">'\
            '&nbsp;&nbsp;&nbsp;</span> '\
            '<a href="https://glottolog.org/resource/languoid/id/{1}">{2}</a> ({3})'.format(
                color_map[fid], fid, nodes[fid].name, c)

    geojson = {
        "features": [l2f(t) for t in langs],
        "properties": {
            "legend": {
                fid: legend_item(fid, c)
                for fid, c in legend.most_common()
                if c >= min_langs_for_legend_item
            },
        },
        "type": "FeatureCollection"
    }

    def rendered_template(name, **kw):
        return Template(
            read_text(
                Path(pyglottolog.__file__).parent.joinpath(
                    'templates', 'htmlmap', name))).substitute(**kw)

    jsname = 'glottolog_map.json'
    outdir = Path('.') if not args.args else Path(args.args[0])
    write_text(
        outdir.joinpath(jsname),
        rendered_template('htmlmap.js', geojson=dumps(geojson, indent=4)))
    html = outdir.joinpath('glottolog_map.html')
    write_text(
        html,
        rendered_template('htmlmap.html',
                          version=git_describe(args.repos.repos),
                          jsname=jsname,
                          nlangs=len(langs)))
    print(html.resolve().as_uri())
Пример #34
0
 def write_info(self, outdir):
     """Dump the configuration in ``self.cfg`` to an ``.ini`` file.

     :param outdir: Directory to write to; converted to ``Path`` if it is not one.

     The file is named by ``self.fname('.ini')`` and placed in ``outdir``.
     """
     if isinstance(outdir, Path):
         directory = outdir
     else:
         directory = Path(outdir)
     self.cfg.write(directory.joinpath(self.fname('.ini')))
Пример #35
0
def htmlmap(args):
    """
    glottolog htmlmap [OUTDIR]

    Writes ``glottolog_map.json`` and ``glottolog_map.html`` to OUTDIR (default:
    the current directory) and prints the URI of the HTML file. All languoids of
    level language which have a latitude are mapped, unless their top-level
    family has a category starting with "Pseudo". Only families with more than
    10 mapped languages get a legend entry.
    """
    nodes = {n.id: n for n in args.repos.languoids()}
    legend = Counter()

    langs = []
    for n in nodes.values():
        if n.level == Level.language and n.latitude is not None:
            # Top of the lineage, or the languoid itself if it has no lineage.
            fid = n.lineage[0][1] if n.lineage else n.id
            if not nodes[fid].category.startswith('Pseudo'):
                langs.append((n, fid))
                legend.update([fid])

    # One three-digit hex color per family, assigned in order of sorted family id.
    color_map = {
        fid: "{0:0{1}X}".format((i + 1) * 10, 3)
        for i, fid in enumerate(sorted(legend.keys()))
    }

    def l2f(t):
        n, fid = t
        lon, lat = n.longitude, n.latitude
        if lon <= -26:
            lon += 360

        return {
            "geometry": {
                "coordinates": [lon, lat],
                "type": "Point"
            },
            "id": n.id,
            "properties": {
                "name": n.name,
                "color": color_map[fid],
                "family": nodes[fid].name,
                "family_id": fid,
            },
            "type": "Feature"
        }

    def legend_item(fid, c):
        return \
            '<span style="background-color: #{0}; border: 1px solid black;">'\
            '&nbsp;&nbsp;&nbsp;</span> '\
            '<a href="http://glottolog.org/resource/languoid/id/{1}">{2}</a> ({3})'.format(
                color_map[fid], fid, nodes[fid].name, c)

    geojson = {
        # A real list: on Python 3 ``map`` returns a lazy iterator, which cannot
        # be expected to serialize when passed to ``dumps`` below.
        "features": [l2f(t) for t in langs],
        "properties": {
            "legend": {
                fid: legend_item(fid, c)
                for fid, c in legend.most_common() if c > 10
            },
        },
        "type": "FeatureCollection"
    }

    def rendered_template(name, **kw):
        return Template(
            read_text(
                Path(pyglottolog.__file__).parent.joinpath(
                    'templates', 'htmlmap', name))).substitute(**kw)

    jsname = 'glottolog_map.json'
    outdir = Path('.') if not args.args else Path(args.args[0])
    write_text(
        outdir.joinpath(jsname),
        rendered_template('htmlmap.js', geojson=dumps(geojson, indent=4)))
    html = outdir.joinpath('glottolog_map.html')
    write_text(
        html,
        rendered_template('htmlmap.html',
                          version=git_describe(args.repos.repos),
                          jsname=jsname,
                          nlangs=len(langs)))
    print(html.resolve().as_uri())