def link(args):
    """
    Complete linking of concepts to concept sets.

    If either CONCEPTICON_GLOSS or CONCEPTICON_ID is given, the other is added.

    concepticon link <concept-list>
    """
    ref = args.args[0]
    candidate = Path(ref)
    if not (candidate.exists() and candidate.is_file()):
        # Fall back to looking the list up in the bundled data directory.
        candidate = data_path('conceptlists', ref)
        if not (candidate.exists() and candidate.is_file()):
            raise ParserError('no file %s found' % ref)
    rewrite(candidate, Linker(candidate.stem))
def link(args):
    """
    Complete linking of concepts to concept sets.

    If either CONCEPTICON_GLOSS or CONCEPTICON_ID is given, the other is added.

    concepticon link <concept-list>
    """
    api = Concepticon(args.data)
    ref = args.args[0]
    clist = Path(ref)
    if not (clist.exists() and clist.is_file()):
        # Fall back to resolving the name against the repository data.
        clist = api.data_path('conceptlists', ref)
        if not (clist.exists() and clist.is_file()):
            raise ParserError('no file %s found' % ref)
    rewrite(clist, Linker(clist.stem, api.conceptsets.values()))
def create(self, path, metadata, filter_=filter_hidden, object_class=None):
    """
    Create objects in CDSTAR and register them in the catalog.

    Note that we guess the mimetype based on the filename extension, using
    `mimetypes.guess_type`. Thus, it is the caller's responsibility to add
    custom or otherwise uncommon types to the list of known types using
    `mimetypes.add_type`.

    :param path: File or directory whose file(s) should be processed.
    :param metadata: Metadata passed along for each created object.
    :param filter_: Optional predicate selecting which files to process.
    :param object_class: Optional class for created objects.
    :return: Generator of (filename, created, object) triples.
    """
    path = Path(path)
    if path.is_file():
        candidates = [path]
    elif path.is_dir():
        candidates = list(walk(path, mode='files'))
    else:
        raise ValueError(
            'path must be a file or directory')  # pragma: no cover
    for candidate in candidates:
        # Skip files rejected by the filter; a falsy filter accepts all.
        if filter_ and not filter_(candidate):
            continue
        created, obj = self._create(candidate, metadata, object_class=object_class)
        yield candidate, created, obj
def stats(args):
    """
    cldf stats <DATASET>

    Print basic stats for CLDF dataset <DATASET>, where <DATASET> may be the path to
    - a CLDF metadata file
    - a CLDF core data file
    - a CLDF zip archive

    :param args: Parsed CLI arguments; ``args.args[0]`` is the dataset path.
    :raises ParserError: If no argument is given or the path is not an existing file.
    """
    if len(args.args) < 1:
        raise ParserError('not enough arguments')
    fname = Path(args.args[0])
    if not fname.exists() or not fname.is_file():
        # Fixed message: the check above requires a *file*, not a directory.
        raise ParserError('%s is not an existing file' % fname)
    # Dispatch on the filename to pick the right Dataset constructor.
    if fname.suffix == '.zip':
        ds = Dataset.from_zip(fname)
    elif fname.name.endswith(MD_SUFFIX):
        ds = Dataset.from_metadata(fname)
    else:
        ds = Dataset.from_file(fname)
    print(fname)
    stats_ = ds.stats
    print("""
Name: %s
Different languages: %s
Different parameters: %s
Rows: %s
""" % (
        ds.name,
        len(stats_['languages']),
        len(stats_['parameters']),
        stats_['rowcount']
    ))
def _get_dataset(args):
    """
    Resolve the first CLI argument to a Dataset instance.

    :param args: Parsed CLI arguments; ``args.args[0]`` is the dataset path.
    :return: A `Dataset` loaded from metadata (``.json``) or from data.
    :raises ParserError: If no argument is given or the path is not an existing file.
    """
    if len(args.args) < 1:
        raise ParserError('not enough arguments')
    fname = Path(args.args[0])
    if not fname.exists() or not fname.is_file():
        # Fixed message: the check above requires a *file*, not a directory.
        raise ParserError('%s is not an existing file' % fname)
    if fname.suffix == '.json':
        return Dataset.from_metadata(fname)
    return Dataset.from_data(fname)
def link(args):
    """
    Link concepts to concept sets for a given concept list.

    Notes
    -----
    If either CONCEPTICON_GLOSS or CONCEPTICON_ID is given, the other is added.

    Examples
    --------
    $ concepticon link path_to_conceptlist.tsv
    """
    def _missing(p):
        # True when *p* does not point at an existing regular file.
        return not p.exists() or not p.is_file()

    api = Concepticon(args.repos)
    clist = Path(args.args[0])
    if _missing(clist):
        # Fall back to resolving the name against the repository data.
        clist = api.data_path('conceptlists', args.args[0])
        if _missing(clist):
            raise ParserError('no file %s found' % args.args[0])
    rewrite(clist, Linker(clist.stem, api.conceptsets.values()))
def load_normalized(_path):
    """
    Load normalization data for quasi-identical strings which are often confused.

    :param _path: Path to a tab-separated data file; if it is not an existing \
    file, the name is resolved via `local_path`.
    :return: `dict` mapping decoded source strings to their (raw) target strings.
    """
    import ast  # local import keeps the module's top-level interface unchanged

    path = Path(_path)
    if not path.is_file():
        path = local_path(_path)
    norms = {}
    with path.open(encoding='utf-8') as handle:
        for line in handle:
            # Skip comment lines and blank lines.
            if not line.startswith('#') and line.strip():
                source, target = line.strip().split('\t')
                # NOTE(review): the original used eval() here; ast.literal_eval
                # decodes the same escape sequences without executing arbitrary
                # expressions read from the data file.
                norms[ast.literal_eval('"' + source + '"')] = \
                    ast.literal_eval('r"' + target + '"')
    return norms
def load_alias(_path):
    """
    Alias are one-character sequences which we can convert on a step-by step
    basis by applying them successively to all subsegments of a segment.

    :param _path: Path to a tab-separated data file; if it is not an existing \
    file, the name is resolved via `local_path`.
    :return: `dict` mapping decoded source strings to their (raw) target strings.
    """
    import ast  # local import keeps the module's top-level interface unchanged

    path = Path(_path)
    if not path.is_file():
        path = local_path(_path)
    alias = {}
    with path.open(encoding='utf-8') as handle:
        for line in handle:
            # Skip comment lines and blank lines.
            if not line.startswith('#') and line.strip():
                source, target = line.strip().split('\t')
                # NOTE(review): the original used eval() here; ast.literal_eval
                # decodes the same escape sequences without executing arbitrary
                # expressions read from the data file.
                alias[ast.literal_eval('"' + source + '"')] = \
                    ast.literal_eval('r"' + target + '"')
    return alias
def rewrite(fname, visitor, **kw):
    """Utility function to rewrite rows in tsv files.

    :param fname: Path of the dsv file to operate on.
    :param visitor: A callable that takes a line-number and a row as input and returns a \
    (modified) row or None to filter out the row.
    :param kw: Keyword parameters are passed through to csv.reader/csv.writer.
    """
    if not isinstance(fname, Path):
        # Accept plain (unicode) strings as well as Path objects.
        assert isinstance(fname, string_types)
        fname = Path(fname)
    assert fname.is_file()
    # Write to a hidden sibling file first, then atomically swap it in.
    tmp = fname.parent.joinpath('.tmp.' + fname.name)
    with UnicodeReader(fname, **kw) as source:
        with UnicodeWriter(tmp, **kw) as sink:
            for lineno, row in enumerate(source):
                result = visitor(lineno, row)
                if result is not None:
                    sink.writerow(result)
    shutil.move(tmp.as_posix(), fname.as_posix())
def _existing_file(fname): fname = Path(fname) assert fname.exists() and fname.is_file() return fname