def test_read_text():
    """Round-trip check: write TEXT to disk, read it back both ways.

    TODO test bad chars
    """
    path = os.path.join(TESTING_BASE_DIR, 'read_text.json')
    with open(path, 'w') as f:
        f.write(TEXT)
    # default read
    assert fileio.read_text(path) == TEXT
    # strict UTF-8 read
    assert fileio.read_text(path, utf8_strict=True) == TEXT
    # clean up
    os.remove(path)
def from_json(model, json_path, identifier, inherit=True):
    """Read the specified JSON file and properly instantiate object.

    @param model: LocalCollection, LocalEntity, or File class (called as constructor)
    @param json_path: absolute path to the object's .json file
    @param identifier: Identifier -- required here (identifier.model is read
        unconditionally below), despite earlier docs calling it optional
    @param inherit: boolean Disable in loops to avoid infinite recursion
    @returns: object
    """
    document = None
    if not model:
        raise Exception('Cannot instantiate from JSON without a model object.')
    if not json_path:
        raise Exception('Bad path: %s' % json_path)
    if identifier.model in ['file']:
        # object_id is in .json file path: strip the extension to get the
        # File object's own path
        path = os.path.splitext(json_path)[0]
        document = model(path, identifier=identifier)
    else:
        # object_id is in object directory
        document = model(os.path.dirname(json_path), identifier=identifier)
    # save this just in case: load_json() below may blank out the id
    document_id = document.id
    document.load_json(fileio.read_text(json_path))
    if not document.id:
        # id gets overwritten if document.json is blank
        document.id = document_id
    return document
def checksums(self, algo, force_read=False):
    """Calculates hash checksums for the Entity's files.

    Gets hashes from FILE.json metadata if the file(s) are absent
    from the filesystem (i.e. git-annex file symlinks).
    Overrides DDR.models.Entity.checksums.

    @param algo: str
    @param force_read: bool Traverse filesystem if true.
    @returns: list of (checksum, filepath) tuples
    """
    checksums = []
    if algo not in self.checksum_algorithms():
        raise Error('BAD ALGORITHM CHOICE: {}'.format(algo))
    for f in self._file_paths():
        cs = None
        ext = None
        pathname = os.path.splitext(f)[0]
        # from metadata file
        json_path = os.path.join(self.files_path, f)
        for field in json.loads(fileio.read_text(json_path)):
            # each field is a single-key dict
            for k, v in field.items():  # fix: dict.iteritems() does not exist in Python 3
                if k == algo:
                    cs = v
                if k == 'basename_orig':
                    ext = os.path.splitext(v)[-1]
        if ext is None:
            # robustness: metadata had no basename_orig field;
            # previously this crashed with TypeError on pathname + None
            ext = ''
        fpath = pathname + ext
        if force_read:
            # from filesystem
            # git-annex files are present
            if os.path.exists(fpath):
                cs = util.file_hash(fpath, algo)
        if cs:
            checksums.append((cs, os.path.basename(fpath)))
    return checksums
def read_id_file(path):
    """Read file and return list of IDs

    @param path: str Absolute path to file.
    @returns: list of IDs
    """
    contents = fileio.read_text(path).strip()
    return [line.strip() for line in contents.split('\n')]
def dump_xml(self):
    """Render Entity data with the METS Jinja2 template.

    TODO This should not actually write the XML!
    It should return XML to the code that calls it.
    """
    template_text = fileio.read_text(config.TEMPLATE_METS_JINJA2)
    return Template(template_text).render(object=self)
def merge_add( repo, file_path_rel ):
    """Adds file unless contains conflict markers
    """
    # refuse to add a file that still carries unresolved merge conflicts
    file_path_abs = os.path.join(repo.working_dir, file_path_rel)
    txt = fileio.read_text(file_path_abs)
    markers = (MERGE_MARKER_START, MERGE_MARKER_MID, MERGE_MARKER_END)
    if any(marker in txt for marker in markers):
        return 'ERROR: file still contains merge conflict markers'
    repo.git.add(file_path_rel)
    return 'ok'
def load_json_lite(json_path, model, object_id):
    """Simply reads JSON file and adds object_id if it's a file

    @param json_path: str
    @param model: str
    @param object_id: str
    @returns: list of dicts
    """
    fields = json.loads(fileio.read_text(json_path))
    # file documents do not carry their own id; append it
    if model == 'file':
        fields.append({'id': object_id})
    return fields
def postjson(hosts, index, doctype, object_id, path):
    """Post raw JSON file to Elasticsearch (YMMV)

    This command is for posting raw JSON files. If the file you wish to post
    is a DDR object, please use "ddrindex post".
    """
    raw_json = fileio.read_text(path)
    ds = docstore.Docstore(hosts, index)
    status = ds.post_json(doctype, object_id, raw_json)
    click.echo(status)
def _read_fields(self, path):
    """Extracts specified fields from JSON

    @param path: str Absolute path to a .json document
    @returns: dict mapping field name -> value, for fields listed in JSON_FIELDS
    """
    data = {}
    for d in json.loads(fileio.read_text(path)):
        # each list item is a single-key dict: {fieldname: value}
        key = next(iter(d))  # fix: d.keys()[0] fails in Python 3 (views are not subscriptable)
        if key in JSON_FIELDS:
            # coerces to int if the JSON_FIELDS spec value is an int
            if d.get(key) and isinstance(JSON_FIELDS[key], int):
                data[key] = int(d[key])
            else:
                data[key] = d[key]
    return data
def children(self, quick=False):
    """Returns list of the Collection's Entity objects.

    >>> c = Collection.from_json('/tmp/ddr-testing-123')
    >>> c.children()
    [<Entity ddr-testing-123-1>, <Entity ddr-testing-123-2>, ...]

    TODO use util.find_meta_files()

    @param quick: Boolean List only titles and IDs
    @param dicts: Boolean List only titles and IDs (dicts)
    @returns: list of Entities or ListEntity
    """
    entity_paths = []
    if os.path.exists(self.files_path):
        # TODO use cached list if available
        for eid in os.listdir(self.files_path):
            path = os.path.join(self.files_path, eid)
            entity_paths.append(path)
    entity_paths = util.natural_sort(entity_paths)
    entities = []
    for path in entity_paths:
        if quick:
            # fake Entity with just enough info for lists
            entity_json_path = os.path.join(path, 'entity.json')
            if os.path.exists(entity_json_path):
                e = ListEntity()
                e.identifier = Identifier(path=path)
                e.id = e.identifier.id
                # scan raw JSON line-by-line instead of parsing the whole
                # document; each matching line is itself a JSON key:value pair
                for line in fileio.read_text(entity_json_path).split('\n'):
                    if '"title":' in line:
                        e.title = json.loads('{%s}' % line)['title']
                    elif '"signature_id":' in line:
                        e.signature_id = json.loads('{%s}' % line)['signature_id']
                        e.signature_abs = common.signature_abs(
                            e, self.identifier.basepath)
                    if e.title and e.signature_id:
                        # stop once we have what we need so we don't waste time
                        # and have entity.children as separate ghost entities
                        break
                entities.append(e)
        else:
            # full Entity load; copy title from field metadata
            entity = Entity.from_identifier(Identifier(path=path))
            for lv in entity.labels_values():
                if lv['label'] == 'title':
                    entity.title = lv['value']
            entities.append(entity)
    return entities
def analyze_files(paths, verbose=False):
    """Opens files with strict encoding; lists paths that throw exceptions

    @param paths: list
    @param verbose: boolean
    @returns: list of defective paths
    """
    defects = []
    for path in paths:
        bad = 0
        try:
            text = fileio.read_text(path, utf8_strict=True)
        except Exception:
            # fix: was a bare `except:`, which also swallowed
            # KeyboardInterrupt/SystemExit
            bad += 1
            defects.append(path)
            # re-read leniently and guess the real encoding
            text = fileio.read_text(path)
            # NOTE(review): chardet.detect() expects bytes -- confirm
            # fileio.read_text's return type here
            guess = chardet.detect(text)
            if verbose:
                print('\n| {} {}'.format(path, guess))
        if (not bad) and verbose:
            sys.stdout.write('.')
    if len(paths) and verbose:
        print('')
    return defects
def _load_vocab_files(vocabs_path):
    """Loads vocabulary term files in the 'ddr' repository

    @param vocabs_path: Absolute path to dir containing vocab .json files.
    @returns: list of raw text contents of files.
    """
    return [
        fileio.read_text(os.path.join(vocabs_path, name))
        for name in os.listdir(vocabs_path)
        if os.path.splitext(name)[1] == '.json'
    ]
def model_def_fields(document):
    """ Wrapper around DDR.models.model_def_fields
    """
    module = document.identifier.fields_module()
    json_text = fileio.read_text(document.json_path)
    result = modules.Module(module).cmp_model_definition_fields(json_text)
    # 'File.path_rel' is created when instantiating Files,
    # is not part of model definitions -- drop it from both diffs.
    added = [f for f in result['added'] if f != 'path_rel']
    removed = [f for f in result['removed'] if f != 'path_rel']
    if added:
        document.model_def_fields_added = added
        document.model_def_fields_added_msg = WEBUI_MESSAGES['MODEL_DEF_FIELDS_ADDED'] % added
    if removed:
        document.model_def_fields_removed = removed
        document.model_def_fields_removed_msg = WEBUI_MESSAGES['MODEL_DEF_FIELDS_REMOVED'] % removed
def links_incoming(self):
    """List of path_rels of files that link to this file.
    """
    incoming = []
    result = envoy.run('find {} -name "*.json" -print'.format(self.entity_files_path))
    json_files = result.std_out.strip().split('\n') if result.std_out else []
    for filename in json_files:
        fields = json.loads(fileio.read_text(filename))
        # first pass: find the document's own path_rel
        path_rel = None
        for field in fields:
            if field.get('path_rel'):
                path_rel = field['path_rel']
        # second pass: scan links fields for references to this file
        for field in fields:
            linksraw = field.get('links')
            if linksraw:
                for link in linksraw.strip().split(';'):
                    if self.basename in link.strip():
                        incoming.append(path_rel)
    return incoming
def links_incoming( self ):
    """List of path_rels of files that link to this file.
    """
    incoming = []
    found = envoy.run('find {} -name "*.json" -print'.format(self.entity_files_path))
    paths = found.std_out.strip().split('\n') if found.std_out else []
    for json_file in paths:
        doc = json.loads(fileio.read_text(json_file))
        # locate the document's own path_rel
        path_rel = None
        for field in doc:
            if field.get('path_rel'):
                path_rel = field['path_rel']
        # any links entry mentioning this file's basename counts as incoming
        for field in doc:
            raw_links = field.get('links')
            if raw_links:
                for link in raw_links.strip().split(';'):
                    if self.basename in link.strip():
                        incoming.append(path_rel)
    return incoming
def sort_file_paths(json_paths, rank='role-eid-sort'):
    """Sort file JSON paths in human-friendly order.

    TODO this belongs in DDR.identifier

    @param json_paths: list of absolute paths to file .json documents
    @param rank: 'role-eid-sort' or 'eid-sort-role'
    @returns: list of paths, naturally sorted by the requested key
    """
    # fix: work on a copy -- the original pop()ed the caller's list empty,
    # a surprising side effect for callers that reuse json_paths
    remaining = list(json_paths)
    paths = {}
    keys = []
    while remaining:
        path = remaining.pop()
        identifier = Identifier(path=path)
        eid = identifier.parts.get('eid', None)
        role = identifier.parts.get('role', None)
        sha1 = identifier.parts.get('sha1', None)
        sort = 0
        # NOTE(review): substring match -- ANY line containing 'sort' sets the
        # sort value; kept as-is to preserve existing matching behavior
        for line in fileio.read_text(path).splitlines():
            if 'sort' in line:
                sort = line.split(':')[1].replace('"', '').strip()
        eid = str(eid)
        sha1 = str(sha1)
        sort = str(sort)
        if rank == 'eid-sort-role':
            key = '-'.join([str(eid), sort, role, sha1])
        elif rank == 'role-eid-sort':
            key = '-'.join([role, eid, sort, sha1])
        paths[key] = path
        keys.append(key)
    keys_sorted = [key for key in util.natural_sort(keys)]
    paths_sorted = []
    while keys_sorted:
        val = paths.pop(keys_sorted.pop(), None)
        if val:
            paths_sorted.append(val)
    return paths_sorted
def log(self):
    """Return the contents of the log file, or '' if it does not exist."""
    if not os.path.exists(self.logpath):
        return ''
    return fileio.read_text(self.logpath)
def load_template(filename: str) -> str:
    # Return the raw text of the template file at *filename*.
    # (Annotations match the typed duplicate of this helper elsewhere in the file.)
    return fileio.read_text(filename)
def read_changelog(path: str) -> List[Dict[str, object]]:
    """
    @param path: Absolute path to changelog file.
    @returns list of entry dicts
    """
    text = fileio.read_text(path)
    return read_entries(text)
def gitignore(self):
    """Return the repo's .gitignore text, seeding it from the template if absent."""
    if not os.path.exists(self.gitignore_path):
        # create .gitignore from the site-wide template
        template_text = fileio.read_text(GITIGNORE_TEMPLATE)
        fileio.write_text(template_text, self.gitignore_path)
    return fileio.read_text(self.gitignore_path)
def load_json(path):
    """Read and parse a JSON file.

    @param path: str Absolute path to JSON file
    @returns: parsed data
    @raises: Exception if the file is not valid JSON
    """
    try:
        data = json.loads(fileio.read_text(path))
    except json.JSONDecodeError as err:
        # fix: message previously misnamed the exception as
        # json.errors.JSONDecodeError (stdlib json has no `errors` submodule);
        # chain the original error so line/column info is not lost
        raise Exception('json.JSONDecodeError reading %s' % path) from err
    return data
def load_template(filename: str) -> str:
    """Return the raw text of the template file at *filename*."""
    template_text = fileio.read_text(filename)
    return template_text
def load_json(path):
    """Parse the JSON file at *path*; raise a generic Exception on parse errors."""
    text = fileio.read_text(path)
    try:
        return json.loads(text)
    except json.errors.JSONDecodeError:
        raise Exception('simplejson.errors.JSONDecodeError reading %s' % path)