def fetch(params, seqid=None, db=None, update=False):
    """
    Downloads GenBank records from NCBI and stores a JSON version of each.

    params: iterable of objects exposing `.json` (current data, if any)
            and `.acc` (the accession to fetch).
    seqid:  forwarded to jsonrec.parse_file during conversion.
    db:     forwarded to ncbi_efetch.
    update: when true, entries are fetched again even if they already
            have data.

    Returns nothing; the results are the files written by the helpers.
    """
    for entry in params:

        # Entries that already carry data are skipped unless an update
        # was explicitly requested.
        already_loaded = bool(entry.json)
        if already_loaded and not update:
            continue

        accession = entry.acc

        # Storage locations for the JSON and GenBank forms of the record.
        json_name = resolve_fname(name=accession, format="json")
        gbk_name = resolve_fname(name=accession, format="gb")

        # Retrieve the GenBank record from the remote site into gbk_name.
        ncbi_efetch(accession, db=db, gbk_name=gbk_name)

        # Turn the GenBank file into a JSON record and persist it.
        record = jsonrec.parse_file(fname=gbk_name, seqid=seqid)
        save_json_file(fname=json_name, data=record)
def fetch_data(data, param):
    """
    Obtains data from NCBI for every name that is not already available.

    data:  iterable of names (accessions) to obtain.
    param: object read for `.protein` (selects the "protein" database
           instead of "nuccore"), `.update` (forces a new download) and
           `.seqid` (forwarded to jsonrec.parse_file).

    Returns nothing; each processed name ends up as a GenBank file and a
    JSON file written through the helpers.
    """
    database = "protein" if param.protein else "nuccore"

    # Names with these prefixes are handled as genome assemblies.
    assembly_prefixes = ("GCA", "GCF")

    for name in data:

        # On update the existing data is deliberately not consulted,
        # otherwise a name that already resolves needs no work.
        if not param.update and get_json(name):
            continue

        # Storage locations for the JSON and GenBank forms of the record.
        json_name = resolve_fname(name=name, format="json")
        gbk_name = resolve_fname(name=name, format="gb")

        if name.startswith(assembly_prefixes):
            # Genome assembly data.
            ncbi.genome(name=name, fname=gbk_name, update=param.update)
        else:
            # Regular GenBank data.
            ncbi.genbank_save(name, db=database, fname=gbk_name)

        # Either branch leaves a GenBank file at gbk_name; convert it to
        # JSON and save the result.
        record = jsonrec.parse_file(fname=gbk_name, seqid=param.seqid)
        save_json_file(fname=json_name, data=record)
def get_json(name, seqid=None, update=False, inter=False, strict=False):
    """
    Attempts to return JSON formatted data based on a name.

    Sources are tried in this order, the first match wins:

      1. `name` is a path to an existing file: the file is parsed.
      2. `update` is set: the data is reported as not found (None).
      3. A stored JSON file exists for the name: it is read.
      4. A stored GenBank file exists for the name: it is parsed and
         saved as JSON.
      5. `inter` is set: a record is built from the name itself.

    When nothing matches, utils.error is called if `strict` is set and
    None is returned otherwise.
    """
    # A local file takes precedence over everything else, update included.
    if os.path.isfile(name):
        return jsonrec.parse_file(name, seqid=seqid)

    # Not a local file. An update request means stored copies are ignored.
    if update:
        return None

    # Storage locations for the JSON and GenBank forms of the record.
    json_name = resolve_fname(name=name, format="json")
    gbk_name = resolve_fname(name=name, format="gb")

    # The JSON representation is already stored.
    if os.path.isfile(json_name):
        logger.info(f"found {json_name}")
        return read_json_file(json_name)

    # Only the GenBank file is stored: convert it and keep the JSON.
    if os.path.isfile(gbk_name):
        logger.info(f"found {gbk_name}")
        parsed = jsonrec.parse_file(fname=gbk_name, seqid=seqid)
        # The caller receives whatever save_json_file returns
        # (presumably the saved data; confirm against that helper).
        return save_json_file(fname=json_name, data=parsed)

    # Interactive mode: the name itself is the input.
    if inter:
        return jsonrec.make_jsonrec(seq=name, seqid=seqid)

    # Nothing was found at this point.
    if strict:
        utils.error(f"data not found: {name}")

    return None
def get_json(name, seqid=None, inter=False, strict=False):
    """
    Attempts to return JSON formatted data based on a name.

    Sources are tried in this order, the first match wins:

      1. `name` is a path to an existing file: the file is parsed; a
         parsing failure is logged and terminates the program.
      2. A stored JSON file exists for the name: it is read.
      3. A stored GenBank file exists for the name: it is parsed and
         saved as JSON.
      4. `inter` is set: a record is built from the name itself.

    When nothing matches, utils.error is called if `strict` is set and
    None is returned otherwise.
    """
    # The name points at an existing file: parse it directly.
    if os.path.isfile(name):
        try:
            return jsonrec.parse_file(name, seqid=seqid)
        except Exception as exc:
            # Any parsing problem is reported and ends the run.
            logger.error(f"JSON parsing error for file {name}: {exc}")
            sys.exit(-1)

    # Storage locations for the JSON and GenBank forms of the record.
    json_name = resolve_fname(name=name, format="json")
    gbk_name = resolve_fname(name=name, format="gb")

    # The JSON representation is already stored.
    if os.path.isfile(json_name):
        logger.info(f"found {json_name}")
        return read_json_file(json_name)

    # Only the GenBank file is stored: convert it and keep the JSON.
    if os.path.isfile(gbk_name):
        logger.info(f"found {gbk_name}")
        parsed = jsonrec.parse_file(fname=gbk_name, seqid=seqid)
        # The caller receives whatever save_json_file returns
        # (presumably the saved data; confirm against that helper).
        return save_json_file(fname=json_name, data=parsed)

    # Interactive input: make the record from the name itself.
    if inter:
        return jsonrec.make_jsonrec(name, seqid=seqid)

    # Nothing was found; complain only in strict mode.
    if strict:
        utils.error(f"data not found: {name}")

    return None