def datasets(keywords=None, licenses=None):
    """Search all datasets by keywords and licenses.

    Parameters
    ----------
    keywords : iterable of str, optional
        Case-insensitive substrings. A local script matches when any keyword
        occurs in its title, name, or keywords.
    licenses : iterable of str, optional
        License names, compared case-insensitively against the ``'name'``
        entry of each item in a script's ``licenses``.

    Returns
    -------
    dict
        ``{'online': [...], 'offline': [...]}``. ``'online'`` is the sorted,
        de-duplicated union of the names returned by
        ``get_dataset_names_upstream`` for ``RETRIEVER_REPOSITORY`` and for
        its default repository. ``'offline'`` holds the matching local script
        objects sorted by lower-cased name.

    Notes
    -----
    When neither filter is given, every local script is returned. When both
    are given, a script is kept if it matches either one of them.
    """
    script_list = SCRIPT_LIST()

    if not keywords and not licenses:
        # No filters: report every local script and query upstream unfiltered.
        offline_scripts = sorted(script_list, key=lambda s: s.name.lower())
        retriever_scripts = get_dataset_names_upstream(
            repo=RETRIEVER_REPOSITORY)
        recipes_scripts = get_dataset_names_upstream()
        return {
            'online': sorted(set(retriever_scripts + recipes_scripts)),
            'offline': offline_scripts,
        }

    # Normalise the filters once instead of once per script.
    if licenses:
        # The lower-cased list is also what gets forwarded upstream below.
        licenses = [name.lower() for name in licenses]
    wanted_licenses = set(licenses) if licenses else set()
    wanted_keywords = [k.lower() for k in keywords] if keywords else []

    matches = set()
    for script in script_list:
        if not script.name:
            continue

        if licenses:
            # A script may carry no license metadata at all; treat a missing
            # or empty ``licenses`` as "no licenses" rather than failing.
            script_licenses = {
                entry['name'].lower()
                for entry in (script.licenses or ())
                if entry['name']
            }
            if not wanted_licenses.isdisjoint(script_licenses):
                matches.add(script)
                continue

        if keywords:
            # Searchable text: title, name and (if present) the keywords.
            searchable = script.title + ' ' + script.name
            if script.keywords:
                searchable = searchable + ' ' + '-'.join(script.keywords)
            searchable = searchable.lower()
            if any(k in searchable for k in wanted_keywords):
                matches.add(script)

    # The offline scripts filtered by params
    offline_scripts = sorted(matches, key=lambda s: s.name.lower())

    # The upstream scripts filtered by the same params
    retriever_scripts = get_dataset_names_upstream(
        keywords, licenses, repo=RETRIEVER_REPOSITORY)
    recipes_scripts = get_dataset_names_upstream(keywords, licenses)

    return {
        'online': sorted(set(retriever_scripts + recipes_scripts)),
        'offline': offline_scripts,
    }
for module in module_list: script_list.append(module.name) if hasattr(module, "keywords"): # Add list of keywords to keywords_list if module.keywords: keywords_list += module.keywords if hasattr(module, "licenses"): # Append string to list of licenses_list if module.licenses: for dict_items in module.licenses: if dict_items['name']: licenses_list.append(dict_items['name']) script_list.extend(get_dataset_names_upstream(repo=RETRIEVER_REPOSITORY)) script_list.extend(get_dataset_names_upstream()) script_list = sorted(set(script_list)) # set of all possible licenses and keywords licenses_options = set(licenses_list) keywords_options = set(keywords_list) parser = argparse.ArgumentParser(prog="retriever") parser.add_argument('-v', '--version', action='version', version=VERSION) parser.add_argument('-q', '--quiet', help='suppress command-line output', action='store_true') # ..............................................................