def create(self, obj, **kwargs):
    if not self.exists(obj, **kwargs):
        # Pull out locally used fields
        extra_dir = kwargs.get('extra_dir', None)
        extra_dir_at_root = kwargs.get('extra_dir_at_root', False)
        dir_only = kwargs.get('dir_only', False)
        alt_name = kwargs.get('alt_name', None)
        # Construct hashed path
        rel_path = os.path.join(*directory_hash_id(obj.id))
        # Optionally append extra_dir
        if extra_dir is not None:
            if extra_dir_at_root:
                rel_path = os.path.join(extra_dir, rel_path)
            else:
                rel_path = os.path.join(rel_path, extra_dir)
        # Create given directory in cache
        cache_dir = os.path.join(self.staging_path, rel_path)
        if not os.path.exists(cache_dir):
            os.makedirs(cache_dir)
        # Although not really necessary to create S3 folders (because S3 has
        # flat namespace), do so for consistency with the regular file system
        # S3 folders are marked by having trailing '/' so add it now
        # s3_dir = '%s/' % rel_path
        # self._push_to_os(s3_dir, from_string='')
        # If instructed, create the dataset in cache & in S3
        if not dir_only:
            rel_path = os.path.join(rel_path, alt_name if alt_name else "dataset_%s.dat" % obj.id)
            open(os.path.join(self.staging_path, rel_path), 'w').close()
            self._push_to_os(rel_path, from_string='')
def create(self, obj, **kwargs): """Touch a file (aka create empty), if it doesn't exist""" if not self.exists(obj, **kwargs): # Pull out locally used fields extra_dir = kwargs.get('extra_dir', None) extra_dir_at_root = kwargs.get('extra_dir_at_root', False) dir_only = kwargs.get('dir_only', False) alt_name = kwargs.get('alt_name', None) # Construct hashed path rel_path = os.path.join(*directory_hash_id(obj.id)) # Optionally append extra_dir if extra_dir is not None: if extra_dir_at_root: rel_path = os.path.join(extra_dir, rel_path) else: rel_path = os.path.join(rel_path, extra_dir) # Create given directory in cache cache_dir = os.path.join(self.staging_path, rel_path) if not os.path.exists(cache_dir): os.makedirs(cache_dir) if dir_only: self.pithos.upload_from_string( rel_path, '', content_type='application/directory') else: rel_path = os.path.join( rel_path, alt_name if alt_name else 'dataset_{0}.dat'.format(obj.id)) new_file = os.path.join(self.staging_path, rel_path) open(new_file, 'w').close() self.pithos.upload_from_string(rel_path, '')
def _construct_path(self, obj, base_dir=None, dir_only=None, extra_dir=None,
                    extra_dir_at_root=False, alt_name=None, obj_dir=False, **kwargs):
    # extra_dir should never be constructed from provided data but just
    # make sure there are no shenanigans afoot
    if extra_dir and extra_dir != os.path.normpath(extra_dir):
        log.warning('extra_dir is not normalized: %s', extra_dir)
        raise ObjectInvalid("The requested object is invalid")
    # ensure that any parent directory references in alt_name would not
    # result in a path not contained in the directory path constructed here
    if alt_name:
        if not safe_relpath(alt_name):
            log.warning('alt_name would locate path outside dir: %s', alt_name)
            raise ObjectInvalid("The requested object is invalid")
        # alt_name can contain parent directory references, but S3 will not
        # follow them, so if they are valid we normalize them out
        alt_name = os.path.normpath(alt_name)
    rel_path = os.path.join(*directory_hash_id(obj.id))
    if extra_dir is not None:
        if extra_dir_at_root:
            rel_path = os.path.join(extra_dir, rel_path)
        else:
            rel_path = os.path.join(rel_path, extra_dir)
    # for JOB_WORK directory
    if obj_dir:
        rel_path = os.path.join(rel_path, str(obj.id))
    if base_dir:
        base = self.extra_dirs.get(base_dir)
        return os.path.join(base, rel_path)
    # S3 folders are marked by having trailing '/' so add it now
    rel_path = '%s/' % rel_path
    if not dir_only:
        rel_path = os.path.join(rel_path, alt_name if alt_name else "dataset_%s.dat" % obj.id)
    return rel_path
def create(self, obj, **kwargs):
    if not self.exists(obj, **kwargs):
        # Pull out locally used fields
        extra_dir = kwargs.get('extra_dir', None)
        extra_dir_at_root = kwargs.get('extra_dir_at_root', False)
        dir_only = kwargs.get('dir_only', False)
        alt_name = kwargs.get('alt_name', None)
        # Construct hashed path
        rel_path = os.path.join(*directory_hash_id(obj.id))
        # Optionally append extra_dir
        if extra_dir is not None:
            if extra_dir_at_root:
                rel_path = os.path.join(extra_dir, rel_path)
            else:
                rel_path = os.path.join(rel_path, extra_dir)
        # Create given directory in cache
        cache_dir = os.path.join(self.staging_path, rel_path)
        if not os.path.exists(cache_dir):
            os.makedirs(cache_dir)
        if not dir_only:
            rel_path = os.path.join(rel_path, alt_name if alt_name else "dataset_%s.dat" % obj.id)
            open(os.path.join(self.staging_path, rel_path), 'w').close()
            self._push_to_os(rel_path, from_string='')
def job_properties(self, job_id, job_directory):
    pid = open(self.__pid_file(job_directory), "r").read().strip()
    contents = os.listdir(job_directory)
    try:
        rel_path = filter(self._is_instrumented_collectl_log, contents)[0]
        path = os.path.join(job_directory, rel_path)
    except IndexError:
        message = "Failed to find collectl log in directory %s, files were %s" % (job_directory, contents)
        raise Exception(message)
    properties = dict(
        pid=int(pid),
    )
    if self.saved_logs_path:
        destination_rel_dir = os.path.join(*util.directory_hash_id(job_id))
        destination_rel_path = os.path.join(destination_rel_dir, rel_path)
        destination_path = os.path.join(self.saved_logs_path, destination_rel_path)
        destination_dir = os.path.dirname(destination_path)
        if not os.path.isdir(destination_dir):
            os.makedirs(destination_dir)
        shutil.copyfile(path, destination_path)
        properties["raw_log_path"] = destination_rel_path
    if self.summarize_process_data:
        # Run collectl in playback and generate statistics of interest
        summary_statistics = self.__summarize_process_data(pid, path)
        for statistic, value in summary_statistics:
            properties["process_%s" % "_".join(statistic)] = value
    return properties
def get_repos( sa_session, path_to_repositories ):
    """
    Load repos from DB and included tools from .xml configs.
    """
    results = []
    for repo in sa_session.query( model.Repository ).filter_by( deleted=False ).filter_by( deprecated=False ).filter( model.Repository.type != 'tool_dependency_definition' ):
        repo_id = repo.id
        name = repo.name
        description = repo.description
        long_description = repo.long_description
        homepage_url = repo.homepage_url
        remote_repository_url = repo.remote_repository_url
        times_downloaded = repo.times_downloaded
        if not isinstance( times_downloaded, ( int, long ) ):
            times_downloaded = 0
        repo_owner_username = ''
        if repo.user_id is not None:
            user = sa_session.query( model.User ).filter( model.User.id == repo.user_id ).one()
            repo_owner_username = user.username
        approved = 'no'
        for review in repo.reviews:
            if review.approved == 'yes':
                approved = 'yes'
                break
        # Format the time since last update to be nicely readable.
        last_updated = pretty_print_time_interval( repo.update_time )
        full_last_updated = repo.update_time.strftime( "%Y-%m-%d %I:%M %p" )
        # Parse all the tools within repo for separate index.
        tools_list = []
        path = os.path.join( path_to_repositories, *directory_hash_id( repo.id ) )
        path = os.path.join( path, "repo_%d" % repo.id )
        if os.path.exists(path):
            tools_list.extend( load_one_dir( path ) )
            for root, dirs, files in os.walk( path ):
                if '.hg' in dirs:
                    dirs.remove('.hg')
                for dirname in dirs:
                    tools_in_dir = load_one_dir( os.path.join( root, dirname ) )
                    tools_list.extend( tools_in_dir )
        results.append(dict( id=repo_id,
                             name=name,
                             description=description,
                             long_description=long_description,
                             homepage_url=homepage_url,
                             remote_repository_url=remote_repository_url,
                             repo_owner_username=repo_owner_username,
                             times_downloaded=times_downloaded,
                             approved=approved,
                             last_updated=last_updated,
                             full_last_updated=full_last_updated,
                             tools_list=tools_list ) )
    return results
def create(self, obj, **kwargs):
    if not self.exists(obj, **kwargs):
        # Pull out locally used fields
        extra_dir = kwargs.get('extra_dir', None)
        extra_dir_at_root = kwargs.get('extra_dir_at_root', False)
        dir_only = kwargs.get('dir_only', False)
        alt_name = kwargs.get('alt_name', None)
        # Construct hashed path
        rel_path = os.path.join(*directory_hash_id(obj.id))
        # Optionally append extra_dir
        if extra_dir is not None:
            if extra_dir_at_root:
                rel_path = os.path.join(extra_dir, rel_path)
            else:
                rel_path = os.path.join(rel_path, extra_dir)
        # Create given directory in cache
        cache_dir = os.path.join(self.staging_path, rel_path)
        if not os.path.exists(cache_dir):
            os.makedirs(cache_dir)
        # Although not really necessary to create S3 folders (because S3 has
        # flat namespace), do so for consistency with the regular file system
        # S3 folders are marked by having trailing '/' so add it now
        # s3_dir = '%s/' % rel_path
        # self._push_to_os(s3_dir, from_string='')
        # If instructed, create the dataset in cache & in S3
        if not dir_only:
            rel_path = os.path.join(
                rel_path, alt_name if alt_name else "dataset_%s.dat" % obj.id)
            open(os.path.join(self.staging_path, rel_path), 'w').close()
            self._push_to_os(rel_path, from_string='')
def _construct_path(self, obj, old_style=False, base_dir=None, dir_only=False,
                    extra_dir=None, extra_dir_at_root=False, alt_name=None,
                    obj_dir=False, **kwargs):
    """
    Construct the absolute path for accessing the object identified by `obj.id`.

    :type base_dir: string
    :param base_dir: A key in self.extra_dirs corresponding to the base
                     directory in which this object should be created, or
                     None to specify the default directory.

    :type dir_only: boolean
    :param dir_only: If True, check only the path where the file identified
                     by `obj` should be located, not the dataset itself. This
                     option applies to `extra_dir` argument as well.

    :type extra_dir: string
    :param extra_dir: Append the value of this parameter to the expected path
                      used to access the object identified by `obj` (e.g.,
                      /files/000/<extra_dir>/dataset_10.dat).

    :type alt_name: string
    :param alt_name: Use this name as the alternative name for the returned
                     dataset rather than the default.

    :type old_style: boolean
    :param old_style: This option is used for backward compatibility. If `True`
                      then the composed directory structure does not include a
                      hash id (e.g., /files/dataset_10.dat (old) vs.
                      /files/000/dataset_10.dat (new))
    """
    base = os.path.abspath(self.extra_dirs.get(base_dir, self.file_path))
    # extra_dir should never be constructed from provided data but just
    # make sure there are no shenanigans afoot
    if extra_dir and extra_dir != os.path.normpath(extra_dir):
        log.warning('extra_dir is not normalized: %s', extra_dir)
        raise ObjectInvalid("The requested object is invalid")
    # ensure that any parent directory references in alt_name would not
    # result in a path not contained in the directory path constructed here
    if alt_name and not safe_relpath(alt_name):
        log.warning('alt_name would locate path outside dir: %s', alt_name)
        raise ObjectInvalid("The requested object is invalid")
    if old_style:
        if extra_dir is not None:
            path = os.path.join(base, extra_dir)
        else:
            path = base
    else:
        # Construct hashed path
        rel_path = os.path.join(*directory_hash_id(obj.id))
        # Create a subdirectory for the object ID
        if obj_dir:
            rel_path = os.path.join(rel_path, str(obj.id))
        # Optionally append extra_dir
        if extra_dir is not None:
            if extra_dir_at_root:
                rel_path = os.path.join(extra_dir, rel_path)
            else:
                rel_path = os.path.join(rel_path, extra_dir)
        path = os.path.join(base, rel_path)
    if not dir_only:
        path = os.path.join(path, alt_name if alt_name else "dataset_%s.dat" % obj.id)
    return os.path.abspath(path)
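A minimal sketch of the hashed layout this helper produces, assuming `directory_hash_id` is importable from `galaxy.util` as suggested by the snippets in this listing; the base directory and dataset id are hypothetical, and the expected result is taken from the docstring's own example (/files/000/dataset_10.dat):

import os

from galaxy.util import directory_hash_id  # assumed import location

file_path = "/files"   # hypothetical base directory
dataset_id = 10        # hypothetical dataset id

rel_path = os.path.join(*directory_hash_id(dataset_id))
path = os.path.join(file_path, rel_path, "dataset_%s.dat" % dataset_id)
print(path)  # per the docstring example above: /files/000/dataset_10.dat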
def rel_path_for_uuid_test(self):
    rel_path = os.path.join(*directory_hash_id(self.uuid))
    return rel_path
def get_repos(sa_session, path_to_repositories, hgweb_config_dir):
    """
    Load repos from DB and included tools from .xml configs.
    """
    hgwcm = HgWebConfigManager()
    hgwcm.hgweb_config_dir = hgweb_config_dir
    results = []
    for repo in sa_session.query(model.Repository).filter_by(
            deleted=False).filter_by(deprecated=False).filter(
            model.Repository.type != 'tool_dependency_definition'):
        category_names = []
        for rca in sa_session.query(
                model.RepositoryCategoryAssociation
        ).filter(model.RepositoryCategoryAssociation.repository_id == repo.id):
            for category in sa_session.query(model.Category).filter(
                    model.Category.id == rca.category.id):
                category_names.append(category.name.lower())
        categories = (",").join(category_names)
        repo_id = repo.id
        name = repo.name
        description = repo.description
        long_description = repo.long_description
        homepage_url = repo.homepage_url
        remote_repository_url = repo.remote_repository_url
        times_downloaded = repo.times_downloaded
        if not isinstance(times_downloaded, (int, long)):
            times_downloaded = 0
        repo_owner_username = ''
        if repo.user_id is not None:
            user = sa_session.query(
                model.User).filter(model.User.id == repo.user_id).one()
            repo_owner_username = user.username.lower()
        approved = 'no'
        for review in repo.reviews:
            if review.approved == 'yes':
                approved = 'yes'
                break
        # Format the time since last update to be nicely readable.
        last_updated = pretty_print_time_interval(repo.update_time)
        full_last_updated = repo.update_time.strftime("%Y-%m-%d %I:%M %p")
        # load all changesets of the repo
        repo_path = hgwcm.get_entry(
            os.path.join("repos", repo.user.username, repo.name))
        hg_repo = hg.repository(ui.ui(), repo_path)
        lineage = []
        for changeset in hg_repo.changelog:
            lineage.append(
                str(changeset) + ":" + str(hg_repo.changectx(changeset)))
        repo_lineage = str(lineage)
        # Parse all the tools within repo for separate index.
        tools_list = []
        path = os.path.join(path_to_repositories, *directory_hash_id(repo.id))
        path = os.path.join(path, "repo_%d" % repo.id)
        if os.path.exists(path):
            tools_list.extend(load_one_dir(path))
            for root, dirs, files in os.walk(path):
                if '.hg' in dirs:
                    dirs.remove('.hg')
                for dirname in dirs:
                    tools_in_dir = load_one_dir(os.path.join(root, dirname))
                    tools_list.extend(tools_in_dir)
        results.append(
            dict(id=repo_id,
                 name=name,
                 description=description,
                 long_description=long_description,
                 homepage_url=homepage_url,
                 remote_repository_url=remote_repository_url,
                 repo_owner_username=repo_owner_username,
                 times_downloaded=times_downloaded,
                 approved=approved,
                 last_updated=last_updated,
                 full_last_updated=full_last_updated,
                 tools_list=tools_list,
                 repo_lineage=repo_lineage,
                 categories=categories))
    return results
def copy_files_to_irods(start_dataset_id, end_dataset_id, object_store_info_file, irods_info_file, db_connection_info_file, copy_or_checksum):
    conn = None
    session = None
    osi_keys = None
    read_sql_statement = None
    read_cursor = None
    args = None
    rows = None
    objectid = None
    object_store_id = None
    uuid_without_dash = None
    uuid_with_dash = None
    object_store_path = None
    disk_sub_folder = None
    irods_sub_folder = None
    disk_file_path = None
    disk_folder_path = None
    irods_file_path = None
    irods_file_collection_path = None
    irods_folder_collection_path = None
    options = None
    iput_command = None
    object_store_info = None
    irods_info = None
    db_connection_info = None

    if start_dataset_id > end_dataset_id:
        print(f"Error: start_dataset_id {start_dataset_id} cannot be larger than end_dataset_id {end_dataset_id}")
        return

    # read object_store_info file
    with open(object_store_info_file, mode="r") as osi:
        object_store_info = json.load(osi)
    osi_keys = tuple(object_store_info.keys())

    # read irods_info file
    with open(irods_info_file, mode="r") as ii:
        irods_info = json.load(ii)

    # read db_connection_info file
    with open(db_connection_info_file, mode="r") as dci:
        db_connection_info = json.load(dci)

    try:
        # declare a new PostgreSQL connection object
        conn = connect(dbname=db_connection_info["dbname"],
                       user=db_connection_info["user"],
                       host=db_connection_info["host"],
                       password=db_connection_info["password"])
        conn.cursor()
    except Exception as e:
        print(e)
        return

    session = iRODSSession(host=irods_info["host"],
                           port=irods_info["port"],
                           user=irods_info["user"],
                           password=irods_info["password"],
                           zone=irods_info["zone"])
    session.connection_timeout = int(irods_info["timeout"])

    osi_keys = tuple(object_store_info.keys())
    read_sql_statement = """SELECT id, object_store_id, uuid
                            FROM dataset
                            WHERE state = %s
                            AND NOT deleted
                            AND NOT purged
                            AND id >= %s
                            AND id <= %s
                            AND object_store_id IN %s"""

    update_sql_statement = """UPDATE dataset
                              SET object_store_id = %s
                              WHERE id = %s"""

    try:
        read_cursor = conn.cursor()
        args = ('ok', start_dataset_id, end_dataset_id, osi_keys)
        read_cursor.execute(read_sql_statement, args)
        rows = read_cursor.fetchall()
        for row in rows:
            objectid = row[0]
            object_store_id = row[1]
            uuid_without_dash = row[2]
            uuid_with_dash = str(uuid.UUID(uuid_without_dash))

            object_store_path = object_store_info.get(object_store_id)
            if object_store_path is None:
                print(f"Error: object_store_info_file does not have a value for {object_store_id}")
                raise Exception

            irods_resc = get_irods_resource(conn, objectid, object_store_id, irods_info)

            disk_sub_folder = os.path.join(*directory_hash_id(objectid))
            irods_sub_folder = os.path.join(*directory_hash_id(uuid_with_dash))
            disk_file_path = os.path.join(object_store_path, disk_sub_folder, "dataset_" + str(objectid) + ".dat")
            disk_folder_path = os.path.join(object_store_path, disk_sub_folder, "dataset_" + str(objectid) + "_files")
            irods_file_path = os.path.join(irods_info["home"], irods_sub_folder, "dataset_" + str(uuid_with_dash) + ".dat")
            irods_file_collection_path = os.path.join(irods_info["home"], irods_sub_folder)
            irods_folder_collection_path = os.path.join(irods_file_collection_path, "dataset_" + str(uuid_with_dash) + "_files")

            if copy_or_checksum == "copy":
                # Create the collection
                session.collections.create(irods_file_collection_path)

                # Add disk file to collection
                options = {kw.REG_CHKSUM_KW: '', kw.RESC_NAME_KW: irods_resc}
                session.data_objects.put(disk_file_path, irods_file_path, **options)
                print(f"Copied disk file {disk_file_path} to irods {irods_file_path}")

                if os.path.isdir(disk_folder_path):
                    disk_folder_path_all_files = disk_folder_path + "/*"
                    # Create the collection
                    session.collections.create(irods_folder_collection_path)
                    iput_command = "iput -R " + irods_resc + " -rk " + disk_folder_path_all_files + " " + irods_folder_collection_path
                    subprocess.call(iput_command, shell=True)
                    print(f"Copied disk folder {disk_folder_path} to irods {irods_folder_collection_path}")

            if copy_or_checksum == "checksum":
                # Calculate disk file checksum. Then get the file checksum from irods and compare it with the calculated disk file checksum
                # Note that disk file checksum is ASCII, whereas irods file checksum is Unicode. get_file_checksum() converts checksum to unicode
                disk_file_checksum = get_file_checksum(disk_file_path)

                # Now get the file from irods
                try:
                    obj = session.data_objects.get(irods_file_path)
                    # obj.checksum is prepended with 'sha2:'. Remove that so we can compare it to disk file checksum
                    irods_file_checksum = obj.checksum[5:]
                    if irods_file_checksum != disk_file_checksum:
                        print(f"Error: irods file checksum {irods_file_checksum} does not match disk file checksum {disk_file_checksum} for irods file {irods_file_path} and disk file {disk_file_path}")
                        continue
                except (DataObjectDoesNotExist, CollectionDoesNotExist) as e:
                    print(e)
                    continue
                except NetworkException as e:
                    print(e)
                    continue

                # Recursively verify that the checksum of all files in this folder matches that in irods
                if os.path.isdir(disk_folder_path):
                    # Recursively traverse the files in this folder
                    for root, _dirs, files in os.walk(disk_folder_path):
                        for file_name in files:
                            a_disk_file_path = os.path.join(root, file_name)
                            # Get checksum for disk file
                            a_disk_file_checksum = get_file_checksum(a_disk_file_path)

                            # Construct irods path for this disk file, so we can get the file from irods and compare its checksum with the disk file checksum
                            # This extracts the subfolder name for irods from the full disk path
                            irods_sub_folder = root.replace(disk_folder_path + "/", "")
                            # For some reason, join would not work here. I used string concatenation instead
                            an_irods_file_path = irods_folder_collection_path + "/" + irods_sub_folder + "/" + file_name

                            # Now get the file from irods
                            try:
                                obj = session.data_objects.get(an_irods_file_path)
                                # obj.checksum is prepended with 'sha2:'. Remove that so we can compare it to disk file checksum
                                an_irods_file_checksum = obj.checksum[5:]
                                if an_irods_file_checksum != a_disk_file_checksum:
                                    print(f"Error: irods file checksum {an_irods_file_checksum} does not match disk file checksum {a_disk_file_checksum} for irods file {an_irods_file_path} and disk file {a_disk_file_path}")
                                    continue
                            except (DataObjectDoesNotExist, CollectionDoesNotExist) as e:
                                print(e)
                                continue
                            except NetworkException as e:
                                print(e)
                                continue

                    # Delete folder on disk
                    print(f"Removing directory {disk_folder_path}")
                    shutil.rmtree(disk_folder_path)

                # Update object store id
                update_cursor = conn.cursor()
                update_cursor.execute(update_sql_statement, (irods_info["object_store_id"], objectid))
                updated_rows = update_cursor.rowcount
                if updated_rows == 1:
                    print("Updated object store ID to {} in dataset table for object ID {}".format(irods_info["object_store_id"], objectid))
                else:
                    print("Error: Failed to update object store ID to {} in dataset table for object ID {}".format(irods_info["object_store_id"], objectid))
                update_cursor.close()

                # Delete file on disk
                print(f"Removing file {disk_file_path}")
                os.remove(disk_file_path)

    except Exception as e:
        print(e)
        session.cleanup()
        conn.rollback()
        read_cursor.close()
        conn.close()
        return

    session.cleanup()
    conn.commit()
    read_cursor.close()
    conn.close()
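A minimal sketch (hypothetical values throughout) of the disk-to-iRODS path pairing computed inside the loop above; it only reuses the same `directory_hash_id` calls and path composition, assuming the function is importable from `galaxy.util`:

import os

from galaxy.util import directory_hash_id  # assumed import location

object_store_path = "/data/objects"                      # hypothetical object_store_info value
irods_home = "/tempZone/home/rods"                       # hypothetical irods_info["home"]
objectid = 42                                            # hypothetical dataset id
uuid_with_dash = "135ee48a-4f51-470c-ae2f-ce8bd78799e6"  # hypothetical dataset UUID

disk_sub_folder = os.path.join(*directory_hash_id(objectid))
irods_sub_folder = os.path.join(*directory_hash_id(uuid_with_dash))
print(os.path.join(object_store_path, disk_sub_folder, "dataset_%s.dat" % objectid))
print(os.path.join(irods_home, irods_sub_folder, "dataset_%s.dat" % uuid_with_dash))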
def get_repos(sa_session, file_path, hgweb_config_dir, **kwargs):
    """
    Load repos from DB and included tools from .xml configs.
    """
    hgwcm = hgweb_config_manager
    hgwcm.hgweb_config_dir = hgweb_config_dir
    # Do not index deleted, deprecated, or "tool_dependency_definition" type repositories.
    q = sa_session.query(model.Repository).filter_by(deleted=False).filter_by(
        deprecated=False).order_by(model.Repository.update_time.desc())
    q = q.filter(model.Repository.type != 'tool_dependency_definition')
    for repo in q:
        category_names = []
        for rca in sa_session.query(
                model.RepositoryCategoryAssociation
        ).filter(model.RepositoryCategoryAssociation.repository_id == repo.id):
            for category in sa_session.query(model.Category).filter(
                    model.Category.id == rca.category.id):
                category_names.append(category.name.lower())
        categories = (",").join(category_names)
        repo_id = repo.id
        name = repo.name
        description = repo.description
        long_description = repo.long_description
        homepage_url = repo.homepage_url
        remote_repository_url = repo.remote_repository_url
        times_downloaded = repo.times_downloaded or 0
        repo_owner_username = ''
        if repo.user_id is not None:
            user = sa_session.query(
                model.User).filter(model.User.id == repo.user_id).one()
            repo_owner_username = user.username.lower()
        approved = 'no'
        for review in repo.reviews:
            if review.approved == 'yes':
                approved = 'yes'
                break
        last_updated = pretty_print_time_interval(repo.update_time)
        full_last_updated = repo.update_time.strftime("%Y-%m-%d %I:%M %p")
        # Load all changesets of the repo for lineage.
        repo_path = os.path.join(
            hgweb_config_dir,
            hgwcm.get_entry(os.path.join("repos", repo.user.username, repo.name)))
        hg_repo = hg.repository(ui.ui(), repo_path.encode('utf-8'))
        lineage = []
        for changeset in hg_repo.changelog:
            lineage.append(unicodify(changeset) + ":" + unicodify(hg_repo[changeset]))
        repo_lineage = str(lineage)
        # Parse all the tools within repo for a separate index.
        tools_list = []
        path = os.path.join(file_path, *directory_hash_id(repo.id))
        path = os.path.join(path, "repo_%d" % repo.id)
        if os.path.exists(path):
            tools_list.extend(load_one_dir(path))
            for root, dirs, _files in os.walk(path):
                if '.hg' in dirs:
                    dirs.remove('.hg')
                for dirname in dirs:
                    tools_in_dir = load_one_dir(os.path.join(root, dirname))
                    tools_list.extend(tools_in_dir)
        yield (dict(id=unicodify(repo_id),
                    name=unicodify(name),
                    description=unicodify(description),
                    long_description=unicodify(long_description),
                    homepage_url=unicodify(homepage_url),
                    remote_repository_url=unicodify(remote_repository_url),
                    repo_owner_username=unicodify(repo_owner_username),
                    times_downloaded=unicodify(times_downloaded),
                    approved=unicodify(approved),
                    last_updated=unicodify(last_updated),
                    full_last_updated=unicodify(full_last_updated),
                    tools_list=tools_list,
                    repo_lineage=unicodify(repo_lineage),
                    categories=unicodify(categories)))
def get_repos(sa_session, path_to_repositories):
    """
    Load repos from DB and included tools from .xml configs.
    """
    results = []
    for repo in sa_session.query(model.Repository).filter_by(
            deleted=False).filter_by(deprecated=False).filter(
            model.Repository.type != 'tool_dependency_definition'):
        repo_id = repo.id
        name = repo.name
        description = repo.description
        long_description = repo.long_description
        homepage_url = repo.homepage_url
        remote_repository_url = repo.remote_repository_url
        times_downloaded = repo.times_downloaded
        if not isinstance(times_downloaded, (int, long)):
            times_downloaded = 0
        repo_owner_username = ''
        if repo.user_id is not None:
            user = sa_session.query(
                model.User).filter(model.User.id == repo.user_id).one()
            repo_owner_username = user.username
        approved = 'no'
        for review in repo.reviews:
            if review.approved == 'yes':
                approved = 'yes'
                break
        # Format the time since last update to be nicely readable.
        last_updated = pretty_print_time_interval(repo.update_time)
        full_last_updated = repo.update_time.strftime("%Y-%m-%d %I:%M %p")
        # Parse all the tools within repo for separate index.
        tools_list = []
        path = os.path.join(path_to_repositories, *directory_hash_id(repo.id))
        path = os.path.join(path, "repo_%d" % repo.id)
        if os.path.exists(path):
            tools_list.extend(load_one_dir(path))
            for root, dirs, files in os.walk(path):
                if '.hg' in dirs:
                    dirs.remove('.hg')
                for dirname in dirs:
                    tools_in_dir = load_one_dir(os.path.join(root, dirname))
                    tools_list.extend(tools_in_dir)
        results.append(
            dict(id=repo_id,
                 name=name,
                 description=description,
                 long_description=long_description,
                 homepage_url=homepage_url,
                 remote_repository_url=remote_repository_url,
                 repo_owner_username=repo_owner_username,
                 times_downloaded=times_downloaded,
                 approved=approved,
                 last_updated=last_updated,
                 full_last_updated=full_last_updated,
                 tools_list=tools_list))
    return results
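For reference, a minimal sketch (hypothetical values) of the on-disk repository path composed by the get_repos variants above; for a low repository id the hash bucket is likely 000, matching the /files/000/... example in the _construct_path docstring, but the exact bucket depends on `directory_hash_id`:

import os

from galaxy.util import directory_hash_id  # assumed import location

path_to_repositories = "/srv/toolshed/repos"  # hypothetical base directory
repo_id = 123                                 # hypothetical repository id

path = os.path.join(path_to_repositories, *directory_hash_id(repo_id))
path = os.path.join(path, "repo_%d" % repo_id)
print(path)  # likely /srv/toolshed/repos/000/repo_123 for this low id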