Example #1
    def create(self, obj, **kwargs):
        if not self.exists(obj, **kwargs):

            # Pull out locally used fields
            extra_dir = kwargs.get('extra_dir', None)
            extra_dir_at_root = kwargs.get('extra_dir_at_root', False)
            dir_only = kwargs.get('dir_only', False)
            alt_name = kwargs.get('alt_name', None)

            # Construct hashed path
            rel_path = os.path.join(*directory_hash_id(obj.id))

            # Optionally append extra_dir
            if extra_dir is not None:
                if extra_dir_at_root:
                    rel_path = os.path.join(extra_dir, rel_path)
                else:
                    rel_path = os.path.join(rel_path, extra_dir)

            # Create given directory in cache
            cache_dir = os.path.join(self.staging_path, rel_path)
            if not os.path.exists(cache_dir):
                os.makedirs(cache_dir)

            # Although not really necessary to create S3 folders (because S3 has
            # flat namespace), do so for consistency with the regular file system
            # S3 folders are marked by having trailing '/' so add it now
            # s3_dir = '%s/' % rel_path
            # self._push_to_os(s3_dir, from_string='')
            # If instructed, create the dataset in cache & in S3
            if not dir_only:
                rel_path = os.path.join(rel_path, alt_name if alt_name else "dataset_%s.dat" % obj.id)
                open(os.path.join(self.staging_path, rel_path), 'w').close()
                self._push_to_os(rel_path, from_string='')
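Every example on this page builds the hashed relative path with directory_hash_id(obj.id). The real implementation lives in the project these snippets come from, but a minimal standalone sketch of the idea, assuming numeric ids are simply split into three-digit directory segments, could look like this (the function body below is illustrative, not the original):

import os

def directory_hash_id(id_):
    # Illustrative sketch only -- not the original implementation.
    # Split a numeric id into three-digit directory segments so that no
    # single directory accumulates more than roughly a thousand datasets.
    s = str(id_)
    if len(s) < 4:
        return ["000"]
    # Pad to a multiple of three digits, then drop the last three digits,
    # which identify the file inside its leaf directory.
    padded = ("0" * ((3 - len(s) % 3) % 3)) + s
    return [padded[i:i + 3] for i in range(0, len(padded) - 3, 3)]

# A dataset with id 1234567 would then be cached under
# <staging_path>/001/234/dataset_1234567.dat
print(os.path.join(*directory_hash_id(1234567)))  # 001/234
print(os.path.join(*directory_hash_id(42)))       # 000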
Example #2
    def create(self, obj, **kwargs):
        """Touch a file (aka create empty), if it doesn't exist"""
        if not self.exists(obj, **kwargs):
            # Pull out locally used fields
            extra_dir = kwargs.get('extra_dir', None)
            extra_dir_at_root = kwargs.get('extra_dir_at_root', False)
            dir_only = kwargs.get('dir_only', False)
            alt_name = kwargs.get('alt_name', None)

            # Construct hashed path
            rel_path = os.path.join(*directory_hash_id(obj.id))

            # Optionally append extra_dir
            if extra_dir is not None:
                if extra_dir_at_root:
                    rel_path = os.path.join(extra_dir, rel_path)
                else:
                    rel_path = os.path.join(rel_path, extra_dir)

            # Create given directory in cache
            cache_dir = os.path.join(self.staging_path, rel_path)
            if not os.path.exists(cache_dir):
                os.makedirs(cache_dir)

            if dir_only:
                self.pithos.upload_from_string(
                    rel_path, '', content_type='application/directory')
            else:
                rel_path = os.path.join(
                    rel_path,
                    alt_name if alt_name else 'dataset_{0}.dat'.format(obj.id))
                new_file = os.path.join(self.staging_path, rel_path)
                open(new_file, 'w').close()
                self.pithos.upload_from_string(rel_path, '')
Example #3
    def _construct_path(self, obj, base_dir=None, dir_only=None, extra_dir=None, extra_dir_at_root=False, alt_name=None, obj_dir=False, **kwargs):
        # extra_dir should never be constructed from provided data but just
        # make sure there are no shenanigans afoot
        if extra_dir and extra_dir != os.path.normpath(extra_dir):
            log.warning('extra_dir is not normalized: %s', extra_dir)
            raise ObjectInvalid("The requested object is invalid")
        # ensure that any parent directory references in alt_name would not
        # result in a path not contained in the directory path constructed here
        if alt_name:
            if not safe_relpath(alt_name):
                log.warning('alt_name would locate path outside dir: %s', alt_name)
                raise ObjectInvalid("The requested object is invalid")
            # alt_name can contain parent directory references, but S3 will not
            # follow them, so if they are valid we normalize them out
            alt_name = os.path.normpath(alt_name)
        rel_path = os.path.join(*directory_hash_id(obj.id))
        if extra_dir is not None:
            if extra_dir_at_root:
                rel_path = os.path.join(extra_dir, rel_path)
            else:
                rel_path = os.path.join(rel_path, extra_dir)

        # for JOB_WORK directory
        if obj_dir:
            rel_path = os.path.join(rel_path, str(obj.id))
        if base_dir:
            base = self.extra_dirs.get(base_dir)
            return os.path.join(base, rel_path)

        # S3 folders are marked by having trailing '/' so add it now
        rel_path = '%s/' % rel_path

        if not dir_only:
            rel_path = os.path.join(rel_path, alt_name if alt_name else "dataset_%s.dat" % obj.id)
        return rel_path
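A small detail in this variant: the trailing '/' appended to mark an S3 "folder" does not get doubled when a file name is later joined onto it, because os.path.join does not add a second separator when the left part already ends with one (on POSIX). A quick check in plain Python:

import os

rel_path = os.path.join("000", "123")
# dir_only case: keep the trailing slash so S3-style stores treat the key as a folder marker
dir_key = '%s/' % rel_path
print(dir_key)                                  # 000/123/
# file case: joining onto the slash-terminated prefix still yields a single separator
print(os.path.join(dir_key, "dataset_42.dat"))  # 000/123/dataset_42.dat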
Example #4
    def create(self, obj, **kwargs):
        if not self.exists(obj, **kwargs):

            # Pull out locally used fields
            extra_dir = kwargs.get('extra_dir', None)
            extra_dir_at_root = kwargs.get('extra_dir_at_root', False)
            dir_only = kwargs.get('dir_only', False)
            alt_name = kwargs.get('alt_name', None)

            # Construct hashed path
            rel_path = os.path.join(*directory_hash_id(obj.id))

            # Optionally append extra_dir
            if extra_dir is not None:
                if extra_dir_at_root:
                    rel_path = os.path.join(extra_dir, rel_path)
                else:
                    rel_path = os.path.join(rel_path, extra_dir)

            # Create given directory in cache
            cache_dir = os.path.join(self.staging_path, rel_path)
            if not os.path.exists(cache_dir):
                os.makedirs(cache_dir)

            if not dir_only:
                rel_path = os.path.join(rel_path, alt_name if alt_name else "dataset_%s.dat" % obj.id)
                open(os.path.join(self.staging_path, rel_path), 'w').close()
                self._push_to_os(rel_path, from_string='')
Example #5
    def job_properties(self, job_id, job_directory):
        pid = open(self.__pid_file(job_directory), "r").read().strip()
        contents = os.listdir(job_directory)
        try:
            # A list comprehension (rather than filter()) keeps [0]/IndexError working under Python 3 as well
            rel_path = [f for f in contents if self._is_instrumented_collectl_log(f)][0]
            path = os.path.join(job_directory, rel_path)
        except IndexError:
            message = "Failed to find collectl log in directory %s, files were %s" % (job_directory, contents)
            raise Exception(message)

        properties = dict(
            pid=int(pid),
        )

        if self.saved_logs_path:
            destination_rel_dir = os.path.join(*util.directory_hash_id(job_id))
            destination_rel_path = os.path.join(destination_rel_dir, rel_path)
            destination_path = os.path.join(self.saved_logs_path, destination_rel_path)
            destination_dir = os.path.dirname(destination_path)
            if not os.path.isdir(destination_dir):
                os.makedirs(destination_dir)
            shutil.copyfile(path, destination_path)
            properties["raw_log_path"] = destination_rel_path

        if self.summarize_process_data:
            # Run collectl in playback and generate statistics of interest
            summary_statistics = self.__summarize_process_data(pid, path)
            for statistic, value in summary_statistics:
                properties["process_%s" % "_".join(statistic)] = value

        return properties
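The process_* keys are built by joining each statistic tuple with underscores. A tiny standalone sketch of that loop, with invented statistic names (the real (statistic, value) pairs come from __summarize_process_data and collectl playback):

# Invented example data -- the real (statistic, value) pairs come from collectl playback.
summary_statistics = [(("max", "rss"), 1024), (("mean", "cpu"), 12.5)]
properties = {}
for statistic, value in summary_statistics:
    properties["process_%s" % "_".join(statistic)] = value
print(properties)  # {'process_max_rss': 1024, 'process_mean_cpu': 12.5}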
Example #6
def get_repos( sa_session, path_to_repositories ):
    """
    Load repos from DB and included tools from .xml configs.
    """
    results = []
    for repo in sa_session.query( model.Repository ).filter_by( deleted=False ).filter_by( deprecated=False ).filter( model.Repository.type != 'tool_dependency_definition' ):

        repo_id = repo.id
        name = repo.name
        description = repo.description
        long_description = repo.long_description
        homepage_url = repo.homepage_url
        remote_repository_url = repo.remote_repository_url

        times_downloaded = repo.times_downloaded
        if not isinstance( times_downloaded, ( int, long ) ):
            times_downloaded = 0

        repo_owner_username = ''
        if repo.user_id is not None:
            user = sa_session.query( model.User ).filter( model.User.id == repo.user_id ).one()
            repo_owner_username = user.username

        approved = 'no'
        for review in repo.reviews:
            if review.approved == 'yes':
                approved = 'yes'
                break

        #  Format the time since last update to be nicely readable.
        last_updated = pretty_print_time_interval( repo.update_time )
        full_last_updated = repo.update_time.strftime( "%Y-%m-%d %I:%M %p" )

        #  Parse all the tools within repo for separate index.
        tools_list = []
        path = os.path.join( path_to_repositories, *directory_hash_id( repo.id ) )
        path = os.path.join( path, "repo_%d" % repo.id )
        if os.path.exists(path):
            tools_list.extend( load_one_dir( path ) )
            for root, dirs, files in os.walk( path ):
                if '.hg' in dirs:
                    dirs.remove('.hg')
                for dirname in dirs:
                    tools_in_dir = load_one_dir( os.path.join( root, dirname ) )
                    tools_list.extend( tools_in_dir )

        results.append(dict( id=repo_id,
                             name=name,
                             description=description,
                             long_description=long_description,
                             homepage_url=homepage_url,
                             remote_repository_url=remote_repository_url,
                             repo_owner_username=repo_owner_username,
                             times_downloaded=times_downloaded,
                             approved=approved,
                             last_updated=last_updated,
                             full_last_updated=full_last_updated,
                             tools_list=tools_list ) )
    return results
Example #7
    def create(self, obj, **kwargs):
        if not self.exists(obj, **kwargs):

            # Pull out locally used fields
            extra_dir = kwargs.get('extra_dir', None)
            extra_dir_at_root = kwargs.get('extra_dir_at_root', False)
            dir_only = kwargs.get('dir_only', False)
            alt_name = kwargs.get('alt_name', None)

            # Construct hashed path
            rel_path = os.path.join(*directory_hash_id(obj.id))

            # Optionally append extra_dir
            if extra_dir is not None:
                if extra_dir_at_root:
                    rel_path = os.path.join(extra_dir, rel_path)
                else:
                    rel_path = os.path.join(rel_path, extra_dir)

            # Create given directory in cache
            cache_dir = os.path.join(self.staging_path, rel_path)
            if not os.path.exists(cache_dir):
                os.makedirs(cache_dir)

            # Although not really necessary to create S3 folders (because S3 has
            # flat namespace), do so for consistency with the regular file system
            # S3 folders are marked by having trailing '/' so add it now
            # s3_dir = '%s/' % rel_path
            # self._push_to_os(s3_dir, from_string='')
            # If instructed, create the dataset in cache & in S3
            if not dir_only:
                rel_path = os.path.join(
                    rel_path,
                    alt_name if alt_name else "dataset_%s.dat" % obj.id)
                open(os.path.join(self.staging_path, rel_path), 'w').close()
                self._push_to_os(rel_path, from_string='')
Example #8
    def _construct_path(self, obj, base_dir=None, dir_only=None, extra_dir=None, extra_dir_at_root=False, alt_name=None,
                        obj_dir=False, **kwargs):
        # extra_dir should never be constructed from provided data but just
        # make sure there are no shenanigans afoot
        if extra_dir and extra_dir != os.path.normpath(extra_dir):
            log.warning('extra_dir is not normalized: %s', extra_dir)
            raise ObjectInvalid("The requested object is invalid")
        # ensure that any parent directory references in alt_name would not
        # result in a path not contained in the directory path constructed here
        if alt_name:
            if not safe_relpath(alt_name):
                log.warning('alt_name would locate path outside dir: %s', alt_name)
                raise ObjectInvalid("The requested object is invalid")
            # alt_name can contain parent directory references, but S3 will not
            # follow them, so if they are valid we normalize them out
            alt_name = os.path.normpath(alt_name)
        rel_path = os.path.join(*directory_hash_id(obj.id))
        if extra_dir is not None:
            if extra_dir_at_root:
                rel_path = os.path.join(extra_dir, rel_path)
            else:
                rel_path = os.path.join(rel_path, extra_dir)

        # for JOB_WORK directory
        if obj_dir:
            rel_path = os.path.join(rel_path, str(obj.id))
        if base_dir:
            base = self.extra_dirs.get(base_dir)
            return os.path.join(base, rel_path)

        # S3 folders are marked by having trailing '/' so add it now
        rel_path = '%s/' % rel_path

        if not dir_only:
            rel_path = os.path.join(rel_path, alt_name if alt_name else "dataset_%s.dat" % obj.id)
        return rel_path
Example #9
    def _construct_path(self, obj, old_style=False, base_dir=None, dir_only=False, extra_dir=None, extra_dir_at_root=False, alt_name=None, obj_dir=False, **kwargs):
        """
        Construct the absolute path for accessing the object identified by `obj.id`.

        :type base_dir: string
        :param base_dir: A key in self.extra_dirs corresponding to the base
                         directory in which this object should be created, or
                         None to specify the default directory.

        :type dir_only: boolean
        :param dir_only: If True, check only the path where the file
                         identified by `obj` should be located, not the
                         dataset itself. This option applies to `extra_dir`
                         argument as well.

        :type extra_dir: string
        :param extra_dir: Append the value of this parameter to the expected
            path used to access the object identified by `obj` (e.g.,
            /files/000/<extra_dir>/dataset_10.dat).

        :type alt_name: string
        :param alt_name: Use this name as the alternative name for the returned
                         dataset rather than the default.

        :type old_style: boolean
        :param old_style: This option is used for backward compatibility. If
            `True` then the composed directory structure does not include a
            hash id (e.g., /files/dataset_10.dat (old) vs.
            /files/000/dataset_10.dat (new))
        """
        base = os.path.abspath(self.extra_dirs.get(base_dir, self.file_path))
        # extra_dir should never be constructed from provided data but just
        # make sure there are no shenanigans afoot
        if extra_dir and extra_dir != os.path.normpath(extra_dir):
            log.warning('extra_dir is not normalized: %s', extra_dir)
            raise ObjectInvalid("The requested object is invalid")
        # ensure that any parent directory references in alt_name would not
        # result in a path not contained in the directory path constructed here
        if alt_name and not safe_relpath(alt_name):
            log.warning('alt_name would locate path outside dir: %s', alt_name)
            raise ObjectInvalid("The requested object is invalid")
        if old_style:
            if extra_dir is not None:
                path = os.path.join(base, extra_dir)
            else:
                path = base
        else:
            # Construct hashed path
            rel_path = os.path.join(*directory_hash_id(obj.id))
            # Create a subdirectory for the object ID
            if obj_dir:
                rel_path = os.path.join(rel_path, str(obj.id))
            # Optionally append extra_dir
            if extra_dir is not None:
                if extra_dir_at_root:
                    rel_path = os.path.join(extra_dir, rel_path)
                else:
                    rel_path = os.path.join(rel_path, extra_dir)
            path = os.path.join(base, rel_path)
        if not dir_only:
            path = os.path.join(path, alt_name if alt_name else "dataset_%s.dat" % obj.id)
        return os.path.abspath(path)
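Given the docstring above, the options roughly map to paths as follows. The values below (file_path, the hash segments, the extra_dir name) are made up for illustration; only the join logic mirrors the method:

import os

file_path = "/galaxy/files"                 # hypothetical default base directory
hashed = os.path.join("000", "123")         # stand-in for os.path.join(*directory_hash_id(obj.id))
dataset = "dataset_10.dat"

# default: hashed layout under the base directory
print(os.path.join(file_path, hashed, dataset))                       # /galaxy/files/000/123/dataset_10.dat
# extra_dir appended below the hashed part (extra_dir_at_root=False)
print(os.path.join(file_path, hashed, "_metadata_files", dataset))    # /galaxy/files/000/123/_metadata_files/dataset_10.dat
# old_style=True: no hash segments at all
print(os.path.join(file_path, dataset))                               # /galaxy/files/dataset_10.dat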
Example #10
    def rel_path_for_uuid_test(self):
        rel_path = os.path.join(*directory_hash_id(self.uuid))
        return rel_path
Example #11
def get_repos(sa_session, path_to_repositories, hgweb_config_dir):
    """
    Load repos from DB and included tools from .xml configs.
    """
    hgwcm = HgWebConfigManager()
    hgwcm.hgweb_config_dir = hgweb_config_dir
    results = []
    for repo in sa_session.query(model.Repository).filter_by(
            deleted=False).filter_by(deprecated=False).filter(
                model.Repository.type != 'tool_dependency_definition'):
        category_names = []
        for rca in sa_session.query(
                model.RepositoryCategoryAssociation
        ).filter(model.RepositoryCategoryAssociation.repository_id == repo.id):
            for category in sa_session.query(model.Category).filter(
                    model.Category.id == rca.category.id):
                category_names.append(category.name.lower())
        categories = (",").join(category_names)
        repo_id = repo.id
        name = repo.name
        description = repo.description
        long_description = repo.long_description
        homepage_url = repo.homepage_url
        remote_repository_url = repo.remote_repository_url

        times_downloaded = repo.times_downloaded
        if not isinstance(times_downloaded, (int, long)):
            times_downloaded = 0

        repo_owner_username = ''
        if repo.user_id is not None:
            user = sa_session.query(
                model.User).filter(model.User.id == repo.user_id).one()
            repo_owner_username = user.username.lower()

        approved = 'no'
        for review in repo.reviews:
            if review.approved == 'yes':
                approved = 'yes'
                break

        #  Format the time since last update to be nicely readable.
        last_updated = pretty_print_time_interval(repo.update_time)
        full_last_updated = repo.update_time.strftime("%Y-%m-%d %I:%M %p")

        # load all changesets of the repo
        repo_path = hgwcm.get_entry(
            os.path.join("repos", repo.user.username, repo.name))
        hg_repo = hg.repository(ui.ui(), repo_path)
        lineage = []
        for changeset in hg_repo.changelog:
            lineage.append(
                str(changeset) + ":" + str(hg_repo.changectx(changeset)))
        repo_lineage = str(lineage)

        #  Parse all the tools within repo for separate index.
        tools_list = []
        path = os.path.join(path_to_repositories, *directory_hash_id(repo.id))
        path = os.path.join(path, "repo_%d" % repo.id)
        if os.path.exists(path):
            tools_list.extend(load_one_dir(path))
            for root, dirs, files in os.walk(path):
                if '.hg' in dirs:
                    dirs.remove('.hg')
                for dirname in dirs:
                    tools_in_dir = load_one_dir(os.path.join(root, dirname))
                    tools_list.extend(tools_in_dir)

        results.append(
            dict(id=repo_id,
                 name=name,
                 description=description,
                 long_description=long_description,
                 homepage_url=homepage_url,
                 remote_repository_url=remote_repository_url,
                 repo_owner_username=repo_owner_username,
                 times_downloaded=times_downloaded,
                 approved=approved,
                 last_updated=last_updated,
                 full_last_updated=full_last_updated,
                 tools_list=tools_list,
                 repo_lineage=repo_lineage,
                 categories=categories))
    return results
Example #12
def copy_files_to_irods(start_dataset_id, end_dataset_id,
                        object_store_info_file, irods_info_file,
                        db_connection_info_file, copy_or_checksum):
    conn = None
    session = None
    osi_keys = None
    read_sql_statement = None
    read_cursor = None
    args = None
    rows = None
    objectid = None
    object_store_id = None
    uuid_without_dash = None
    uuid_with_dash = None
    object_store_path = None
    disk_sub_folder = None
    irods_sub_folder = None
    disk_file_path = None
    disk_folder_path = None
    irods_file_path = None
    irods_file_collection_path = None
    irods_folder_collection_path = None
    options = None
    iput_command = None
    object_store_info = None
    irods_info = None
    db_connection_info = None

    if start_dataset_id > end_dataset_id:
        print(
            f"Error: start_dataset_id {start_dataset_id} cannot be larger than end_dataset_id {end_dataset_id}"
        )
        return

    # read object_store_info file
    with open(object_store_info_file, mode="r") as osi:
        object_store_info = json.load(osi)
    osi_keys = tuple(object_store_info.keys())

    # read irods_info_file
    with open(irods_info_file, mode="r") as ii:
        irods_info = json.load(ii)

    # read db_connection_info file
    with open(db_connection_info_file, mode="r") as dci:
        db_connection_info = json.load(dci)

    try:
        # declare a new PostgreSQL connection object
        conn = connect(dbname=db_connection_info["dbname"],
                       user=db_connection_info["user"],
                       host=db_connection_info["host"],
                       password=db_connection_info["password"])
        conn.cursor()

    except Exception as e:
        print(e)
        return

    session = iRODSSession(host=irods_info["host"],
                           port=irods_info["port"],
                           user=irods_info["user"],
                           password=irods_info["password"],
                           zone=irods_info["zone"])
    session.connection_timeout = int(irods_info["timeout"])

    osi_keys = tuple(object_store_info.keys())
    read_sql_statement = """SELECT id, object_store_id, uuid
                            FROM dataset
                            WHERE state = %s
                            AND NOT deleted
                            AND NOT purged
                            AND id >= %s
                            AND id <= %s
                            AND object_store_id IN %s"""

    update_sql_statement = """UPDATE dataset
                              SET object_store_id = %s
                              WHERE id = %s"""

    try:
        read_cursor = conn.cursor()
        args = ('ok', start_dataset_id, end_dataset_id, osi_keys)
        read_cursor.execute(read_sql_statement, args)
        rows = read_cursor.fetchall()
        for row in rows:
            objectid = row[0]
            object_store_id = row[1]
            uuid_without_dash = row[2]
            uuid_with_dash = str(uuid.UUID(uuid_without_dash))

            object_store_path = object_store_info.get(object_store_id)
            if object_store_path is None:
                print(
                    f"Error: object_store_info_file does not have a value for {object_store_id}"
                )
                raise Exception

            irods_resc = get_irods_resource(conn, objectid, object_store_id,
                                            irods_info)

            disk_sub_folder = os.path.join(*directory_hash_id(objectid))
            irods_sub_folder = os.path.join(*directory_hash_id(uuid_with_dash))
            disk_file_path = os.path.join(object_store_path, disk_sub_folder,
                                          "dataset_" + str(objectid) + ".dat")
            disk_folder_path = os.path.join(
                object_store_path, disk_sub_folder,
                "dataset_" + str(objectid) + "_files")
            irods_file_path = os.path.join(
                irods_info["home"], irods_sub_folder,
                "dataset_" + str(uuid_with_dash) + ".dat")
            irods_file_collection_path = os.path.join(irods_info["home"],
                                                      irods_sub_folder)
            irods_folder_collection_path = os.path.join(
                irods_file_collection_path,
                "dataset_" + str(uuid_with_dash) + "_files")

            if copy_or_checksum == "copy":
                # Create the collection
                session.collections.create(irods_file_collection_path)

                # Add disk file to collection
                options = {kw.REG_CHKSUM_KW: '', kw.RESC_NAME_KW: irods_resc}
                session.data_objects.put(disk_file_path, irods_file_path,
                                         **options)
                print(
                    f"Copied disk file {disk_file_path} to irods {irods_file_path}"
                )

                if os.path.isdir(disk_folder_path):
                    disk_folder_path_all_files = disk_folder_path + "/*"

                    # Create the collection
                    session.collections.create(irods_folder_collection_path)

                    iput_command = "iput -R " + irods_resc + " -rk " + disk_folder_path_all_files + " " + irods_folder_collection_path
                    subprocess.call(iput_command, shell=True)
                    print(
                        f"Copied disk folder {disk_folder_path} to irods {irods_folder_collection_path}"
                    )

            if copy_or_checksum == "checksum":
                # Calculate disk file checksum. Then get the file checksum from irods and compare it with the calculated disk file checksum
                # Note that disk file checksum is ASCII, whereas irods file checksum is Unicode. get_file_checksum() converts checksum to unicode
                disk_file_checksum = get_file_checksum(disk_file_path)
                # Now get the file from irods
                try:
                    obj = session.data_objects.get(irods_file_path)
                    # obj.checksum is prepended with 'sha2:'. Remove that so we can compare it to disk file checksum
                    irods_file_checksum = obj.checksum[5:]
                    if irods_file_checksum != disk_file_checksum:
                        print(
                            f"Error: irods file checksum {irods_file_checksum} does not match disk file checksum {disk_file_checksum} for irods file {irods_file_path} and disk file {disk_file_path}"
                        )
                        continue
                except (DataObjectDoesNotExist, CollectionDoesNotExist) as e:
                    print(e)
                    continue
                except NetworkException as e:
                    print(e)
                    continue

                # Recursively verify that the checksum of all files in this folder matches that in irods
                if os.path.isdir(disk_folder_path):
                    # Recursively traverse the files in this folder
                    for root, _dirs, files in os.walk(disk_folder_path):
                        for file_name in files:
                            a_disk_file_path = os.path.join(root, file_name)
                            # Get checksum for disk file
                            a_disk_file_checksum = get_file_checksum(
                                a_disk_file_path)

                            # Construct the irods path for this disk file, so we can get the file from irods and compare its checksum with the disk file checksum
                            # This extracts the subfolder name for irods from the full disk path
                            irods_sub_folder = root.replace(
                                disk_folder_path + "/", "")
                            # For some reason, join would not work here. I used string concatenation instead
                            an_irods_file_path = irods_folder_collection_path + "/" + irods_sub_folder + "/" + file_name

                            # Now get the file from irods
                            try:
                                obj = session.data_objects.get(
                                    an_irods_file_path)
                                # obj.checksum is prepended with 'sha2:'. Remove that so we can compare it to disk file checksum
                                an_irods_file_checksum = obj.checksum[5:]
                                if an_irods_file_checksum != a_disk_file_checksum:
                                    print(
                                        f"Error: irods file checksum {an_irods_file_checksum} does not match disk file checksum {a_disk_file_checksum} for irods file {an_irods_file_path} and disk file {a_disk_file_path}"
                                    )
                                    continue
                            except (DataObjectDoesNotExist,
                                    CollectionDoesNotExist) as e:
                                print(e)
                                continue
                            except NetworkException as e:
                                print(e)
                                continue

                    # Delete the folder on disk
                    print(f"Removing directory {disk_folder_path}")
                    shutil.rmtree(disk_folder_path)

                # Update object store id
                update_cursor = conn.cursor()
                update_cursor.execute(
                    update_sql_statement,
                    (irods_info["object_store_id"], objectid))
                updated_rows = update_cursor.rowcount
                if updated_rows == 1:
                    print(
                        "Updated object store ID to {} in dataset table for object ID {}"
                        .format(irods_info["object_store_id"], objectid))
                else:
                    print(
                        "Error: Failed to update object store ID to {} in dataset table for object ID {}"
                        .format(irods_info["object_store_id"], objectid))
                update_cursor.close()

                # Delete file on disk
                print(f"Removing file {disk_file_path}")
                os.remove(disk_file_path)

    except Exception as e:
        print(e)
        session.cleanup()
        conn.rollback()
        read_cursor.close()
        conn.close()
        return

    session.cleanup()
    conn.commit()
    read_cursor.close()
    conn.close()
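The core of this script is the path mapping: on disk the dataset path is derived from the integer id, while in iRODS it is derived from the dataset UUID. A standalone illustration of that mapping (all values below are invented; the real hash segments come from directory_hash_id):

import os

object_store_path = "/data/objects"                       # hypothetical disk object store root
irods_home = "/tempZone/home/galaxy"                      # hypothetical irods_info["home"]
objectid = 123456
uuid_with_dash = "a1b2c3d4-0000-0000-0000-000000000000"   # made-up dataset UUID

disk_sub_folder = os.path.join("000", "123")              # stand-in for directory_hash_id(objectid)
irods_sub_folder = os.path.join("a1", "b2")               # stand-in for directory_hash_id(uuid_with_dash)

disk_file_path = os.path.join(object_store_path, disk_sub_folder, "dataset_" + str(objectid) + ".dat")
irods_file_path = os.path.join(irods_home, irods_sub_folder, "dataset_" + str(uuid_with_dash) + ".dat")

print(disk_file_path)   # /data/objects/000/123/dataset_123456.dat
print(irods_file_path)  # /tempZone/home/galaxy/a1/b2/dataset_a1b2c3d4-0000-0000-0000-000000000000.dat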
Example #13
def get_repos(sa_session, file_path, hgweb_config_dir, **kwargs):
    """
    Load repos from DB and included tools from .xml configs.
    """
    hgwcm = hgweb_config_manager
    hgwcm.hgweb_config_dir = hgweb_config_dir
    # Do not index deleted, deprecated, or "tool_dependency_definition" type repositories.
    q = sa_session.query(model.Repository).filter_by(deleted=False).filter_by(
        deprecated=False).order_by(model.Repository.update_time.desc())
    q = q.filter(model.Repository.type != 'tool_dependency_definition')
    for repo in q:
        category_names = []
        for rca in sa_session.query(
                model.RepositoryCategoryAssociation
        ).filter(model.RepositoryCategoryAssociation.repository_id == repo.id):
            for category in sa_session.query(model.Category).filter(
                    model.Category.id == rca.category.id):
                category_names.append(category.name.lower())
        categories = (",").join(category_names)
        repo_id = repo.id
        name = repo.name
        description = repo.description
        long_description = repo.long_description
        homepage_url = repo.homepage_url
        remote_repository_url = repo.remote_repository_url

        times_downloaded = repo.times_downloaded or 0

        repo_owner_username = ''
        if repo.user_id is not None:
            user = sa_session.query(
                model.User).filter(model.User.id == repo.user_id).one()
            repo_owner_username = user.username.lower()

        approved = 'no'
        for review in repo.reviews:
            if review.approved == 'yes':
                approved = 'yes'
                break

        last_updated = pretty_print_time_interval(repo.update_time)
        full_last_updated = repo.update_time.strftime("%Y-%m-%d %I:%M %p")

        # Load all changesets of the repo for lineage.
        repo_path = os.path.join(
            hgweb_config_dir,
            hgwcm.get_entry(
                os.path.join("repos", repo.user.username, repo.name)))
        hg_repo = hg.repository(ui.ui(), repo_path.encode('utf-8'))
        lineage = []
        for changeset in hg_repo.changelog:
            lineage.append(
                unicodify(changeset) + ":" + unicodify(hg_repo[changeset]))
        repo_lineage = str(lineage)

        #  Parse all the tools within repo for a separate index.
        tools_list = []
        path = os.path.join(file_path, *directory_hash_id(repo.id))
        path = os.path.join(path, "repo_%d" % repo.id)
        if os.path.exists(path):
            tools_list.extend(load_one_dir(path))
            for root, dirs, _files in os.walk(path):
                if '.hg' in dirs:
                    dirs.remove('.hg')
                for dirname in dirs:
                    tools_in_dir = load_one_dir(os.path.join(root, dirname))
                    tools_list.extend(tools_in_dir)

        yield (dict(id=unicodify(repo_id),
                    name=unicodify(name),
                    description=unicodify(description),
                    long_description=unicodify(long_description),
                    homepage_url=unicodify(homepage_url),
                    remote_repository_url=unicodify(remote_repository_url),
                    repo_owner_username=unicodify(repo_owner_username),
                    times_downloaded=unicodify(times_downloaded),
                    approved=unicodify(approved),
                    last_updated=unicodify(last_updated),
                    full_last_updated=unicodify(full_last_updated),
                    tools_list=tools_list,
                    repo_lineage=unicodify(repo_lineage),
                    categories=unicodify(categories)))
Example #14
def get_repos(sa_session, path_to_repositories):
    """
    Load repos from DB and included tools from .xml configs.
    """
    results = []
    for repo in sa_session.query(model.Repository).filter_by(
            deleted=False).filter_by(deprecated=False).filter(
                model.Repository.type != 'tool_dependency_definition'):

        repo_id = repo.id
        name = repo.name
        description = repo.description
        long_description = repo.long_description
        homepage_url = repo.homepage_url
        remote_repository_url = repo.remote_repository_url

        times_downloaded = repo.times_downloaded
        if not isinstance(times_downloaded, (int, long)):
            times_downloaded = 0

        repo_owner_username = ''
        if repo.user_id is not None:
            user = sa_session.query(
                model.User).filter(model.User.id == repo.user_id).one()
            repo_owner_username = user.username

        approved = 'no'
        for review in repo.reviews:
            if review.approved == 'yes':
                approved = 'yes'
                break

        #  Format the time since last update to be nicely readable.
        last_updated = pretty_print_time_interval(repo.update_time)
        full_last_updated = repo.update_time.strftime("%Y-%m-%d %I:%M %p")

        #  Parse all the tools within repo for separate index.
        tools_list = []
        path = os.path.join(path_to_repositories, *directory_hash_id(repo.id))
        path = os.path.join(path, "repo_%d" % repo.id)
        if os.path.exists(path):
            tools_list.extend(load_one_dir(path))
            for root, dirs, files in os.walk(path):
                if '.hg' in dirs:
                    dirs.remove('.hg')
                for dirname in dirs:
                    tools_in_dir = load_one_dir(os.path.join(root, dirname))
                    tools_list.extend(tools_in_dir)

        results.append(
            dict(id=repo_id,
                 name=name,
                 description=description,
                 long_description=long_description,
                 homepage_url=homepage_url,
                 remote_repository_url=remote_repository_url,
                 repo_owner_username=repo_owner_username,
                 times_downloaded=times_downloaded,
                 approved=approved,
                 last_updated=last_updated,
                 full_last_updated=full_last_updated,
                 tools_list=tools_list))
    return results