Example #1
    def __init__(self,
                 url_prefix,
                 title,
                 description,
                 reference,
                 tags=None,
                 creation_date=None,
                 modification_date=None,
                 fork_count=0,
                 forked_from=None,
                 forked_from_id=None):
        # Resolve the date defaults at call time; a now() default in the
        # signature would be evaluated only once, at definition time.
        if creation_date is None:
            creation_date = now()

        if modification_date is None:
            modification_date = now()

        with lock:
            if DatasetDAO.query.get(url_prefix=url_prefix) is not None:
                raise Exception("URL prefix already taken.")

        if forked_from_id is None and forked_from is not None:
            forked_from_id = forked_from._id

        size = 0  # picked up by the locals() harvest below
        kwargs = {
            k: v
            for k, v in locals().items()
            if k not in ["self", "__class__", "forked_from"]
        }
        super().__init__(**kwargs)
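A note on the locals()-harvesting pattern above: the iterable of a comprehension's outermost for clause is evaluated in the enclosing frame, so locals() there captures the method's arguments (and size), not the comprehension's own scope. A minimal standalone sketch of the idea, with illustrative names that are not from the project:

def make_kwargs_demo(a, b=1, c=None):
    # The outermost iterable is evaluated in this function's frame,
    # so locals() sees a, b and c.
    kwargs = {k: v for k, v in locals().items() if k != "a"}
    return kwargs

print(make_kwargs_demo(10))  # {'b': 1, 'c': None}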
Example #2
    def __init__(self,
                 title,
                 description,
                 file_ref_id,
                 http_ref=None,
                 tags=None,
                 addition_date=None,
                 modification_date=None,
                 dataset_id=None,
                 dataset=None):
        if addition_date is None:
            addition_date = now()

        if modification_date is None:
            modification_date = now()

        if dataset_id is None and dataset is not None:
            dataset_id = [dataset._id]

        kwargs = {
            k: v
            for k, v in locals().items()
            if k not in ["self", "__class__", "datasets"]
        }
        super().__init__(**kwargs)
Example #3
    def __thread_func(self):
        # Background loop: run a garbage-collection pass every TIMER_TICK
        # seconds until a stop is requested.
        while not self.__stop_requested():
            if (now() - self.last_tick).total_seconds() > TIMER_TICK:
                with self.lock:
                    self.last_tick = now()

                self.do_garbage_collect()
            sleep(1)
        print("[GC] Exited.")
Example #4
    def __init__(self, description, max_dataset_count, max_dataset_size, url_prefix, token_gui=None,
                 creation_date=None, modification_date=None, end_date=None,
                 privileges=Privileges.RO_WATCH_DATASET):

        # Resolve time-dependent defaults at call time; a now() default in the
        # signature would be evaluated only once, at definition time.
        if creation_date is None:
            creation_date = now()

        if modification_date is None:
            modification_date = now()

        if end_date is None:
            end_date = token_future_end()

        if token_gui is None:
            token_gui = self.generate_token()
        elif self.query.get(token_gui=token_gui) is not None:
            raise Exception("Specified token GUI already exists.")

        kwargs = {k: v for k, v in locals().items() if k not in ["self", "__class__"]}
        super().__init__(**kwargs)
Example #5
    def __init__(self):
        super().__init__()
        self.get_parser = reqparse.RequestParser()
        self.delete_parser = self.get_parser  # DELETE accepts the same arguments as GET
        self.post_parser = reqparse.RequestParser()
        self.session = global_config.get_session()

        arguments = {
            "token_gui": {
                "type": str,
                "required": True,
                "help": "Global Unique Identifier of the token is required.",
                "location": "json"
            }
        }

        for argument, kwargs in arguments.items():
            self.get_parser.add_argument(argument, **kwargs)

        arguments = {
            "description": {
                "type": str,
                "help": "Description for the token.",
                "location": "json"
            },
            "max_dataset_count": {
                "type": int,
                "help":
                "Max number of datasets that it is allowed to create this token.",
                "location": "json"
            },
            "max_dataset_size": {
                "type": int,
                "help": "Max size for a dataset created by this token",
                "location": "json"
            },
            "end_date": {
                "type": str,
                "help": "End date for this token, in format {}".format(now()),
                "location": "json"
            },
            "privileges": {
                "type": int,
                "help": "Privileges integer for this token",
                "location": "json"
            },
            "token_gui": {
                "type": str,
                "help": "Token GUI, if not specified a new one is generated.",
                "location": "json"
            },
            "url_prefix": {
                "type": str,
                "help": "Token URL prefix.",
                "location": "json"
            },
        }

        for argument, kwargs in arguments.items():
            self.post_parser.add_argument(argument, **kwargs)
Example #6
    def parse_args(self,
                   req=None,
                   strict=False,
                   required_any_token_privileges=None,
                   required_all_token_privileges=None):
        args = super().parse_args(req=req, strict=strict)

        if required_any_token_privileges is None:
            required_any_token_privileges = []

        if required_all_token_privileges is None:
            required_all_token_privileges = []

        token_gui = args['_tok']

        if token_gui is None:
            abort(404)

        token = TokenDAO.query.get(token_gui=token_gui)

        if token is None:
            print("Token invalid {}".format(token_gui))
            abort(404)

        # The token must hold every privilege in required_all_token_privileges
        # and, when required_any_token_privileges is non-empty, at least one
        # privilege from it. A bare any() over an empty list is always False
        # and would reject every request.
        pbac_ok = all(bool(token.privileges & privilege) for privilege in required_all_token_privileges) and \
                  (not required_any_token_privileges or
                   any(bool(token.privileges & privilege) for privilege in required_any_token_privileges))

        if not pbac_ok:
            abort(401)

        if token.end_date < now():
            abort(410)

        return args, token
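For context, a hedged usage sketch of this parse_args variant inside a hypothetical endpoint handler; the parser attribute and the privilege choice are illustrative (Privileges.RO_WATCH_DATASET is the only flag named in these examples):

# Inside a hypothetical Flask-RESTful resource method:
args, token = self.get_parser.parse_args(
    required_any_token_privileges=[Privileges.RO_WATCH_DATASET]
)
# token is the TokenDAO that authenticated the request; args holds the
# parsed request arguments.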
Example #7
    def add_element(self,
                    title,
                    description,
                    file_ref_id,
                    http_ref=None,
                    tags=None,
                    addition_date=None,
                    modification_date=None):
        # Dates left as None are filled in with now() by DatasetElementDAO
        # at construction time.
        return DatasetElementDAO(title,
                                 description,
                                 file_ref_id,
                                 http_ref,
                                 tags,
                                 addition_date,
                                 modification_date,
                                 dataset=self)
Example #8
    def edit_dataset(self, edit_url_prefix:str, *args, **kwargs) -> DatasetDAO:
        can_edit_inner_dataset = bool(self.token.privileges & Privileges.EDIT_DATASET)
        can_edit_others_dataset = bool(self.token.privileges & Privileges.ADMIN_EDIT_TOKEN)
        illegal_chars = self.illegal_chars

        if not any([can_edit_inner_dataset, can_edit_others_dataset]):
            abort(401, message="Your token does not have privileges enough to edit datasets.")

        try:
            url_prefix = kwargs["url_prefix"]

            if can_edit_others_dataset:
                illegal_chars = illegal_chars[1:]  # "/" is allowed for admins

            if any(illegal_char in url_prefix for illegal_char in illegal_chars):
                abort(400, message="The following chars are not allowed in the url-prefix: {}".format(illegal_chars))

            if "/" not in url_prefix:
                url_prefix="{}/{}".format(self.token.url_prefix, url_prefix)

            kwargs['url_prefix'] = url_prefix

        except KeyError:
            # No url_prefix in the edit request; nothing to rewrite.
            pass

        if 'elements' in kwargs or 'comments' in kwargs:
            abort(400, message="There is a field not allowed in the edit request.")

        edit_dataset = DatasetDAO.query.get(url_prefix=edit_url_prefix)

        if edit_dataset is None:
            abort(404, message="Dataset wasn't found.")

        if not can_edit_others_dataset:
            if edit_dataset.url_prefix.split("/")[0] != self.token.url_prefix:
                abort(401, message="Dataset can't be accessed.")

            # Fix: this token can only edit a dataset if the dataset is linked to it.
            if not self.token.has_dataset(edit_dataset):
                abort(401, message="Dataset can't be accessed.")

        kwargs['modification_date'] = now()

        # Modification is performed here
        for k, v in kwargs.items():
            if v is None:
                continue

            edit_dataset[k] = v

        self.session.flush()

        return edit_dataset
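A hedged call sketch; the receiver object, URL prefix and fields are illustrative:

# Rename a dataset owned by this token; modification_date is set internally.
updated = api.edit_dataset("mytoken/my-dataset",
                           title="New title",
                           description="Updated description")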
Example #9
    def add_comment(self,
                    author_name,
                    author_link,
                    content,
                    addition_date=None):
        # A date left as None is filled in with now() by
        # DatasetElementCommentDAO at construction time.
        return DatasetElementCommentDAO(author_name,
                                        author_link,
                                        content,
                                        addition_date,
                                        element=self)
Example #10
    def __init__(self):
        super().__init__()
        self.get_parser = reqparse.RequestParser()
        self.get_parser.add_argument("url_prefix",
                                     type=str,
                                     required=False,
                                     help="URL prefix to get tokens from.")

        self.post_parser = reqparse.RequestParser()
        self.session = global_config.get_session()
        arguments = {
            "description": {
                "type": str,
                "required": True,
                "help": "Description for the dataset.",
                "location": "json"
            },
            "max_dataset_count": {
                "type": int,
                "required": True,
                "help":
                "Max number of datasets that it is allowed to create this token.",
                "location": "json"
            },
            "max_dataset_size": {
                "type": int,
                "required": True,
                "help": "Max size for a dataset created by this token",
                "location": "json"
            },
            "end_date": {
                "type": str,
                "help": "End date for this token, in format {}".format(now()),
                "location": "json"
            },
            "privileges": {
                "type": int,
                "required": True,
                "help": "Privileges integer for this token",
                "location": "json"
            },
            "url_prefix": {
                "type": str,
                "help": "Token URL prefix.",
                "location": "json"
            },
        }

        for argument, kwargs in arguments.items():
            self.post_parser.add_argument(argument, **kwargs)
Example #11
        def args_wrap(*args, **kwargs):
            # Simple per-IP rate limiting: count accesses per remote address
            # and reset the counter once ACCESS_RESET_TIME seconds have passed.
            remote_ip = request.remote_addr

            ip_control = RestAPIDAO.query.get(ip=remote_ip)

            if ip_control is None:
                ip_control = RestAPIDAO(ip=remote_ip,
                                        last_access=now(),
                                        num_accesses=0)

            if (now() - ip_control.last_access).total_seconds() > ACCESS_RESET_TIME:
                ip_control.last_access = now()
                ip_control.num_accesses = 0

            if ip_control.num_accesses > MAX_ACCESS_TIMES:
                abort(429)  # Too Many Requests

            ip_control.num_accesses += 1

            session.flush()
            session.clear()

            return func(*args, **kwargs)
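args_wrap reads like the inner function of a rate-limiting decorator: it closes over func. A minimal sketch of the assumed enclosing shape (the decorator name is illustrative; request, abort, session, RestAPIDAO and the constants are expected to be in scope as above):

import functools

def rate_limited(func):
    # Hypothetical outer decorator; args_wrap above would be its body.
    @functools.wraps(func)
    def args_wrap(*args, **kwargs):
        # ... per-IP accounting as shown above ...
        return func(*args, **kwargs)
    return args_wrap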
Example #12
    def __init__(self,
                 author_name,
                 author_link,
                 content,
                 addition_date=None,
                 element_id=None,
                 element=None):
        # Resolve the date default at call time; a now() default in the
        # signature would be evaluated only once, at definition time.
        if addition_date is None:
            addition_date = now()

        if element_id is None and element is not None:
            element_id = element._id

        kwargs = {
            k: v
            for k, v in locals().items()
            if k not in ["self", "__class__", "element"]
        }
        super().__init__(**kwargs)
Example #13
    def __init__(self,
                 author_name,
                 author_link,
                 content,
                 addition_date=None,
                 dataset_id=None,
                 dataset=None):
        # Resolve the date default at call time; a now() default in the
        # signature would be evaluated only once, at definition time.
        if addition_date is None:
            addition_date = now()

        if dataset_id is None and dataset is not None:
            dataset_id = dataset._id

        kwargs = {
            k: v
            for k, v in locals().items()
            if k not in ["self", "__class__", "dataset"]
        }
        super().__init__(**kwargs)
Example #14
def __create_token__(namespace, description, max_datasets, max_elements_per_dataset, privileges, duration_in_days, end_date=None):

    illegal_chars = "/*;:,.ç´`+Ǩ^><¿?'¡¿!\"·$%&()@~¬"
    if any([i in namespace for i in illegal_chars]):
        print("[ERROR] The namespace can't hold any of the following chars:\n\"{}\"".format(illegal_chars))
        exit(-1)

    from mldatahub.odm.dataset_dao import DatasetDAO
    from mldatahub.odm.token_dao import TokenDAO

    if end_date is None:
        end_date = now() + relativedelta(days=+duration_in_days)

    token = TokenDAO(description=description, max_dataset_count=max_datasets, max_dataset_size=max_elements_per_dataset,
                     url_prefix=namespace, end_date=end_date,
                     privileges=privileges)

    global_config.get_session().flush()

    return token
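A hedged usage sketch with illustrative argument values:

# Create a namespace-scoped token valid for 30 days.
token = __create_token__(namespace="demo",
                         description="Demo token",
                         max_datasets=5,
                         max_elements_per_dataset=1000,
                         privileges=Privileges.RO_WATCH_DATASET,
                         duration_in_days=30)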
Example #15
    def __init__(self, storage=None):
        if storage is None:
            storage = global_config.get_storage()

        self.storage = storage

        drive_folder = global_config.get_google_drive_folder()
        i("Initializing PYGFolder...")
        self.pygfolder = PyGFolder()
        i("Done")

        if drive_folder.endswith("/"): drive_folder = drive_folder[:-1]

        init_file = drive_folder + "/init"
        i("Creating init file with current timestamp...")
        self.pygfolder[init_file] = as_bytes(now())
        i("Done")
        i("Accessing folder...")
        self.pygfolder = self.pygfolder[drive_folder]
        i("Done")
        # Background workers for asynchronous uploads and downloads.
        self.uploader = Thread(target=self.__uploader__, daemon=True)
        self.downloader = Thread(target=self.__downloader__, daemon=True)
        self.uploader.start()
        self.downloader.start()
Example #16
def token_future_end():
    # Default token expiry: far enough in the future to never expire.
    return now() + relativedelta(years=+500)
Example #17
class GarbageCollector(object):
    """
    Class whose purpose is to clean the filesystem from files that lost references.
    This is a GarbageCollector.

    In order to do this optimally, fortunately the storage class keeps a set with all the filenames he worked with,
    it is guaranteed that it is up-to-date and persistent across server reboots.
    It is better to iterate over this structure rather than the filesystem itself.
    """
    lock = threading.Lock()
    do_stop = False
    storage = global_config.get_storage() # type: GenericStorage
    last_tick = now() - datetime.timedelta(minutes=60)

    def __init__(self):
        self.thread = Thread(target=self.__thread_func, daemon=True)
        self.thread.start()
        self.previous_unused_files = {}

    def __stop_requested(self):
        with self.lock:
            value = self.do_stop
        return value

    def __thread_func(self):
        # Background loop: run a garbage-collection pass every TIMER_TICK
        # seconds until a stop is requested.
        while not self.__stop_requested():
            if (now() - self.last_tick).total_seconds() > TIMER_TICK:
                with self.lock:
                    self.last_tick = now()

                self.do_garbage_collect()
            sleep(1)
        print("[GC] Exited.")

    def collect_unused_files(self):
        """
        Searches the DB for unused files and returns a list of their ids.
        :return: list with the IDs of the unused files.
        """
        unused_files = []

        files_count = len(self.storage)

        print("[GC] {} files to be checked.".format(files_count))

        sleep_batch = 50
        files_per_second = [0]
        files_per_second_avg = 0
        time_remaining = -1

        with Measure() as timing:
            for index, file in enumerate(self.storage):
                if DatasetElementDAO.query.get(file_ref_id=file._id) is None:
                    unused_files.append(file._id)

                if index % sleep_batch == 0:
                    sleep(0.1)

                if len(files_per_second) < 5:
                    files_per_second[-1] += 1
                    if timing.elapsed().seconds >= 1:
                        files_per_second.append(0)
                        timing.reset()

                    files_per_second_avg = sum(files_per_second) / len(files_per_second)
                    time_remaining = ""
                else:
                    time_remaining = " {} remaining".format(time_left_as_str((len(self.storage) - index) // files_per_second_avg))

                if self.__stop_requested():
                    break

                progress(index+1, files_count, "{} files are orphan.{}".format(len(unused_files), time_remaining))

        print("")
        return unused_files

    def do_garbage_collect(self):
        print("[GC] Collecting garbage...")

        global_config.session.clear()

        # 1. We retrieve the unused files.
        unused_files = self.collect_unused_files()

        # 2. We check how many unused files are in common with the previous unused files.
        new_unused_files = []
        remove_files = []
        print("[GC] Comparing {} unused files to previous {} unused files.".format(len(unused_files), len(self.previous_unused_files)))

        for file in unused_files:
            if file in self.previous_unused_files:
                # Unused in two consecutive passes: safe to remove.
                remove_files.append(file)
            else:
                new_unused_files.append(file)

        print("[GC] Cleaning {} elements...".format(len(remove_files)))

        # 3. We delete by batches
        files_count = 0
        for list_ids in segment(remove_files, 50):
            files_count += len(list_ids)
            self.storage.delete_files(list_ids)
            progress(files_count, len(remove_files), "{} files garbage collected.".format(files_count))
            sleep(0.1)

        self.previous_unused_files = set(new_unused_files)

        print("[GC] Cleaned {} elements...".format(len(remove_files)))

        return files_count

    def stop(self, wait_for_finish=True):
        with self.lock:
            self.do_stop = True

        if wait_for_finish:
            self.thread.join()

    def __del__(self):
        self.stop()
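A hedged lifecycle sketch for the collector, assuming global_config is initialized before the class is loaded (its class attributes require it):

# Instantiating starts the daemon thread immediately.
gc_worker = GarbageCollector()

# ... the thread runs a pass every TIMER_TICK seconds ...

# Ask the loop to exit and wait for the current pass to finish.
gc_worker.stop(wait_for_finish=True)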