Пример #1
0
    def __init__(self, model: dict):
        """Initialise the model from an external reference or an inline dict.

        :param model: either a string, which is handed to ``detect_actions``
            to load an external definition, or an inline declaration from
            which ``name``, ``fields`` and ``mappers`` are read. A falsy
            value leaves the instance untouched.
        :raises CrawlinoValueError: if the external reference resolves to a
            falsy result, or if an inline declaration has no ``name``.
        """
        if not model:
            return

        # Load external
        if isinstance(model, str):
            loaded = detect_actions(model)

            # Check the *loaded* result: ``model`` is already known to be
            # truthy at this point, so testing it could never fail.
            if not loaded:
                raise CrawlinoValueError("Invalid model input values",
                                         exc_info=True,
                                         extra={"input_model": model})

            # NOTE(review): the loaded definition is not parsed into
            # name/fields/mappers here (it never was) -- confirm whether the
            # external model should go through the inline path below.
            return

        # Inline declaration
        self.name = gt(model, "name", None)

        if not self.name:
            # The check is on 'name', so the message must name that property.
            raise CrawlinoValueError("Error in Models: Models must "
                                     "have 'name' property.")

        self.fields = CMModelsFields(gt(model, "fields", None))
        self.mappers = CMModelsMappers(gt(model, "mappers", None))
Пример #2
0
    def __init__(self, crawler_file: File):
        """Build the crawler description from a parsed crawler file.

        Reads ``name``, ``description``, ``tags``, ``config``, ``sources``,
        ``input``, ``extractors`` and ``hooks`` from ``crawler_file.parsed``.

        :param crawler_file: file object whose ``parsed`` attribute holds the
            crawler definition.
        :raises CrawlinoValueError: if ``name`` is missing, ``tags`` is not a
            list of strings, or the ``sources`` / ``input`` sections are
            absent.
        """
        self.crawler_file = crawler_file
        parsed = self.crawler_file.parsed

        self.name = gt(parsed, "name", None)
        self.description = gt(parsed, "description", "")
        self.tags = gt(parsed, "tags", [])

        if not self.name:
            raise CrawlinoValueError("Error in self.models: self.models must "
                                     "have 'name' property.")
        if not isinstance(self.tags, list) or \
                not all(isinstance(x, str) for x in self.tags):
            raise CrawlinoValueError("tags must be a list of strings")

        self.config = [
            CMConfig(type=x.get("type", None),
                     config=x.get("config", None),
                     name=x.get("name", None))
            for x in parsed.get("config", [])
        ]

        # A missing section used to surface as a bare TypeError when
        # iterating None; report it as a validation error instead.
        raw_sources = parsed.get("sources")
        if raw_sources is None:
            raise CrawlinoValueError("Crawler must have 'sources' property.")

        self.sources = [
            CMSource(type=x.get("type", None),
                     config=x.get("config", None),
                     name=x.get("name", None))
            for x in raw_sources
        ]

        # NOTE(review): ``model`` is built from the "sources" section with
        # CMSource, exactly like ``sources`` above -- looks like a copy/paste
        # leftover; confirm the intended section and class.
        self.model = [
            CMSource(type=x.get("type", None),
                     config=x.get("config", None),
                     name=x.get("name", None))
            for x in raw_sources
        ]

        # Same reasoning as for sources: calling .get() on None raised an
        # AttributeError that said nothing about the missing section.
        _input = parsed.get("input")
        if _input is None:
            raise CrawlinoValueError("Crawler must have 'input' property.")

        self.input = [
            CMInput(type=_input.get("type", None),
                    config=_input.get("config", None),
                    name=_input.get("name", None))
        ]

        # Only entries carrying a "ruleSet" key become rule sets.
        _extractors = parsed.get("extractors", [])
        self.extractors = [
            CMRuleSet(x.get("ruleSet")) for x in _extractors if "ruleSet" in x
        ]

        self.hooks = [
            CMHook(type=x.get("type", None),
                   config=x.get("config", None),
                   name=x.get("name", None))
            for x in parsed.get("hooks", [])
        ]
        # Setting default hook
        if not self.hooks:
            self.hooks = [CMHook(type="print", config=None, name=None)]
Пример #3
0
    def __init__(self, fields: List[Dict]):
        """Index the declared fields by their name.

        :param fields: list of field declarations; each one must provide a
            ``"name"`` entry and may provide ``"key"``. A falsy value leaves
            the index empty.
        :raises CrawlinoFormatError: if two declarations produce a field
            with the same name.
        """
        self._raw_data = fields
        self.fields = {}

        # A falsy ``fields`` simply yields no iterations.
        for declaration in fields or ():
            # NOTE(review): the name is passed for both of the first two
            # arguments -- verify against the CMModelsField signature.
            entry = CMModelsField(declaration["name"],
                                  declaration["name"],
                                  gt(declaration, "key", False))

            if entry.name in self.fields:
                raise CrawlinoFormatError("Repeated property type for fields",
                                          exc_info=True,
                                          extra={'repeated_field': entry.name})

            self.fields[entry.name] = entry
Пример #4
0
def test_gt_dict_return_ok():
    """gt must return the stored value when the key exists in the dict."""
    source = {"hello": "world"}

    found = gt(source, "hello", None)

    assert found == "world"
Пример #5
0
def test_gt_non_dict_ok():
    """gt must hand back a non-dict input itself, ignoring key and default."""
    not_a_dict = 1

    found = gt(not_a_dict, "xxx", "world")

    assert found == 1
Пример #6
0
def test_gt_dict_return_default():
    """gt must fall back to the default when the key is not in the dict."""
    source = {"hello": "world"}

    found = gt(source, "xxx", "world")

    assert found == "world"