示例#1
0
    def query(self, q, provider=None, uri=None):
        """Scrape, analyze, filter and sort sources for the query *q*.

        When *provider* or *uri* is given, a single explicit scrape
        context is built for it; otherwise contexts are derived from the
        query itself. Returns sorted result groups, or None when no
        sources were found.
        """
        filterctx = self.filters.build_filter_context(q)

        # Pin a single scrape context when provider/uri is supplied,
        # otherwise let the scraper derive contexts from the query.
        if not (provider or uri):
            scrapectxs = self.scraper.build_contexts_for_query(q)
        else:
            scrapectxs = [self.scraper.build_context(provider=provider,
                                                     uri=uri)]

        sources = self.scraper.process(*scrapectxs)
        results = analyze.analyze(*sources, mp=False)

        if not results:
            print("No results found for %r" % q)
            return

        print("Found %s sources" % (len(results),))

        # Keep only the sources matching the filter context
        results = self.filters.apply(filterctx, results)
        print("Got %s matching sources for %r" % (len(results), q))

        return self.filters.sort(results)
示例#2
0
    def test_source_with_invalid_type_hint(self):
        """An unrecognized 'type' hint must not break analysis: the test
        expects the analyzed source to still carry a Movie entity."""
        src = build_source('foo')  # build_source doesnt do parse
        src.hints = {'type': 'other'}

        asrc = analyze(src, mp=False)[0]

        # assertIsInstance reports the actual type on failure, unlike
        # assertTrue(isinstance(...)) which only says "False is not true"
        self.assertIsInstance(asrc.entity, Movie)
示例#3
0
    def run_analyze(self, app, args):
        """Read raw sources as JSON from args.input, analyze them, and
        write the analyzed sources as indented JSON to args.output."""
        payload = json.loads(args.input.read())

        # A single JSON object is accepted as a one-element list
        if isinstance(payload, dict):
            payload = [payload]

        sources = [schema.Source(**item) for item in payload]
        analyzed = analyze.analyze(*sources, mp=False)

        serialized = json.dumps([item.dict() for item in analyzed],
                                indent=2,
                                default=_json_encode_hook)
        args.output.write(serialized)
示例#4
0
    def do_query2(self, app, args):
        """Scrape, analyze, filter and sort sources for a query, then
        write the grouped results to args.output as indented JSON.

        Raises extensions.CommandUsageError when neither queryparams nor
        querystring is supplied, or when the query uses unknown filters.
        """
        def _parse_queryparams(pairs):
            # Each pair must look like 'key=value' with both sides non-empty
            for pair in pairs:
                key, value = pair.split('=', 1)
                if not key or not value:
                    raise ValueError(pair)

                yield (key, value)

        if not args.queryparams and not args.querystring:
            # fix: message typo was "requierd"
            errmsg = "filter or querystring are required"
            print(errmsg, file=sys.stderr)
            raise extensions.CommandUsageError()

        q = {}
        if args.querystring:
            q = query.Query.fromstring(args.querystring)

        # queryparams take precedence over querystring when both are given
        if args.queryparams:
            params = dict(_parse_queryparams(args.queryparams))
            q = query.Query(**params)

        # Setup filters before scraping anything so bad filters fail fast
        query_engine = query.Engine()
        try:
            filters = query_engine.build_filter(q)
        except query.MissingFiltersError as e:
            # fix: message typo was "Unknow"
            errmsg = "Unknown filters: %s"
            errmsg = errmsg % ', '.join(e.args[0])
            print(errmsg, file=sys.stderr)
            # chain the cause so the original error is not masked
            raise extensions.CommandUsageError() from e

        # Build scrape ctxs and process them
        scrape_engine = scraper.Engine()
        ctxs = scrape_engine.build_contexts_for_query(q)
        sources = scrape_engine.process(*ctxs)
        # mp=False for consistency with the other analyze.analyze callers
        sources = analyze.analyze(*sources, mp=False)

        # Pass sources thru filters
        results = query_engine.apply(filters, sources)
        results = query_engine.sort(results)

        # Output shape: [[entity, [source, ...]], ...]
        results = [[entity.dict(), [src.dict() for src in sources]]
                   for (entity, sources) in results]
        output = json.dumps(results, indent=2, default=_json_encode_hook)
        args.output.write(output)