Exemplos de search em Python, exemplos de skid.index.search em Python

Exemplo n.º 1

0

Exibir arquivo

Arquivo: __main__.py Projeto: timvieira/skid

def rm(q):
    """Remove the skid-mark associated with a cached file.

    ``q`` is either the path of a cached file (optionally prefixed with
    ``file://``) or a search query.

    * If the stripped path starts with ``config.CACHE``, the cached file and
      its ``.d`` directory are removed and the path is deleted from the index.
    * Otherwise ``q`` is run through ``skid.index.search``.  With exactly one
      hit the user is asked to confirm before ``rm_cached`` is called on the
      hit's ``'cached'`` entry; with no hits a hint is printed.

    Raises:
        AssertionError: if the query matches more than one document.
    """
    import subprocess

    # remove surrounding whitespace and the "file://" prefix
    cached = re.sub('^file://', '', q.strip())

    if cached.startswith(config.CACHE):
        # Pass the path as a separate argv entry instead of interpolating it
        # into a shell string, so whitespace or shell metacharacters in the
        # file name can neither break nor hijack the command.
        subprocess.call(['rm', '-f', cached])
        # remove corresponding '.d' directory and its contents
        subprocess.call(['rm', '-rf', cached + '.d'])
        # remove file from whoosh index.
        index.delete(cached)
        return

    from skid.index import search
    results = [dict(x) for x in search(q)]

    if not results:
        # Should only happen if user hasn't done run skid-update since
        # adding the paper being deleted.
        print('No matches. Make sure skid is up-to-date by running `skid update`.')
        return

    if len(results) > 1:
        # Raised explicitly (rather than via ``assert False``) so the check
        # survives ``python -O``; the exception type is unchanged.
        raise AssertionError('Multiple (%s) results found for query %r. '
                             'Refine query and try again.'
                             % (len(results), q))

    [hit] = results
    print()
    print(hit['title'])
    print(colors.green % "Are you sure you'd like to delete this document [Y/n]?", end=' ')
    # an empty answer counts as "yes" (the prompt's default is 'Y')
    if input().strip().lower() in ('y', 'yes', ''):
        if rm_cached(hit['cached']):
            print(colors.yellow % 'Successfully deleted.')

Exemplo n.º 2

0

Exibir arquivo

Arquivo: __main__.py Projeto: timvieira/skid

def main():
    """Command-line entry point: dispatch ``sys.argv[1]`` to a skid command.

    With no command, or an unrecognized one, the sorted list of available
    commands is printed.  Otherwise the command name is popped off
    ``sys.argv`` so each command's ``ArgumentParser`` only sees its own
    arguments.
    """

    if len(sys.argv) <= 1:
        print(', '.join(sorted(cmd.ALL)))
        return

    command = sys.argv.pop(1)

    if command in (cmd.search, cmd.ls, cmd.similar, cmd.key):

        p = ArgumentParser()
        p.add_argument('query', nargs='*')
        p.add_argument('--limit', type=int, default=0, #config.LIMIT,
                       help='query limit (use 0 for no limit)')
        p.add_argument('--show', default='', help='display options')
        p.add_argument('--hide', default='', help='display options')

        # TODO: pager temporarily disabled because of transition to python3
        p.add_argument('--pager', choices=('none', 'less', 'emacs'), default='less',
                       help='pager for results')

        p.add_argument('--format', choices=('standard', 'org'), default='standard',
                       help='output format')
        p.add_argument('--by', choices=('relevance', 'modified', 'added'), default='relevance',
                       help='Sort results by')
        p.add_argument('--top', action='store_true',
                       help='Only show top hit.')
        # NOTE: 'store_false' means ``args.no_open`` is True by default and
        # becomes False only when --no-open is passed, i.e. a true value
        # means "do open the top hit".
        p.add_argument('--no-open', action='store_false',
                       help='do not open top hit')
        p.add_argument('--note', action='store_true',
                       help='Open note for top hit in editor.')

        args = p.parse_args()

        query = ' '.join(args.query)

        # a non-positive --limit means "no limit" (None slices everything)
        limit = args.limit if args.limit > 0 else None

        if args.top:
            args.pager = 'none'
            limit = 1

        if command == cmd.search:
            results = index.search(query)

        elif command == cmd.key:
            # Supports bibtex key search, e.g. 'bottou12counterfactual'
            #
            #  Example key
            #
            #   'bottou12counterfactual'
            #   -> 'author:bottou year:2012 title:counterfactual'
            #
            #   - should be greedy e.g. act like '--top'
            #
            #   - bash completion for keys should be easy to implement and useful.
            #
            # Stored under its own name so the ArgumentParser ``p`` is not
            # rebound to something else.
            parts = bibkey(query)
            if parts:
                # TODO: this version doesn't search for papers where author is first-author
                q = ' '.join('%s:%s' % (k,v) for (k,v) in zip(['author', 'year', 'title'], parts) if v)
                print(q)
                results = index.search(q)
            else:
                results = []

        elif command == cmd.similar:
            results = Document(query).similar(limit=limit)
        elif command == cmd.ls:
            results = ls(query)
        else:
            # Not reachable given the enclosing membership test; raised
            # explicitly so the guard is not stripped under ``python -O``.
            raise AssertionError('Unrecognized command %s' % command)

        # convert results to list and convert Whoosh.searching.Hit to skid.Document
        results = list(map(todoc, results))

        # sort documents according to '--by' criteria'
        sortwith = {'relevance': score, 'modified': modified, 'added': added}[args.by]
        if command == cmd.ls and args.by == 'relevance':
            # 'ls' with the default '--by relevance' is ordered by 'added'
            sortwith = added
        results.sort(key=sortwith, reverse=True)

        # remember the untruncated count for the "showing top ..." banner
        nresults = len(results)

        # limit number of search results
        results = results[:limit]

        if args.format == 'org':
            fmt = org
        else:
            fmt = display

        # process display options
        show = {'author', 'title', 'link', 'link:notes'}   # defaults
        show.update(x.strip() for x in args.show.split(','))
        for x in (x.strip() for x in args.hide.split(',')):
            if x in show:
                show.remove(x)

        with pager(args.pager):
            if limit and len(results) >= limit:
                if args.format == 'org':
                    print('# showing top %s of %s results' % (min(limit, nresults), nresults))
                else:
                    print(colors.yellow % 'showing top %s of %s results' % (min(limit, nresults), nresults))
            fmt(results, show=show)

        if args.top:
            assert len(results) <= 1
            if not results:
                print(colors.red % 'Nothing found')
                return
            [top] = results
            # open top hit
            if args.no_open:
                if args.note:
                    # open user's note in editor
                    #
                    # $EDITOR is expanded by the shell; the path is quoted so
                    # whitespace or shell metacharacters in the file name
                    # cannot split or inject into the command.
                    from shlex import quote
                    os.system('$EDITOR %s' % quote(top.cached + '.d/notes.org'))
                else:
                    from subprocess import Popen
                    # open cached document
                    # TODO: read from config file
                    Popen(['xdg-open', top.cached])

    elif command == cmd.add:
        p = ArgumentParser()
        p.add_argument('source')
        p.add_argument('--name')
        args = p.parse_args()
        add(args.source, dest=args.name)

    elif command == cmd.rm:
        p = ArgumentParser()
        p.add_argument('cached')
        args = p.parse_args()
        rm(args.cached)

    elif command == cmd.update:
        index.update()

    elif command == cmd.authors:
        authors()

    elif command == cmd.tags:
        tags()

    elif command == cmd.drop:
        print(colors.yellow % 'Dropping search index... To build a fresh one run\n$ skid update')
        index.drop()

    elif command == cmd.lexicon:
        p = ArgumentParser()
        p.add_argument('field')
        args = p.parse_args()
        lexicon(args.field)

    elif command == cmd.title:
        # doesn't require adding the document, just finds the title.
        from skid.pdfhacks.pdfmill import extract_title
        p = ArgumentParser()
        p.add_argument('pdf')
        # 'store_false' with dest='extra': ``args.extra`` defaults to True
        p.add_argument('--no-extra', action='store_false', dest='extra')
        args = p.parse_args()
        extract_title(args.pdf, extra=args.extra)

    elif command == cmd.scholar:
        from skid.add import gscholar_bib
        from skid.pdfhacks.pdfmill import extract_title
        p = ArgumentParser()
        p.add_argument('pdf')
        p.add_argument('--no-extra', action='store_false', dest='extra')
        args = p.parse_args()

        # run google scholar search based on extracted title.
        title = extract_title(args.pdf, extra=args.extra)
        gscholar_bib(title=title)

    else:
        # unknown command: list the available ones
        print(', '.join(sorted(cmd.ALL)))

Exemplo n.º 3

0

Exibir arquivo

Arquivo: __main__.py Projeto: pombredanne/skid

def main():
    """Command-line entry point: dispatch ``sys.argv[1]`` to a skid command.

    With no command, or an unrecognized one, the sorted list of available
    commands is printed.  Otherwise the command name is popped off
    ``sys.argv`` so each command's ``ArgumentParser`` only sees its own
    arguments.

    Every ``print`` uses the single-argument parenthesised form, which
    behaves the same as the Python 2 print statement and is also valid
    Python 3 syntax.
    """

    if len(sys.argv) <= 1:
        print(', '.join(sorted(CMDS)))
        return

    cmd = sys.argv.pop(1)

    if cmd in (SEARCH, LS, SIMILAR, KEY):

        p = ArgumentParser()
        p.add_argument('query', nargs='*')
        p.add_argument('--limit', type=int, default=0, #config.LIMIT,
                       help='query limit (use 0 for no limit)')
        p.add_argument('--show', default='', help='display options')
        p.add_argument('--hide', default='', help='display options')
        p.add_argument('--pager', choices=('none', 'less', 'emacs'), default='less',
                       help='pager for results')
        p.add_argument('--format', choices=('standard', 'org'), default='standard',
                       help='output format')
        p.add_argument('--by', choices=('relevance', 'modified', 'added'), default='relevance',
                       help='Sort results by')
        p.add_argument('--top', action='store_true',
                       help='Only show top hit.')
        # NOTE: 'store_false' means ``args.no_open`` is True by default and
        # becomes False only when --no-open is passed, i.e. a true value
        # means "do open the top hit".
        p.add_argument('--no-open', action='store_false',
                       help='do not open top hit')

        args = p.parse_args()

        query = ' '.join(args.query)

        # a non-positive --limit means "no limit" (None slices everything)
        limit = args.limit if args.limit > 0 else None

        if args.top:
            args.pager = 'none'
            limit = 1

        if cmd == SEARCH:
            results = index.search(query)

        elif cmd == KEY:
            # Supports bibtex key search, e.g. 'bottou12counterfactual'
            #
            #  Example key
            #
            #   'bottou12counterfactual'
            #   -> 'author:bottou year:2012 title:counterfactual'
            #
            #   - should be greedy e.g. act like '--top'
            #
            #   - bash completion for keys should be easy to implement and useful.
            #
            # Stored under its own name so the ArgumentParser ``p`` is not
            # rebound to something else.
            parts = bibkey(query)
            if parts:
                # TODO: this version doesn't search for papers where author is first-author
                q = ' '.join('%s:%s' % (k,v) for (k,v) in zip(['author', 'year', 'title'], parts) if v)
                print(q)
                results = index.search(q)
            else:
                results = []

        elif cmd == SIMILAR:
            results = Document(query).similar(limit=limit)
        elif cmd == LS:
            results = ls(query)
        else:
            # Not reachable given the enclosing membership test; raised
            # explicitly so the guard is not stripped under ``python -O``.
            raise AssertionError('Unrecognized command %s' % cmd)

        # convert results to list and convert Whoosh.searching.Hit to skid.Document
        results = list(map(todoc, results))

        # sort documents according to '--by' criteria'
        sortwith = {'relevance': score, 'modified': modified, 'added': added}[args.by]
        if cmd == LS and args.by == 'relevance':
            # 'ls' with the default '--by relevance' is ordered by 'added'
            sortwith = added
        results.sort(key=sortwith, reverse=True)

        # remember the untruncated count for the "showing top ..." banner
        nresults = len(results)

        # limit number of search results
        results = results[:limit]

        if args.format == 'org':
            fmt = org
        else:
            fmt = display

        # process display options
        show = {'author', 'title', 'link', 'link:notes'}   # defaults
        show.update(x.strip() for x in args.show.split(','))
        for x in (x.strip() for x in args.hide.split(',')):
            if x in show:
                show.remove(x)

        with pager(args.pager):
            if limit and len(results) >= limit:
                if args.format == 'org':
                    print('# showing top %s of %s results' % (min(limit, nresults), nresults))
                else:
                    print(yellow % 'showing top %s of %s results' % (min(limit, nresults), nresults))
            fmt(results, show=show)

        if args.top:
            assert len(results) <= 1
            if not results:
                print(red % 'Nothing found')
                return
            [top] = results
            # Open the cached document only; the user's notes file is not
            # opened in an editor here.
            if args.no_open:
                from subprocess import Popen
                Popen(['gnome-open', top.cached])

    elif cmd == ADD:
        p = ArgumentParser()
        p.add_argument('source')
        args = p.parse_args()
        add(args.source)

    elif cmd == RM:
        p = ArgumentParser()
        p.add_argument('cached')
        args = p.parse_args()
        rm(args.cached)

    elif cmd == UPDATE:
        update()

    elif cmd == PUSH:
        push()

    elif cmd == AUTHORS:
        authors()

    elif cmd == TAGS:
        tags()

    elif cmd == LEXICON:
        p = ArgumentParser()
        p.add_argument('field')
        args = p.parse_args()
        lexicon(args.field)

    elif cmd == TITLE:
        # doesn't require adding the document, just finds the title.
        from skid.pdfhacks.pdfmill import extract_title
        p = ArgumentParser()
        p.add_argument('pdf')
        # 'store_false' with dest='extra': ``args.extra`` defaults to True
        p.add_argument('--no-extra', action='store_false', dest='extra')
        args = p.parse_args()
        extract_title(args.pdf, extra=args.extra)

    else:
        # unknown command: list the available ones
        print(', '.join(sorted(CMDS)))