示例#1
0
    import arxiv
    arxiv = arxiv

from arxiv.db_utils import db


if __name__ == "__main__":
    # Command-line entry point: the first argument selects which stage to
    # run.  NOTE: invoking the script without any argument raises
    # IndexError on the next line.
    cmd = sys.argv[1]

    if cmd == u"scrape":
        print(u"Scraping all the meta-data from the arxiv...")
        arxiv.get()

    if cmd == u"parse":
        print(u"Parsing the XML...")
        arxiv.parse()

    if cmd == u"build-vocab":
        print(u"Building the vocabulary list...")
        arxiv.build_vocab()

    if cmd == u"get-vocab":
        # Usage: get-vocab [initial [N]] -- both values are optional and
        # fall back to the defaults below.
        initial, N = 1000, 5000
        if len(sys.argv) >= 3:
            initial = int(sys.argv[2])
        # Tested independently of the check above: written as an ``elif``
        # this branch could never run (any argv long enough for N is also
        # long enough for ``initial``), so a supplied N was silently ignored.
        if len(sys.argv) >= 4:
            N = int(sys.argv[3])

        arxiv.get_vocab(initial=initial, N=N)

    if cmd in [u"run", u"results"]:
示例#2
0
def read_file(filename, inc):
    """Parse ``filename`` with ``arxiv.parse`` and yield ``(inc, tree)``.

    This is a generator that produces exactly one item; nothing is opened
    or parsed until it is iterated.

    Args:
        filename: Path of the file to open (text mode, default encoding).
        inc: Value passed through unchanged as the first element of the
            yielded pair.

    Yields:
        The tuple ``(inc, tree)``, where ``tree`` is whatever
        ``arxiv.parse`` returned for the open file.
    """
    # The context manager closes the file even when parsing raises; as
    # before, the file is already closed by the time the result is yielded.
    with open(filename) as in_file:
        tree = arxiv.parse(in_file)
    yield inc, tree
示例#3
0
    import arxiv
    arxiv = arxiv

from arxiv.db_utils import db


if __name__ == "__main__":
    # Command-line entry point: the first argument selects which stage to
    # run.  NOTE: invoking the script without any argument raises
    # IndexError on the next line.
    cmd = sys.argv[1]

    if cmd == u"scrape":
        print(u"Scraping all the meta-data from the arxiv...")
        arxiv.get()

    if cmd == u"parse":
        print(u"Parsing the XML...")
        arxiv.parse()

    if cmd == u"build-vocab":
        print(u"Building the vocabulary list...")
        arxiv.build_vocab()

    if cmd == u"get-vocab":
        # Usage: get-vocab [initial [N]] -- both values are optional and
        # fall back to the defaults below.
        initial, N = 1000, 5000
        if len(sys.argv) >= 3:
            initial = int(sys.argv[2])
        # Tested independently of the check above: written as an ``elif``
        # this branch could never run (any argv long enough for N is also
        # long enough for ``initial``), so a supplied N was silently ignored.
        if len(sys.argv) >= 4:
            N = int(sys.argv[3])

        arxiv.get_vocab(initial=initial, N=N)

    if cmd in [u"run", u"results"]: