"sentence")

else:

    input_path = arguments.input
    output_path = arguments.output

# Report the resolved locations before anything on disk is touched.
# The values are handed to logging as arguments so the message is only
# formatted when the record is actually emitted.
logging.info("Input: %s", input_path)
logging.info("Output: %s", output_path)

# Start from an empty output directory: an existing tree at output_path is
# deleted before the directory is created again.
if os.path.exists(output_path):
    shutil.rmtree(output_path)
os.makedirs(output_path)

logging.info("Initializing lexicon.")
lexicon = DictLexicon(output_path)

# The storage is initialised and then opened, both under output_path.
logging.info("Initializing storage.")
storage = LdbStorage(output_path)
storage.init_db()
storage.open_db()

# The index is created with a single int32 "sentence_id" field.
logging.info("Initializing index.")
index = InvertedIndex(output_path,
                      field_properties=[
                          ("sentence_id", numpy.int32),
                      ])
index.init_index()
index.open()

logging.info("Initializing sentence stream and its parser.")
        "document"
    )
else:
    input_path = arguments.input
    output_path = arguments.output
    query_paths = arguments.query
    context_input = arguments.context_input

# Report the resolved run parameters. Passing the values as logging
# arguments defers formatting until the record is emitted and stays correct
# even if a value happens to be a tuple (eager "%" would try to unpack it).
logging.info("Input: %s", input_path)
logging.info("Context: %s", context_input)
logging.info("Query: %s", query_paths)
logging.info("Output: %s", output_path)

# Lexicon, index and storage are all read from input_path.
logging.info("Initializing lexicon.")
lexicon = DictLexicon(input_path)
lexicon.load()

logging.info("Opening index.")
index = InvertedIndex(input_path)
index.open()

# The searcher is bound to the opened index and the "sentence_id" field.
logging.info("Initializing searcher.")
searcher = Searcher(index, "sentence_id")

logging.info("Initializing storage.")
storage = LdbStorage(input_path)
storage.open_db()
    input_path = arguments.input
    output_path = arguments.output

# Report the resolved locations before anything on disk is touched.
# The values are handed to logging as arguments so the message is only
# formatted when the record is actually emitted.
logging.info("Input: %s", input_path)
logging.info("Output: %s", output_path)

# Start from an empty output directory: an existing tree at output_path is
# deleted before the directory is created again.
if os.path.exists(output_path):
    shutil.rmtree(output_path)
os.makedirs(output_path)

logging.info("Initializing lexicon.")
lexicon = DictLexicon(output_path)
# NOTE(review): load() is called on a lexicon rooted at output_path right
# after that directory was emptied above -- confirm this is intended.
lexicon.load()

# The storage is initialised and then opened, both under output_path.
logging.info("Initializing storage.")
storage = LdbStorage(output_path)
storage.init_db()
storage.open_db()

# The index is created with a single int32 "document_id" field.
logging.info("Initializing index.")
index = InvertedIndex(output_path, field_properties=[
    ("document_id", numpy.int32),
])
index.init_index()
index.open()
    input_path = os.path.join(
        arguments.input,
        "test_out",
        arguments.test_size,
        "sentence"
    )

else:

    input_path = arguments.input


# The value is handed to logging as an argument so the message is only
# formatted when the record is actually emitted.
logging.info("Input: %s", input_path)

logging.info("Initializing lexicon.")
lexicon = DictLexicon(input_path)
lexicon.load()

# Map every term to the element at index 1 of its term_dict entry, which is
# what gets written to the "freq" column below.
# items() is used instead of iteritems() because the latter exists only on
# Python 2 dicts; assumes term_dict is a dict-like mapping -- TODO confirm.
counter = collections.Counter(
    {term: term_id_and_freq[1]
     for term, term_id_and_freq in lexicon.term_dict.items()}
)

# Emit a CSV table on stdout, most frequent term first, with a 1-based row
# number in the first column.
sys.stdout.write("i,term,freq\n")
for rank, (term, freq) in enumerate(counter.most_common(), start=1):
    sys.stdout.write("%d,%s,%d\n" % (rank, term, freq))

logging.info("[DONE]")
    input_path = arguments.input
    output_path = arguments.output

# Report the resolved locations before anything on disk is touched.
# The values are handed to logging as arguments so the message is only
# formatted when the record is actually emitted.
logging.info("Input: %s", input_path)
logging.info("Output: %s", output_path)

# Start from an empty output directory: an existing tree at output_path is
# deleted before the directory is created again.
if os.path.exists(output_path):
    shutil.rmtree(output_path)
os.makedirs(output_path)

logging.info("Initializing lexicon.")
lexicon = DictLexicon(output_path)

# The storage is initialised and then opened, both under output_path.
logging.info("Initializing storage.")
storage = LdbStorage(output_path)
storage.init_db()
storage.open_db()

# The index is created with a single int32 "sentence_id" field.
logging.info("Initializing index.")
index = InvertedIndex(output_path, field_properties=[
    ("sentence_id", numpy.int32),
])
index.init_index()
index.open()
# Example #6
# 0
    context_input = os.path.join(arguments.output, "test_out",
                                 arguments.test_size, arguments.language,
                                 "document")
else:
    input_path = arguments.input
    output_path = arguments.output
    query_paths = arguments.query
    context_input = arguments.context_input

# Report the resolved run parameters. Passing the values as logging
# arguments defers formatting until the record is emitted and stays correct
# even if a value happens to be a tuple (eager "%" would try to unpack it).
logging.info("Input: %s", input_path)
logging.info("Context: %s", context_input)
logging.info("Query: %s", query_paths)
logging.info("Output: %s", output_path)

# Lexicon, index and storage are all read from input_path.
logging.info("Initializing lexicon.")
lexicon = DictLexicon(input_path)
lexicon.load()

logging.info("Opening index.")
index = InvertedIndex(input_path)
index.open()

# The searcher is bound to the opened index and the "sentence_id" field.
logging.info("Initializing searcher.")
searcher = Searcher(index, "sentence_id")

logging.info("Initializing storage.")
storage = LdbStorage(input_path)
storage.open_db()

if context_input is not None:
# Example #7
# 0
                        choices=("tiny", "medium", "large"),
                        default="tiny")
arg_parser.add_argument("-i", "--input", type=str)
arguments = arg_parser.parse_args()

# When the test flag equals 1 the lexicon is read from the
# "test_out/<test_size>/sentence" tree below the given input directory;
# otherwise the input path is used unchanged.
if arguments.test == 1:
    input_path = os.path.join(arguments.input, "test_out", arguments.test_size,
                              "sentence")
else:
    input_path = arguments.input

# The value is handed to logging as an argument so the message is only
# formatted when the record is actually emitted.
logging.info("Input: %s", input_path)

logging.info("Initializing lexicon.")
lexicon = DictLexicon(input_path)
lexicon.load()

# Map every term to the element at index 1 of its term_dict entry, which is
# what gets written to the "freq" column below.
# items() is used instead of iteritems() because the latter exists only on
# Python 2 dicts; assumes term_dict is a dict-like mapping -- TODO confirm.
counter = collections.Counter(
    {term: term_id_and_freq[1]
     for term, term_id_and_freq in lexicon.term_dict.items()}
)

# Emit a CSV table on stdout, most frequent term first, with a 1-based row
# number in the first column.
sys.stdout.write("i,term,freq\n")
for rank, (term, freq) in enumerate(counter.most_common(), start=1):
    sys.stdout.write("%d,%s,%d\n" % (rank, term, freq))

logging.info("[DONE]")