logging.info("Fitting local TFIDF weights from: %s ..." % args.input)
        lines = streamer(args.input)
        vectorizer.fit(lines)

except:
    logging.info("IDF model file does not exist in: %s ..." % args.idf)
    exit()

# Open (or create) the on-disk index database backing the input corpus.
# The existence check must happen BEFORE instantiating file_index, since
# constructing the index may create the DB file itself.
db_already_built = os.path.exists(outputf)
logging.info("Instantiating index object...")
index = indexing.file_index(
    input_file=args.input,
    index_file=outputf,
    vectorizer=vectorizer,
    mmap=True,
    wsize=args.wsize,
    sampsize=args.samples,
    n_jobs=1,
    chunk_size=args.chunk,
    verbose=args.verbo)
if db_already_built:
    # DB file was already present: file_index reopens it, no fitting needed.
    logging.info("Index loaded from DB file %s" % outputf)
else:
    # Fresh DB: populate the index from the input corpus.
    logging.info("Starting to build index into DB file %s" % outputf)
    index.fit()
    logging.info("Index fitted!!")
    logging.info("Output database: {}".format(outputf))

if index.vocab_size < args.bsize:
    logging.info(
                sublinear_tf = True,# if args.localw.startswith("subl") else False,
                stop_words = "english" #if args.stop == 'ost' else None
                )
        logging.info("Fitting local TFIDF weights from: %s ..." % args.input)
        lines = streamer(args.input)
        vectorizer.fit(lines)
        
except:
    logging.info("IDF model file does not exist in: %s ..." % args.idf)
    exit()

# Probe for an existing index DB first; the file_index constructor may
# create the DB file, so the check cannot come after instantiation.
have_db = os.path.exists(outputf)
logging.info("Instantiating index object...")
index = indexing.file_index(input_file=args.input,
                            index_file=outputf,
                            vectorizer=vectorizer,
                            mmap=True,
                            wsize=args.wsize,
                            sampsize=args.samples,
                            n_jobs=1,
                            chunk_size=args.chunk,
                            verbose=args.verbo)
if not have_db:
    # No prior DB: build the index from scratch and persist it.
    logging.info("Starting to build index into DB file %s" % outputf)
    index.fit()
    logging.info("Index fitted!!")
    logging.info("Output database: {}".format(outputf))
else:
    # Prior DB found: file_index loaded it on construction.
    logging.info("Index loaded from DB file %s" % outputf)
    
# Build word-centroid representations backed by the index DB and its vectorizer.
sparse_word_centroids = wordCentroids(db=index, vect=vectorizer)
# NOTE: maybe the sparse word_centroids matrix could be loaded into RAM and NMF applied to it.
 
logging.info("Fitting Sparse Random Projections for sparse coding ...")
X_s = Dict(sorted({w: v for w, v in sparse_word_centroids