def run():
    """Command-line entry point: build an embedding model, train and save it.

    Reads the process arguments, instantiates the model selected with
    ``--model`` (``"fasttext"`` or ``"NLE"``), trains it on whatever
    ``read_samples`` returns for the ``input`` argument and finally calls
    ``save_model`` with ``<output>_<model>``.

    Raises:
        Exception: if ``--model`` is neither ``"fasttext"`` nor ``"NLE"``.
    """
    cli = argparse.ArgumentParser(description='Calcuclate embeddings')

    # Positional arguments, in the order they must appear on the command line.
    for positional, text in (
        ('input', 'Input containing osm samples'),
        ('output', 'File to safe embeddings in'),
    ):
        cli.add_argument(positional, help=text)

    # Optional arguments: (flag, help text, default value, converter).
    for flag, text, fallback, converter in (
        ('--model', 'Embedding model to us', "NLE", str),
        ('--ftmodel', 'Path to fasttext model', "", str),
        ('--db_cred', 'Credentials for database', "", str),
        ('--njobs', 'Number of threads to use', 1, int),
    ):
        cli.add_argument(flag, help=text, default=fallback, type=converter)

    options = cli.parse_args()
    chosen = options.model

    # Reject unsupported model names before constructing anything.
    if chosen not in ("fasttext", "NLE"):
        raise Exception("Model not found")

    if chosen == "fasttext":
        embedder = FastTextModel(options.ftmodel)
    else:
        # The NLE model is handed a database handle built from --db_cred.
        database = PostgresDB(read_db_config(options.db_cred))
        embedder = NLEModel(options.output, options.njobs, database)

    # The model is created first, then the samples are read and used to train.
    samples = read_samples(options.input)
    embedder.train(samples)

    embedder.save_model("{}_{}".format(options.output, chosen))
def main(argv):
    """Command-line driver for the ``Pareto2`` distribution.

    ``argv[0]`` is the program name and ``argv[1]`` selects the mode; the
    remaining items depend on the mode:

    * ``mle``                    -- read samples from stdin with
      ``util.read_samples`` and print ``Pareto2.mle(samples)``.
    * ``exp alpha beta``         -- print ``Pareto2(alpha, beta).expectation()``.
    * ``pdf alpha beta size``    -- print ``x<TAB>pdf(x)`` for ``x`` in
      ``range(size)``.
    * ``cdf alpha beta size``    -- same, using ``cdf(x)``.
    * ``random alpha beta size`` -- draw ``size`` values with ``random()``,
      print a ``# mean = ...`` line and then one value per line (``%d``).
      Progress is reported on stderr. With no samples the mean is ``nan``.

    ``alpha`` and ``beta`` are converted with ``float``, ``size`` with ``int``.

    On a missing argument or an unknown mode a message is written to stderr
    and the process exits with status -1 (``SystemExit``). The mode and the
    argument count are validated before any distribution object is built.
    """
    def fail(message):
        sys.stderr.write(message)
        sys.exit(-1)

    # Minimum length of argv (program name included) for each known mode.
    required = {"mle": 2, "exp": 4, "pdf": 5, "cdf": 5, "random": 5}

    if len(argv) < 2:
        fail("not enough arguments\n")
    mode = argv[1]
    if mode not in required:
        # Checked up-front so a typo in the mode is reported as such (and
        # with a failing exit status) instead of "not enough arguments".
        fail("unknown mode \"%s\"\n" % (mode,))
    if len(argv) < required[mode]:
        fail("not enough arguments\n")

    # Plain write() calls keep the block valid on both Python 2 and 3.
    out = sys.stdout.write

    if mode == "mle":
        out("Reading data from stdin...\n")
        samples = util.read_samples(sys.stdin)
        out("MLE = %f\n" % (Pareto2.mle(samples)))
        return

    p = Pareto2(float(argv[2]), float(argv[3]))

    if mode == "exp":
        out("%f\n" % (p.expectation()))
        return

    size = int(argv[4])

    if mode in ("pdf", "cdf"):
        func = p.pdf if mode == "pdf" else p.cdf
        for x in range(size):
            out("%d\t%.20f\n" % (x, func(x)))
        return

    # mode == "random"
    samples = []
    for i in range(size):
        if i % 1000 == 0:
            sys.stderr.write("\rGenerating samples %d/%d" % (i + 1, size))
        samples.append(p.random())
    # Uses len(samples) rather than the loop variable, which is undefined
    # when the loop body never ran (size <= 0).
    sys.stderr.write("\rGenerating samples %d/%d\n" % (len(samples), size))

    if samples:
        # Summed in ascending order as before; the 0.0 start value forces
        # float division even if random() yields ints under Python 2.
        mean = sum(sorted(samples), 0.0) / len(samples)
    else:
        mean = float("nan")
    out("# mean = %f\n" % (mean))
    for r in samples:
        out("%d\n" % (r))
def main(mode, argv):
    """Command-line driver for the ``Zipf`` distribution.

    ``mode`` selects the operation and ``argv`` holds its arguments:

    * ``mle [xmin]``              -- read samples from stdin with
      ``util.read_samples`` and print ``xmin`` (default 1) and
      ``Zipf.mle(samples, xmin)``.
    * ``exp alpha N xmin``        -- print ``Zipf(alpha, N, xmin).expectation()``.
    * ``pdf alpha N xmin``        -- print ``i<TAB>pmf(i)`` for ``i`` from
      ``xmin`` to ``N`` inclusive.
    * ``cdf alpha N xmin``        -- same, using ``cdf(i)``.
    * ``random alpha N xmin size`` -- draw ``size`` values with ``random()``,
      print a ``# mean = ...`` line and then one value per line (``%d``).
      Progress is reported on stderr. With no samples the mean is ``nan``.

    ``alpha`` is converted with ``float``; ``N``, ``xmin`` and ``size`` with
    ``int``.

    Returns:
        0 on success, -1 when the mode is invalid or arguments are missing
        (a message is written to stderr in both error cases). The mode and
        the argument count are validated before a ``Zipf`` object is built.
    """
    # Minimum number of items in argv for each known mode.
    required = {"mle": 0, "exp": 3, "pdf": 3, "cdf": 3, "random": 4}

    if mode not in required:
        # Reported up-front and with a failing status, like the other errors.
        sys.stderr.write("invalid mode \"%s\"\n" % (mode,))
        return -1
    if len(argv) < required[mode]:
        sys.stderr.write("not enough arguments\n")
        return -1

    # Plain write() calls keep the block valid on both Python 2 and 3.
    out = sys.stdout.write

    if mode == "mle":
        xmin = int(argv[0]) if len(argv) > 0 else 1
        samples = util.read_samples(sys.stdin)
        out("xmin = %d\n" % (xmin))
        out("MLE = %f\n" % (Zipf.mle(samples, xmin)))
        return 0

    alpha = float(argv[0])
    N = int(argv[1])
    xmin = int(argv[2])
    z = Zipf(alpha, N, xmin)

    if mode == "exp":
        out("%f\n" % (z.expectation()))
        return 0

    if mode in ("pdf", "cdf"):
        func = z.pmf if mode == "pdf" else z.cdf
        for i in range(xmin, N + 1):
            out("%d\t%.20f\n" % (i, func(i)))
        return 0

    # mode == "random"
    size = int(argv[3])
    samples = []
    for i in range(size):
        if i % 1000 == 0:
            sys.stderr.write("\rGenerating samples %d/%d" % (i + 1, size))
        samples.append(z.random())
    # Uses len(samples) rather than the loop variable, which is undefined
    # when the loop body never ran (size <= 0).
    sys.stderr.write("\rGenerating samples %d/%d\n" % (len(samples), size))

    if samples:
        # Summed in ascending order as before; the 0.0 start value forces
        # float division even if random() yields ints under Python 2.
        mean = sum(sorted(samples), 0.0) / len(samples)
    else:
        mean = float("nan")
    out("# mean = %f\n" % (mean))
    for r in samples:
        out("%d\n" % (r))
    return 0