Exemplo n.º 1
0
def run():
    """Train an embedding model from the command line and save it.

    Command-line arguments:
        input      Path handed to ``read_samples``.
        output     Base name for the saved model; the model is written to
                   ``<output>_<model>``.
        --model    ``"fasttext"`` or ``"NLE"`` (default ``"NLE"``).
        --ftmodel  Path passed to ``FastTextModel`` (fasttext only).
        --db_cred  Passed to ``read_db_config`` to open the database
                   (NLE only).
        --njobs    Number of threads passed to ``NLEModel`` (NLE only).

    Raises:
        ValueError: if ``--model`` names an unsupported model.
    """
    parser = argparse.ArgumentParser(description='Calculate embeddings')

    parser.add_argument('input', help='Input containing osm samples')
    parser.add_argument('output', help='File to save embeddings in')

    parser.add_argument('--model',
                        help='Embedding model to use',
                        default="NLE",
                        type=str)
    parser.add_argument('--ftmodel',
                        help='Path to fasttext model',
                        type=str,
                        default="")
    parser.add_argument('--db_cred',
                        help='Credentials for database',
                        type=str,
                        default="")
    parser.add_argument('--njobs',
                        help='Number of threads to use',
                        default=1,
                        type=int)

    args = parser.parse_args()

    # Create the model. The name is validated before any data is read so
    # that a misspelled --model fails fast.
    if args.model == "fasttext":
        model = FastTextModel(args.ftmodel)
    elif args.model == 'NLE':
        db = PostgresDB(read_db_config(args.db_cred))
        # NOTE(review): NLEModel also receives the output path; presumably
        # it writes intermediate files there -- confirm against NLEModel.
        model = NLEModel(args.output, args.njobs, db)
    else:
        # ValueError is still an Exception, so existing broad handlers keep
        # working; the message now names the offending value.
        raise ValueError(
            "Model not found: %r (expected 'fasttext' or 'NLE')" % (args.model,))

    data = read_samples(args.input)

    # Train the model on the samples.
    model.train(data)

    # Save the model next to the requested output name.
    model.save_model(args.output + "_" + args.model)
Exemplo n.º 2
0
def main(argv):
    """Command-line driver for the ``Pareto2`` distribution.

    ``argv[1]`` selects the mode; the remaining items are its arguments:

    * ``mle``                      read samples from stdin, print the MLE.
    * ``exp alpha beta``           print the expectation.
    * ``pdf alpha beta size``      print ``x<TAB>pdf(x)`` for ``x`` in
                                   ``range(size)``.
    * ``cdf alpha beta size``      print ``x<TAB>cdf(x)`` for ``x`` in
                                   ``range(size)``.
    * ``random alpha beta size``   draw ``size`` samples, print their mean
                                   as a ``# mean = ...`` line, then one
                                   sample per line.

    Progress for ``random`` goes to stderr. Any usage error (too few
    arguments, unknown mode) writes a message to stderr and exits with
    status -1.

    Output uses single-argument ``print(...)`` and ``sys.stderr.write`` so
    the function behaves the same on Python 2 and Python 3.
    """
    if len(argv) < 2:
        sys.stderr.write("not enough arguments\n")
        sys.exit(-1)
    mode = argv[1]

    if mode == "mle":
        print("Reading data from stdin...")
        samples = util.read_samples(sys.stdin)
        print("MLE = %f" % (Pareto2.mle(samples)))
        return

    # Every remaining mode needs the two distribution parameters.
    if len(argv) < 4:
        sys.stderr.write("not enough arguments\n")
        sys.exit(-1)

    alpha = float(argv[2])
    beta = float(argv[3])
    p = Pareto2(alpha, beta)

    if mode == "exp":
        print("%f" % (p.expectation()))
        return

    # Every remaining mode additionally needs a size.
    if len(argv) < 5:
        sys.stderr.write("not enough arguments\n")
        sys.exit(-1)

    size = int(argv[4])

    if mode == "pdf":
        for x in range(size):
            print("%d\t%.20f" % (x, p.pdf(x)))

    elif mode == "cdf":
        for x in range(size):
            print("%d\t%.20f" % (x, p.cdf(x)))

    elif mode == "random":
        samples = []
        for i in range(size):
            if (i % 1000) == 0:
                # "\r" rewinds the line so the counter updates in place.
                sys.stderr.write("\rGenerating samples %d/%d" % (i + 1, size))
                sys.stderr.flush()
            samples.append(p.random())
        # Report the real number drawn instead of reading the loop variable,
        # which is undefined when the loop body never ran (size <= 0).
        sys.stderr.write("\rGenerating samples %d/%d\n"
                         % (len(samples), size))

        if samples:
            # Summed in ascending order, as before (presumably to limit
            # floating-point rounding error). float() keeps Python 2 from
            # truncating the mean when the samples are integers.
            mean = float(sum(sorted(samples))) / len(samples)
        else:
            mean = float("nan")  # no samples: the mean is undefined
        print("# mean = %f" % (mean))
        for r in samples:
            print("%d" % (r))

    else:
        sys.stderr.write("unknown mode \"%s\"\n" % (mode))
        # Fail like every other usage error instead of exiting with 0.
        sys.exit(-1)
Exemplo n.º 3
0
def main(mode, argv):
    """Command-line driver for the ``Zipf`` distribution.

    ``mode`` selects the action and ``argv`` holds its arguments:

    * ``mle [xmin]``                read samples from stdin and print
                                    ``xmin`` (default 1) and the MLE.
    * ``exp alpha N xmin``          print the expectation.
    * ``pdf alpha N xmin``          print ``i<TAB>pmf(i)`` for ``i`` in
                                    ``xmin..N``.
    * ``cdf alpha N xmin``          print ``i<TAB>cdf(i)`` for ``i`` in
                                    ``xmin..N``.
    * ``random alpha N xmin size``  draw ``size`` samples, print their mean
                                    as a ``# mean = ...`` line, then one
                                    sample per line.

    Progress for ``random`` goes to stderr.

    Returns:
        0 on success, -1 on a usage error (too few arguments or an invalid
        mode); the error message is written to stderr.

    Output uses single-argument ``print(...)`` and ``sys.stderr.write`` so
    the function behaves the same on Python 2 and Python 3.
    """
    if mode == "mle":
        xmin = int(argv[0]) if len(argv) > 0 else 1

        samples = util.read_samples(sys.stdin)
        print("xmin = %d" % (xmin))
        print("MLE = %f" % (Zipf.mle(samples, xmin)))
        return 0

    # Every remaining mode needs the three distribution parameters.
    if len(argv) < 3:
        sys.stderr.write("not enough arguments\n")
        return -1

    alpha = float(argv[0])
    N = int(argv[1])
    xmin = int(argv[2])

    z = Zipf(alpha, N, xmin)

    if mode == "exp":
        print("%f" % (z.expectation()))
        return 0

    if mode == "pdf":
        for i in range(xmin, N + 1):
            print("%d\t%.20f" % (i, z.pmf(i)))
        return 0

    if mode == "cdf":
        for i in range(xmin, N + 1):
            print("%d\t%.20f" % (i, z.cdf(i)))
        return 0

    if mode == "random":
        if len(argv) < 4:
            sys.stderr.write("not enough arguments\n")
            return -1

        size = int(argv[3])
        samples = []
        for i in range(size):
            if (i % 1000) == 0:
                # "\r" rewinds the line so the counter updates in place.
                sys.stderr.write("\rGenerating samples %d/%d" % (i + 1, size))
                sys.stderr.flush()
            samples.append(z.random())
        # Report the real number drawn instead of reading the loop variable,
        # which is undefined when the loop body never ran (size <= 0).
        sys.stderr.write("\rGenerating samples %d/%d\n"
                         % (len(samples), size))

        if samples:
            # Summed in ascending order, as before. float() keeps Python 2
            # from truncating the mean when the samples are integers.
            mean = float(sum(sorted(samples))) / len(samples)
        else:
            mean = float("nan")  # no samples: the mean is undefined
        print("# mean = %f" % (mean))
        for r in samples:
            print("%d" % (r))
        return 0

    sys.stderr.write("invalid mode \"%s\"\n" % (mode))
    # An invalid mode is a usage error like the others, so report failure.
    return -1