Пример #1
0
    # Estimator: if the user provides a negative graph set we use the
    # two-class estimator, otherwise the one-class estimator.
    import graphlearn.estimate as estimate

    # Both negative-set options are always removed from args; pop them once
    # here so the None check and the later use share a single value.
    negative_input = args.pop('negative_input')
    num_graphs_neg = args.pop('num_graphs_neg')

    if negative_input is None:
        args['estimator'] = estimate.OneClassEstimator(nu=.5, cv=2, n_jobs=-1)
    else:
        args['estimator'] = estimate.TwoClassEstimator(cv=2, n_jobs=-1)

    # Arguments for fitting: moved out of args into their own dict.
    from eden.converter.graph.gspan import gspan_to_eden
    from itertools import islice
    fitargs = {
        k: args.pop(k)
        for k in ['lsgg_include_negatives', 'grammar_n_jobs', 'grammar_batch_size']
    }

    if negative_input is not None:
        # Only the first num_graphs_neg graphs of the negative set are used.
        fitargs['negative_input'] = islice(gspan_to_eden(negative_input), num_graphs_neg)

    # Only the first num_graphs graphs of the input set are used.
    fitargs['input'] = islice(gspan_to_eden(args.pop('input')), args.pop('num_graphs'))

    # Output destination, removed from args like the options above.
    OUTFILE = args.pop('output')

    # Single-argument print(...) produces the same output under the Python 2
    # print statement and the Python 3 print function.
    print("*Sampler init")
    print("*" * 80)
    print(args)

    # CREATE SAMPLER, dumping the rest of the parsed args :)
    from graphlearn.graphlearn import Sampler
Пример #2
0
# The positive set contains 2401 graphs. 30% of them are held out to test
# whether we can improve them; the rest is used for the oracle.
num_pos = 2401
lenpo = int(num_pos * .3)


# Select those 30% at random. The index pool has to span the whole positive
# set: the previous range(2014) left the graphs with index 2014..2400 out of
# both the sample and the oracle. list(...) keeps random.shuffle working on
# interpreters where range() does not return a list.
splitset = list(range(num_pos))
random.shuffle(splitset)
sample = splitset[:lenpo]
oracle = splitset[lenpo:]

path = '../example/'

# Create the oracle from the positive graphs chosen by the oracle indices
# and the negative graph file.
# NOTE(review): picker presumably yields the graphs at the given indices -
# confirm against its definition.
estimator = make_estimator(
    picker(gspan_to_eden(path + 'bursi.pos.gspan'), oracle),
    gspan_to_eden(path + 'bursi.neg.gspan'))
print('estimator ok')



# Create an iterator over the held-out positive graphs we want to work with.
graphs_pos = picker(gspan_to_eden(path + 'bursi.pos.gspan'), sample)


# Results are collected here:
originals = []
improved = []



# we want to use an increasing part of the test set..
Пример #3
0
        sampler.transform(
            graphs_,
            same_radius=False,
            size_constrained_core_choice=False,
            sampling_interval=9999,
            select_cip_max_tries=100,
            batch_size=30,
            n_steps=100,
            n_jobs=-1,
            improving_threshold=0.9,
        )
    )


# Initialise the positive/negative graph sources and the result lists.
# NOTE(review): both sources are passed to itertools.tee below, so they are
# presumably one-shot iterators - confirm against gspan_to_eden.
graphs_pos = gspan_to_eden(path + "bursi.pos.gspan")
graphs_neg = gspan_to_eden(path + "bursi.neg.gspan")
originals = []
improved = []
# Fractions of the data to use, one loop iteration per fraction.
percentages = [0.2, 0.4, 0.6, 0.8, 1]

for perc in percentages:

    ######### first we generate all the iterators ###########
    # How many graphs will be used for sampling: 70% of each set, scaled by
    # the current fraction and truncated to an int.
    # NOTE(review): NUMPOS / NUMNEG are defined outside this excerpt -
    # presumably the sizes of the positive and negative sets; confirm.
    count_pos = int(NUMPOS * 0.7 * perc)
    count_neg = int(NUMNEG * 0.7 * perc)

    # Copy the full sets: tee each source into three iterators. The first
    # copy is rebound to the original name so it can be tee'd again on the
    # next iteration of the loop.
    graphs_pos, graphs_pos_, graphs_pos__ = itertools.tee(graphs_pos, 3)
    graphs_neg, graphs_neg_, graphs_neg__ = itertools.tee(graphs_neg, 3)
Пример #4
0
# The positive set contains 2401 graphs. 30% of them are held out to test
# whether we can improve them; the rest is used for the oracle.
num_pos = 2401
lenpo = int(num_pos * .3)


# Select those 30% at random. The index pool has to span the whole positive
# set: the previous range(2014) left the graphs with index 2014..2400 out of
# both the sample and the oracle. list(...) keeps random.shuffle working on
# interpreters where range() does not return a list.
splitset = list(range(num_pos))
random.shuffle(splitset)
sample = splitset[:lenpo]
oracle = splitset[lenpo:]

path = '../example/'

# Create the oracle from the positive graphs chosen by the oracle indices
# and the negative graph file.
# NOTE(review): picker presumably yields the graphs at the given indices -
# confirm against its definition.
estimator = make_estimator(
    picker(gspan_to_eden(path + 'bursi.pos.gspan'), oracle),
    gspan_to_eden(path + 'bursi.neg.gspan'))
print('estimator ok')



# Create an iterator over the held-out positive graphs we want to work with.
graphs_pos = picker(gspan_to_eden(path + 'bursi.pos.gspan'), sample)


# Results are collected here:
originals = []
improved = []



# we want to use an increasing part of the test set..
Пример #5
0
    sampler =GraphLearnSampler()
    graphs, graphs_ = itertools.tee(graphs)
    sampler.fit(graphs)
    return unpack(sampler.sample(graphs_,
                                 same_radius=False,
                                 max_size_diff=False,
                                 sampling_interval=9999,
                                 select_cip_max_tries=100,
                                 batch_size=30,
                                 n_steps=100,
                                 n_jobs=-1,
                                 improving_threshold=0.9
                                 ))

# Initialise the positive/negative graph sources and the result lists.
# NOTE(review): both sources are passed to itertools.tee below, so they are
# presumably one-shot iterators - confirm against gspan_to_eden.
graphs_pos= gspan_to_eden(path+'bursi.pos.gspan')
graphs_neg= gspan_to_eden(path+'bursi.neg.gspan')
originals=[]
improved=[]
# Fractions of the data to use, one loop iteration per fraction.
percentages=[.2,.4,.6,.8,1]

for perc in percentages:

    ######### first we generate all the iterators ###########
    # How many graphs will be used for sampling: 70% of each set, scaled by
    # the current fraction and truncated to an int.
    # NOTE(review): NUMPOS / NUMNEG are defined outside this excerpt -
    # presumably the sizes of the positive and negative sets; confirm.
    count_pos = int(NUMPOS*.7*perc)
    count_neg = int(NUMNEG*.7*perc)

    # Copy the full sets: tee each source into three iterators. The first
    # copy is rebound to the original name so it can be tee'd again on the
    # next iteration of the loop.
    graphs_pos, graphs_pos_, graphs_pos__ = itertools.tee(graphs_pos,3)
    graphs_neg, graphs_neg_ , graphs_neg__= itertools.tee(graphs_neg,3)