Example #1
def test(log_file, n_estimators, learning_rate):
	of = open(log_file, "w")

#	model = AdaBoostClassifier(base_estimator=SVC(10, probability=True), n_estimators=20, learning_rate=1)
#	model = AdaBoostClassifier(n_estimators=20, learning_rate=1)

	of.write("\t\t\tAUC\t\tPrecise\t\tRecall\t\tF1\n")

#	model = AdaBoostClassifier(n_estimators=n_estimators, learning_rate=learning_rate)
	model = AdaBoostClassifier(base_estimator=SVC(10, probability=True),n_estimators=n_estimators, learning_rate=learning_rate)
	X, app_Y = get_data("../data/orange_aft_clean.csv", attr="appetency", is_balance=True)
	auc, pre, rec, f1 = train_and_validation(X, app_Y, model)
	of.write("App\t\t%g\t\t%g\t\t%g\t\t%g\n" %(auc, pre, rec, f1))

#	model = AdaBoostClassifier(n_estimators=n_estimators, learning_rate=learning_rate)
	model = AdaBoostClassifier(base_estimator=SVC(10, probability=True),n_estimators=n_estimators, learning_rate=learning_rate)
	X, churn_Y = get_data("../data/orange_aft_clean.csv", attr="churn", is_balance=True)
	auc, pre, rec, f1 = train_and_validation(X, churn_Y, model)
	of.write("Churn\t\t%g\t\t%g\t\t%g\t\t%g\n" %(auc, pre, rec, f1))

#	model = AdaBoostClassifier(n_estimators=n_estimators, learning_rate=learning_rate)
	model = AdaBoostClassifier(base_estimator=SVC(10, probability=True),n_estimators=n_estimators, learning_rate=learning_rate)
	X, up_Y = get_data("../data/orange_aft_clean.csv", attr="upselling", is_balance=True)
	auc, pre, rec, f1 = train_and_validation(X, up_Y, model)
	of.write("Up\t\t%g\t\t%g\t\t%g\t\t%g\n" %(auc, pre, rec, f1))
Example #2
def generate_quiz(request, friend_id):
    time_start = time()
    friend_data = get_data(request, friend_id, FRIEND_FIELDS)
    time_friend_data = time()
    self_data = get_data(request, 'me', SELF_FIELDS)
    time_self_data = time()

    logger.debug("TIME: friend_data fetch: %sms",
                 round(1000 * (time_friend_data - time_start)))
    logger.debug("TIME: self_data fetch: %sms",
                 round(1000 * (time_self_data - time_friend_data)))

    questions = get_questions(self_data, friend_data)

    answers = [question.correct_index for question in questions]
    request.session['answers'] = answers
    request.session['questions'] = [jsonpickle.encode(question)
                                    for question in questions]

    context = RequestContext(request,
                             {'request': request,
                              'questions': questions,
                              'answers': answers,
                              'profile_pic': request.session['profile_pic'],
                              'profile_url': request.session['profile_url'],
                             })

    logger.debug("TIME: all preprocessing: %sms",
                 round(1000 * (time() - time_start)))

    return render_to_response('quiz.html', context_instance=context)
 def test_memoize(self):
     utils.get_data()
     self.assertTrue('expire' in utils.CACHE['get_data'])
     self.assertTrue('data' in utils.CACHE['get_data'])
     utils.CACHE.clear()
     utils.get_data()
     self.assertTrue('expire' in utils.CACHE['get_data'])
     self.assertTrue('data' in utils.CACHE['get_data'])
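
# A minimal sketch of the kind of caching decorator the test above assumes
# (hypothetical names; the real utils module may differ): results live in a
# module-level CACHE dict keyed by function name, with the cached value under
# 'data' and an absolute expiry timestamp under 'expire'.
from functools import wraps
from time import time

CACHE = {}


def memoize(duration=600):
    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            entry = CACHE.get(func.__name__)
            if entry is None or entry['expire'] < time():
                entry = CACHE[func.__name__] = {
                    'data': func(*args, **kwargs),
                    'expire': time() + duration,
                }
            return entry['data']
        return wrapper
    return decorator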
Example #4
def compare(request,p1):
	compare_form = CompareForm(request.GET if request.GET.get('submit',None) else None)
	context = {'compare_form':compare_form,'person1':p1}
	if compare_form.is_valid():
		p2 = compare_form.cleaned_data['compare_to']
		p1oked, p1vetoed = utils.get_data(p1)
		p2oked, p2vetoed = utils.get_data(p2)
		context['person2'] = p2
		context['p1oked'] = utils.sort_nameset(p1oked-p2oked)
		context['p2oked'] = utils.sort_nameset(p2oked-p1oked)
		context['intersection'] = utils.sort_nameset(p2oked.intersection(p1oked))

	return HttpResponse(loader.get_template("names/compare.html").render(RequestContext(request,context)))
Example #5
def index(request, form=None):
    if not form:
        form, user = auth_user(request)
    else:
        user = None

    if not user:
        content = utils.get_data('no_auth_index_content.%s.html'%request.LANGUAGE_CODE)
    elif user.role == 'admin':
        content = utils.get_data('admin_index_content.%s.html'%request.LANGUAGE_CODE)
    else:
        content = utils.get_data('user_index_content.%s.html'%request.LANGUAGE_CODE)

    return render_to_response('index.html',{'form':form, 'user':user,'content':content})
Example #6
def fine_tuning_step(n_estimators, max_features, max_depth):
	model = RandomForestClassifier(n_estimators=n_estimators, max_features=max_features, max_depth=max_depth)
	X, app_Y = get_data("../data/orange_aft_clean.csv", attr="appetency", is_balance=True)
	app_auc, pre, rec, f1 = train_and_validation(X, app_Y, model)

	model = RandomForestClassifier(n_estimators=n_estimators, max_features=max_features, max_depth=max_depth)
	X, churn_Y = get_data("../data/orange_aft_clean.csv", attr="churn", is_balance=True)
	churn_auc, pre, rec, f1 = train_and_validation(X, churn_Y, model)
	
	model = RandomForestClassifier(n_estimators=n_estimators, max_features=max_features, max_depth=max_depth)
	X, up_Y = get_data("../data/orange_aft_clean.csv", attr="upselling", is_balance=True)
	up_auc, pre, rec, f1 = train_and_validation(X, up_Y, model)

	return app_auc, churn_auc, up_auc
 def test_cache(self):
     """
     Test cache decorator for get_data function.
     """
     CACHE = utils.CACHE
     self.assertEqual(CACHE, {})
     data = utils.get_data()
     self.assertNotEqual(CACHE, {})
     self.assertEqual(CACHE['get_data']['data'], data)
     cache_time = CACHE['get_data']['time']
     CACHE['get_data']['time'] = time() + 86400
     utils.get_data()
     self.assertNotEqual(cache_time, CACHE['get_data']['time'])
     CACHE = {}
Example #8
def fine_tuning_step(n_neighbors):
	model = NearestNeighbors(n_neighbors=n_neighbors)
	X, app_Y = get_data("../data/orange_aft_clean.csv", attr="appetency", is_balance=True)
	app_auc, pre, rec, f1 = train_and_validation(X, app_Y, model)

	model = NearestNeighbors(n_neighbors=n_neighbors)
	X, churn_Y = get_data("../data/orange_aft_clean.csv", attr="churn", is_balance=True)
	churn_auc, pre, rec, f1 = train_and_validation(X, churn_Y, model)
	

	model = NearestNeighbors(n_neighbors=n_neighbors)
	X, up_Y = get_data("../data/orange_aft_clean.csv", attr="upselling", is_balance=True)
	up_auc, pre, rec, f1 = train_and_validation(X, up_Y, model)

	return app_auc, churn_auc, up_auc
def get_suggestions(**args):
    # initializations
    papers = []
    bibcodes = []
    if 'bibcodes' in args:
        bibcodes = args['bibcodes']
    if len(bibcodes) == 0:
        return []
    # Any overrides for default values?
    Nsuggestions = current_app.config.get('CITATION_HELPER_NUMBER_SUGGESTIONS')
    # get rid of potential trailing spaces
    bibcodes = [b.strip() for b in bibcodes][:current_app.config.get('CITATION_HELPER_MAX_INPUT')]
    # start processing
    # get the citations for all publications (keeping multiplicity is essential)
    papers = get_data(bibcodes=bibcodes)
    if "Error" in papers:
        return papers
    # removes papers from the original list to get candidates
    papers = [p for p in papers if p not in bibcodes]
    # establish frequencies of papers in results
    paperFreq = [(k,len(list(g))) for k, g in groupby(sorted(papers))]
    # and sort them, most frequent first
    paperFreq = sorted(paperFreq, key=operator.itemgetter(1),reverse=True)
    # remove all papers with frequencies smaller than threshold
    paperFreq = [p for p in paperFreq if p[1] > current_app.config.get('CITATION_HELPER_THRESHOLD_FREQUENCY')]
    # get metadata for suggestions
    meta_dict = get_meta_data(results=paperFreq[:Nsuggestions])
    if "Error"in meta_dict:
        return meta_dict
    # return results in required format
    return [{'bibcode':x,'score':y, 'title':meta_dict[x]['title'], 'author':meta_dict[x]['author']} for (x,y) in paperFreq[:Nsuggestions] if x in meta_dict.keys()]
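
# A small self-contained illustration of the frequency-counting step above,
# using toy data only: sorted() + groupby() collapses a list with repeats into
# (value, count) pairs, which are then ranked by count.
from itertools import groupby
import operator

citations = ['a', 'b', 'a', 'c', 'a', 'b']
freq = [(k, len(list(g))) for k, g in groupby(sorted(citations))]
freq = sorted(freq, key=operator.itemgetter(1), reverse=True)
print(freq)  # [('a', 3), ('b', 2), ('c', 1)]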
Example #10
def ma_nearby_x(code,index=0, x=24):
	day_data = get_day_data_rv(code)
	if len(day_data) < (x * 2 + index):
		return False
	# accumulate the price column (index 3) over the x bars starting at `index`
	total = 0
	for i in range(index, x + index):
		total += float(day_data[i][3])
	newest_data = day_data[index][1:5]
	point = float(newest_data[2])
	point_x = total / x
	if code[0] == '0' or code[0] == '3':
		_code = 'sz'+code
	else:
		_code = 'sh'+code
	res = utils.get_data(CONFIG.URL_CHECK_STOCK+_code)
	res = json.loads(res[23:len(res)-4])
	if float(res[0]) == 0:
		return False
	if (point-point_x)/point_x < 0.05:
		up_x_data.append([point_x,point,(point-point_x)/point_x,code])
		return True
	return False
    def test_service_results(self):
        '''Test to see if mock methods return expected results'''
        httpretty.register_uri(
            httpretty.GET, self.app.config.get(
                'CITATION_HELPER_SOLRQUERY_URL'),
            content_type='application/json',
            status=200,
            body="""{
            "responseHeader":{
            "status":0, "QTime":0,
            "params":{ "fl":"reference,citation", "indent":"true",
            "wt":"json", "q":"*"}},
            "response":{"numFound":10456930,"start":0,"docs":%s
            }}""" % json.dumps(mockdata))

        expected_papers = [
            u'x', u'z', u'd', u'x', u'e', u'y', u'p',
            u'p', u'c', u'p', u'y', u'a']
        bibcodes = ['a', 'b', 'c']
        results = get_data(bibcodes=bibcodes)
        self.assertEqual(results, expected_papers)

        expected_meta = {u'a': {'author': u'a_author,+', 'title': u'a_title'},
                         u'c': {'author': u'c_author,+', 'title': u'c_title'},
                         u'b': {'author': u'b_author,+', 'title': u'b_title'},
                         u'p': {'author': u'p_author,+', 'title': u'p_title'},
                         u'y': {'author': u'y_author,+', 'title': u'y_title'},
                         u'x': {'author': u'x_author,+', 'title': u'x_title'},
                         u'z': {'author': u'z_author,+', 'title': u'z_title'}}
        scorelist = [('a', 3), ('b', 2)]
        resmeta = get_meta_data(results=scorelist)
        self.assertEqual(resmeta, expected_meta)
 def test_group_by_weekday(self):
     """
     Test grouping presence entries by weekday.
     """
     data = utils.get_data()
     correct_data = [[], [30047], [24465], [23705], [], [], []]
     self.assertEqual(utils.group_by_weekday(data[10]), correct_data)
Example #13
def run():
    table = get_data(get_data_path('taxonomy.csv'))
    group_id = species_id = 1
    for index in range(1, len(table)):
        row = table[index]
        kwargs = {
            'id': group_id,
            'order': row[5],
            'family': row[6].split('(')[0].strip(),
            'genus': row[3].split(' ')[0],
        }
        if 'en' in get_languages() and '(' in row[6]:
            kwargs['name_en'] = row[6].split('(')[1][:-1].strip()
        group, created = SpeciesGroup.objects.get_or_create(**kwargs)
        if created:
            group_id += 1

        Species.objects.create(
            id=species_id,
            include=False,
            order=species_id,
            rank=Rank.objects.get(slug=row[1]),
            group=group,
            standard_name=row[4],
            scientific_name=row[3],
        )
        if created:
            species_id += 1
def make_movie(datadir, deltaname, variable, framenum):

    datafile = utils.datafiles[variable]
    delta = utils.get_data(datadir, datafile, deltaname)
    mp = utils.gridEdges(datadir)

    cmap = utils.cmap[variable]
    vmin = np.nanmin(delta["data"])
    vmax = np.nanmax(delta["data"])
    norm = colors.Normalize(vmin=vmin, vmax=vmax)

    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    bm = utils.basemap(ax)

    X, Y = bm(mp["lons"], mp["lats"])
    print ax.get_xlim()
    print ax.get_ylim()

    ax.axis(utils.mapbounds[deltaname])

    def updatefig(i):
        mp["map"][delta["inds"][0], delta["inds"][1]] = delta["data"].iloc[i, :]
        date = delta["data"].index[i].strftime("%Y-%m-%d")
        im = bm.pcolormesh(X, Y, np.ma.masked_invalid(mp["map"]), cmap=cmap, norm=norm)
        cbar = bm.colorbar(im, "bottom", cmap=cmap, norm=norm)
        ax.set_title("{}: {}".format(utils.fullname[variable], date))

        framenum = 5

    ani = animation.FuncAnimation(fig, updatefig, frames=framenum)
    ani.save("{}_{}_{}.mp4".format(utils.fullname[variable], deltaname, framenum))
Example #15
def fine_tuning_step(n_estimators, learning_rate):
	sess = tf.Session(config=tf.ConfigProto(intra_op_parallelism_threads=5, inter_op_parallelism_threads=5))
	
	model = NN_Voting(sess, n_estimators=n_estimators, learning_rate=learning_rate)
	X, app_Y = get_data("../data/orange_aft_clean.csv", attr="appetency", is_balance=True)
	app_auc, pre, rec, f1 = train_and_validation(sess, X, app_Y, model)

	model = NN_Voting(sess, n_estimators=n_estimators, learning_rate=learning_rate)
	X, churn_Y = get_data("../data/orange_aft_clean.csv", attr="churn", is_balance=True)
	churn_auc, pre, rec, f1 = train_and_validation(sess, X, churn_Y, model)
	
	model = NN_Voting(sess, n_estimators=n_estimators, learning_rate=learning_rate)
	X, up_Y = get_data("../data/orange_aft_clean.csv", attr="upselling", is_balance=True)
	up_auc, pre, rec, f1 = train_and_validation(sess, X, up_Y, model)

	return app_auc, churn_auc, up_auc
Example #16
def fine_tuning_step(n_estimators, learning_rate):
	model = AdaBoostClassifier(base_estimator=SVC(10, probability=True),n_estimators=n_estimators, learning_rate=learning_rate)
#	model = AdaBoostClassifier(n_estimators=n_estimators, learning_rate=learning_rate)
	X, app_Y = get_data("../data/orange_aft_clean.csv", attr="appetency", is_balance=True)
	app_auc, pre, rec, f1 = train_and_validation(X, app_Y, model)

#	model = AdaBoostClassifier(n_estimators=n_estimators, learning_rate=learning_rate)
	model = AdaBoostClassifier(base_estimator=SVC(10, probability=True),n_estimators=n_estimators, learning_rate=learning_rate)
	X, churn_Y = get_data("../data/orange_aft_clean.csv", attr="churn", is_balance=True)
	churn_auc, pre, rec, f1 = train_and_validation(X, churn_Y, model)
	
#	model = AdaBoostClassifier(n_estimators=n_estimators, learning_rate=learning_rate)
	model = AdaBoostClassifier(base_estimator=SVC(10, probability=True),n_estimators=n_estimators, learning_rate=learning_rate)
	X, up_Y = get_data("../data/orange_aft_clean.csv", attr="upselling", is_balance=True)
	up_auc, pre, rec, f1 = train_and_validation(X, up_Y, model)

	return app_auc, churn_auc, up_auc
Example #17
def train(fea_file, model_file):
    X_train,Y_train = utils.get_data(fea_file)
    print "load fea file ok"
    logreg = linear_model.LogisticRegression(C=1e5)
    logreg.fit(X_train,Y_train)
    print "fit ok"
    joblib.dump(logreg, model_file)
    print "dump ok"
Example #18
 def _anonymoususer(request, *args, **kwargs):
     user = request.user
     if user.is_authenticated():
         return HttpResponseRedirect(redirect_to=url_reverse('users.views.view_homepage'))
     next = get_data(request).get('next', '')
     if next:
         kwargs['next'] = next        
     return the_function(request, *args, **kwargs)
Example #19
def predict(fea_file, model_file, predict_file):
    Y_test,X_test = utils.get_data(fea_file)
    log_reg = joblib.load(model_file)
    Y_predict = log_reg.predict(X_test)
    ou_predict = open(predict_file, "w")
    for y in Y_predict:
        ou_predict.write(str(y) + "\n")
    ou_predict.close()
Example #20
def export_secondaryref_info(request,outfile):
    ids = utils.get_data(request,'mysql')
    res=[]
    data = utils.get_data('SELECT refdesc.id,year,language,keywords FROM refdesc INNER JOIN refs ON refs.id=refdesc.id WHERE keywords IS NOT NULL;','mysql')
    # put in dico
    data_dico = dict()
    for row in data :
        r = list(row)
        #print('r : '+r[0].encode('utf8'))
        data_dico[r[0].encode('utf8')]=[r[i] for i in range(1,len(r))]
    for i in ids :
        #print(i[0])
        if i[0].encode('utf8') in data_dico :
            #print('i : '+i[0].encode('utf8'))
            #print(data_dico[i[0].encode('utf8')])
            res.append(data_dico[i[0].encode('utf8')])
    utils.export_matrix_csv(res,outfile,'\t',False)
Example #21
def run(*args):
    for filename in args:
        table = get_data(filename)
        for index in range(1, len(table)):
            row = table[index]
            species = Species.objects.get(scientific_name=row[0])
            species.include = True
            setattr(species, table[0][1], row[1])
            species.save()
def users_view():
    """
    Users listing for dropdown.
    """
    data = get_data()
    return [
        {'user_id': i, 'name': 'User {0}'.format(str(i))}
        for i in data.keys()
    ]
Example #23
def test(log_file, n_neighbors):
	of = open(log_file, "w")

	of.write("\t\t\tAUC\t\tPrecise\t\tRecall\t\tF1\n")

	model = NearestNeighbors(n_neighbors=n_neighbors)
	X, app_Y = get_data("../data/orange_aft_clean.csv", attr="appetency", is_balance=True)
	auc, pre, rec, f1 = train_and_validation(X, app_Y, model)
	of.write("App\t\t%g\t\t%g\t\t%g\t\t%g\n" %(auc, pre, rec, f1))

	model = NearestNeighbors(n_neighbors=n_neighbors)
	X, churn_Y = get_data("../data/orange_aft_clean.csv", attr="churn", is_balance=True)
	auc, pre, rec, f1 = train_and_validation(X, churn_Y, model)
	of.write("Churn\t\t%g\t\t%g\t\t%g\t\t%g\n" %(auc, pre, rec, f1))

	model = NearestNeighbors(n_neighbors=n_neighbors)
	X, up_Y = get_data("../data/orange_aft_clean.csv", attr="upselling", is_balance=True)
	auc, pre, rec, f1 = train_and_validation(X, up_Y, model)
	of.write("Up\t\t%g\t\t%g\t\t%g\t\t%g\n" %(auc, pre, rec, f1))
Example #24
def main():
    """Driver function."""
    from utils import get_data
    params = {'symbols': ['AAPL', 'FB', 'GOOG'],
              'start_date': '01/01/2012',
              'end_date': '03/20/2016',
              'principle': 1000.00}
    prices = get_data(params)
    allocs = optimize_portfolio(prices)
    print(allocs)
Example #25
def do_get_companies(hint):
    if not hint:
        return []
    hint = str(hint)
    data = get_data()
    ret = []
    for mem_id in data:
        mail = data[mem_id]["email"]
        domain = mail.split("@")[-1]
        if hint.lower() in domain.lower():
            ret.append(data[mem_id])
    return ret
Example #26
def test_bootstrap() :
    corpus = utils.get_data('SELECT id FROM refdesc WHERE abstract_keywords IS NOT NULL LIMIT 2000;','../../Data/dumps/20160126_cybergeo.sqlite3')
    for kwLimit in [50,100,200] :
        for subCorpusSize in [100,500,1000,2000] :
            bootstrapSize=25
            [relevantkw,relevant_dico,allkw] = bootstrap_subcorpuses(corpus,kwLimit,subCorpusSize,bootstrapSize)
            utils.export_dico_csv(relevant_dico,'res/conv_dico/bootstrap_relevantDico_kwLimit'+str(kwLimit)+'_subCorpusSize'+str(subCorpusSize)+'_bootstrapSize'+str(bootstrapSize),True)
            utils.export_list(relevantkw,'res/conv_kw/kw_'+str(kwLimit)+'_subCorpusSize'+str(subCorpusSize),False)
            utils.export_dico_num_csv(relevantkw,'res/conv_tm/kw_'+str(kwLimit)+'_subCorpusSize'+str(subCorpusSize),False)
            for i in range(len(allkw)) :
                local_kw = allkw[i]
                utils.export_list(local_kw.keys(),'res/conv_kw/kw_'+str(kwLimit)+'_subCorpusSize'+str(subCorpusSize)+'_run'+str(i),False)
                utils.export_dico_num_csv(local_kw,'res/conv_tm/kw_'+str(kwLimit)+'_subCorpusSize'+str(subCorpusSize)+'_run'+str(i),False)
Example #27
def do_get_cores(hint):
    if not hint:
        return []
    hint = str(hint)
    data = get_data()
    ret = []
    for mem_id in data:
        username = data[mem_id]["username"]
        fullname = data[mem_id]["fullname"]
        target_str = " ".join([username, fullname])
        if hint.lower() in target_str.lower():
            ret.append(data[mem_id])
    return ret
Example #28
def extract_relevant_cybergeo(kwLimit, database):
    corpus = utils.get_data(
        "SELECT cybergeo.id FROM refdesc INNER JOIN cybergeo ON cybergeo.id=refdesc.id WHERE abstract_keywords IS NOT NULL AND abstract_keywords!='';",
        database,
    )
    print(corpus)
    occurence_dicos = utils.import_kw_dico_req(
        "SELECT cybergeo.id,abstract_keywords FROM refdesc INNER JOIN cybergeo ON cybergeo.id=refdesc.id WHERE abstract_keywords IS NOT NULL AND abstract_keywords!='';",
        database,
    )
    print(occurence_dicos)
    [relevantkw, relevant_dico] = kwFunctions.extract_relevant_keywords(corpus, kwLimit, occurence_dicos)
    utils.export_dico_csv(relevant_dico, "res/cybergeo/relevantDico_kwLimit" + str(kwLimit), False)
    utils.export_dico_num_csv(relevantkw, "res/cybergeo/kw_" + str(kwLimit), False)
 def test_get_data(self):
     """
     Test parsing of CSV file.
     """
     data = utils.get_data()
     self.assertIsInstance(data, dict)
     self.assertItemsEqual(data.keys(), [10, 11, 12, 13, 5123])
     sample_date = datetime.date(2013, 9, 10)
     self.assertIn(sample_date, data[10])
     self.assertItemsEqual(data[10][sample_date].keys(), ['start', 'end'])
     self.assertEqual(
         data[10][sample_date]['start'],
         datetime.time(9, 39, 5)
     )
Example #30
def extract_relevant_cybergeo_fulltext(kwLimit):
    resdir = "res/cybergeo_full/"
    #
    corpus = utils.get_data(
        "SELECT id FROM cybergeo WHERE fulltext_keywords IS NOT NULL AND fulltext_keywords!='' LIMIT 10;", "mysql"
    )
    occurence_dicos = utils.import_kw_dico_req(
        "SELECT id,fulltext_keywords FROM cybergeo WHERE fulltext_keywords IS NOT NULL AND fulltext_keywords!='' LIMIT 10;",
        "mysql",
    )
    [relevantkw, relevant_dico] = kwFunctions.extract_relevant_keywords(corpus, kwLimit, occurence_dicos)
    # export as csv
    utils.export_dico_csv(relevant_dico, resdir + "relevantDico_kw" + str(kwLimit), False)
    utils.export_dico_num_csv(relevantkw, resdir + "termhoods_kw" + str(kwLimit), False)
Example #31
    def train_model(self):
        # initialize variables
        try:
            tf.global_variables_initializer().run()
        except:
            tf.initialize_all_variables().run()

        # load model if model is exist
        bool_load = self.load_model()
        if bool_load:
            print('[***] load model successfully')
        else:
            print('[!!!] fail to load model')

        # get mnist dataset
        datasource = get_data(data_type=self.config.dataset,
                              is_training=self.config.is_training)
        data_gen = gen_batch_data(batchsize=self.batchsize,
                                  datasource=datasource)

        counter = 0
        for epoch in range(self.config.epoch):
            # save model per 10 epoches
            print('epoch:{}'.format(epoch))
            if np.mod(epoch, 10) == 0:
                self.save_model()

            for ite in tqdm(range(50000 // self.batchsize)):
                input_x, input_labels = next(data_gen)
                noise_z = np.random.uniform(-1,
                                            1,
                                            size=[self.batchsize,
                                                  self.z_dim]).astype(np.float)

                if ite == 0:
                    sample_labels = input_labels

                # optimize discriminator
                _, d_loss, summaries = self.sess.run(
                    [self.d_optim, self.d_loss, self.summaries],
                    feed_dict={
                        self.z: noise_z,
                        self.input_labels: input_labels,
                        self.input_x: input_x
                    })
                # optimize generator
                _, g_loss = self.sess.run([self.g_optim, self.g_loss],
                                          feed_dict={
                                              self.z: noise_z,
                                              self.input_labels: input_labels
                                          })
                # optimize generator a second time (two G updates per D update)
                _, g_loss = self.sess.run([self.g_optim, self.g_loss],
                                          feed_dict={
                                              self.z: noise_z,
                                              self.input_labels: input_labels
                                          })

                # sample image during training phase
                if np.mod(ite, 100) == 0:
                    sample = self.sample.eval({
                        self.z: noise_z,
                        self.input_labels: sample_labels
                    })
                    save_image([8, 8], '{}/sample_{:3d}_{:4d}.png'.format(
                        self.config.sample_dir, epoch, ite), sample)
                    # save_image([8,8], '{}/input_{:3d}_{:4d}.png'.format(self.config.sample_dir, epoch, ite), input_x)

                # visualize loss in browser using tensorboard
                counter = counter + 1
                self.summary_writer.add_summary(summaries, global_step=counter)
                        '--mode',
                        type=str,
                        required=True,
                        help='either "train" or "test"')
    parser.add_argument('-w',
                        '--weights',
                        type=str,
                        help='a trained model weights')
    args = parser.parse_args()

    maybe_make_dir('weights')
    maybe_make_dir('portfolio_val')

    timestamp = time.strftime('%Y%m%d%H%M')

    data = np.around(get_data())
    train_data = data[:, :3526]
    test_data = data[:, 3526:]

    env = TradingEnv(train_data, args.initial_invest)
    state_size = env.observation_space.shape
    action_size = env.action_space.n
    agent = DQNAgent(state_size, action_size)
    scaler = get_scaler(env)

    portfolio_value = []

    if args.mode == 'test':
        # remake the env with test data
        env = TradingEnv(test_data, args.initial_invest)
        # load trained weights

def bernoulli(head, tail):
    total = head + tail
    return ((head / total)**head) * ((tail / total)**tail)


def binomial(head, tail):
    return combinations(head + tail, head) * bernoulli(head, tail)


parser = argparse.ArgumentParser()
parser.add_argument('--filename', type=str, default='testfile.txt')
parser.add_argument('--a', type=int, default=0)
parser.add_argument('--b', type=int, default=0)
args = parser.parse_args()
print(args)

trials = get_data(args.filename)
a, b = args.a, args.b

for case, trial in enumerate(trials, 1):
    print('case %s: %s' % (case, trial))
    head = trial.count('1')
    tail = len(trial) - head
    print('Likelihood: %s' % binomial(head, tail))
    print('Beta prior:     a = %s b = %s' % (a, b))
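    # Conjugate update: a Beta(a, b) prior combined with the binomial
    # likelihood above yields a Beta(a + heads, b + tails) posterior.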
    a += head
    b += tail
    print('Beta posterior: a = %s b = %s\n' % (a, b))
Example #34
# add the model on top of the convolutional base
model = Model(inputs=model.input, outputs=top_model(model.output))

for layer in model.layers[:19]:
    layer.trainable = False

# Two methods:
# 1. Use sparse_categorical_crossentropy
# 2. if using categorical_crossentropy:
#      should use keras.utils.np_utils.to_categorical in generator to convert labels.
model.compile(loss='categorical_crossentropy',
              optimizer=optimizers.SGD(lr=1e-4, momentum=0.9),
              metrics=['accuracy'])
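
# A hedged illustration of method 2 from the comment above (toy labels only):
# integer class ids must be one-hot encoded before being fed to a model
# compiled with categorical_crossentropy.
from keras.utils.np_utils import to_categorical

example_one_hot = to_categorical([0, 2, 1], num_classes=3)  # shape (3, 3)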

train_lines, valid_lines = get_data('data.csv')

ntrain, nvalid = len(train_lines), len(valid_lines)

print("""
Training set: %d images.
Validation set: %d images.
""" % (ntrain, nvalid))

train_generator = generator_from_csv(train_lines,
                                     batch_size=batch_size,
                                     target_size=(img_height, img_width),
                                     train=True)
validation_generator = generator_from_csv(valid_lines,
                                          batch_size=1,
                                          target_size=(img_height, img_width),
Example #35
def get_home_page():
    return render_template("home.html", data=get_data())
Example #36
    model.fit(X, y.reshape(-1))

    # Get loss from std
    sys.stdout = old_stdout
    loss_history = mystdout.getvalue()
    loss_list = []
    for line in loss_history.split('\n'):
        if (len(line.split("loss: ")) == 1):
            continue
        loss_list.append(float(line.split("loss: ")[-1]))
    return model, loss_list


if __name__ == '__main__':
    config = {EPOCHS: 1000, LEARNING_RATE: 0.05}
    X, y = get_data()

    # Sklearn model
    model, sklearn_losses = sklearn_classifier(X, y, config)

    # Random selection coordinate descent
    print("Random Model")
    config[WEIGHT_SELECTION] = "random"
    rand_model = CoordinateDescentRegression(X.shape[1])
    rand_losses, rand_accs = train(rand_model, X, y, config)

    # config[LEARNING_RATE] = 0.00001
    # Best selection coordinate descent
    print("BEST Model")
    config[WEIGHT_SELECTION] = "best"
    best_model = CoordinateDescentRegression(X.shape[1])
Example #37
def get_data():
    res = utils.get_data()
    return utils.sort_data(res)
Example #38
def get_wide_deep_columns_new(data_file):
    df = get_data(data_file)
    print('len(df.columns)', len(df.columns))
    deep_columns = []
    wide_columns = []
    cross_dict = {}
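    # Walk the feature configuration list and build two parallel column sets:
    # wide_columns feeds the linear part of a wide & deep model, deep_columns
    # feeds the DNN part via embedding / numeric columns.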
    for feature_conf_info in feature_conf_list:
        feature_function = feature_conf_info['feature_column_function']
        feature_name = feature_conf_info['name']
        ###########################category############################
        if feature_function == 'categorical_column_with_vocabulary_list':
            voc_list = feature_conf_info.get('voc_list')
            if voc_list is None:
                voc_list = df[feature_name].unique()
            feature = tf.feature_column.categorical_column_with_vocabulary_list(
                feature_name, voc_list)
            # feature = tf.feature_column.categorical_column_with_identity(feature_name, len(voc_list))
            wide_columns.append(feature)
            deep_columns.append(
                tf.feature_column.embedding_column(feature, dimension=8))

        elif feature_function == 'categorical_column_with_hash_bucket':  # hash the string values instead of maintaining an explicit vocabulary; hash_bucket_size is roughly 2-5x the number of categories, so hash collisions are possible
            hash_bucket_size = feature_conf_info['hash_bucket_size']
            feature = tf.feature_column.categorical_column_with_hash_bucket(
                feature_name, hash_bucket_size=hash_bucket_size)
            embed_dim = embedding_dim(hash_bucket_size)
            wide_columns.append(feature)
            deep_columns.append(
                tf.feature_column.embedding_column(feature,
                                                   dimension=embed_dim))
        ###########################category#################################

        ###########################dense columns############################
        elif feature_function == 'numeric_column':
            deep_columns.append(tf.feature_column.numeric_column(feature_name))
        ###########################dense columns############################

        ###########################cross columns############################
        elif feature_name == 'same_city_look_order_start_distance':
            bucket_num = feature_conf_info['bucket_num']
            dim_value = bucket_num
            score_span = (df[feature_name].max() -
                          df[feature_name].min()) // bucket_num + 1
            min_value = df[feature_name].min()
            same_city_look_order_start_distance_buckets = tf.feature_column.bucketized_column(
                tf.feature_column.numeric_column(feature_name),
                [(i * score_span + min_value) for i in range(bucket_num)])
            wide_columns.append(same_city_look_order_start_distance_buckets)
            embed_dim = embedding_dim(dim_value)
            deep_columns.append(
                tf.feature_column.embedding_column(
                    same_city_look_order_start_distance_buckets,
                    dimension=embed_dim))
            cross_dict.update({
                'same_city_look_order_start_distance':
                same_city_look_order_start_distance_buckets
            })
        elif feature_name == 'same_city_look_order_price':
            bucket_num = feature_conf_info['bucket_num']
            dim_value = bucket_num
            score_span = (df[feature_name].max() -
                          df[feature_name].min()) // bucket_num + 1
            min_value = df[feature_name].min()
            same_city_look_order_price_buckets = tf.feature_column.bucketized_column(
                tf.feature_column.numeric_column(feature_name),
                [(i * score_span + min_value) for i in range(bucket_num)])
            wide_columns.append(same_city_look_order_price_buckets)
            embed_dim = embedding_dim(dim_value)
            deep_columns.append(
                tf.feature_column.embedding_column(
                    same_city_look_order_price_buckets, dimension=embed_dim))
            cross_dict.update({
                'same_city_look_order_price':
                same_city_look_order_price_buckets
            })
        elif feature_name == 'same_city_look_order_time_diff':
            bucket_num = feature_conf_info['bucket_num']
            dim_value = bucket_num
            score_span = (df[feature_name].max() -
                          df[feature_name].min()) // bucket_num + 1
            min_value = df[feature_name].min()
            same_city_look_order_time_diff_buckets = tf.feature_column.bucketized_column(
                tf.feature_column.numeric_column(feature_name),
                [(i * score_span + min_value) for i in range(bucket_num)])
            wide_columns.append(same_city_look_order_time_diff_buckets)
            embed_dim = embedding_dim(dim_value)
            deep_columns.append(
                tf.feature_column.embedding_column(
                    same_city_look_order_time_diff_buckets,
                    dimension=embed_dim))
            cross_dict.update({
                'same_city_look_order_time_diff':
                same_city_look_order_time_diff_buckets
            })
        elif feature_function == 'crossed_column':  # the components of a crossed feature cannot be defined twice, so the crosses are handled separately here
            start_distance_price_time_diff_feature = tf.feature_column.crossed_column(
                [
                    cross_dict['same_city_look_order_start_distance'],
                    cross_dict['same_city_look_order_price'],
                    cross_dict['same_city_look_order_time_diff']
                ],
                hash_bucket_size=10000)
            wide_columns.append(start_distance_price_time_diff_feature)
            deep_columns.append(
                tf.feature_column.embedding_column(
                    start_distance_price_time_diff_feature,
                    dimension=embedding_dim(10000)))

            start_distance_price_feature = tf.feature_column.crossed_column(
                [
                    cross_dict['same_city_look_order_start_distance'],
                    cross_dict['same_city_look_order_price']
                ],
                hash_bucket_size=1000)
            wide_columns.append(start_distance_price_feature)
            deep_columns.append(
                tf.feature_column.embedding_column(
                    start_distance_price_feature,
                    dimension=embedding_dim(1000)))

            price_time_diff_feature = tf.feature_column.crossed_column(
                [
                    cross_dict['same_city_look_order_price'],
                    cross_dict['same_city_look_order_time_diff']
                ],
                hash_bucket_size=1000)
            wide_columns.append(price_time_diff_feature)
            deep_columns.append(
                tf.feature_column.embedding_column(
                    price_time_diff_feature, dimension=embedding_dim(1000)))

            start_distance_time_diff_feature = tf.feature_column.crossed_column(
                [
                    cross_dict['same_city_look_order_start_distance'],
                    cross_dict['same_city_look_order_time_diff']
                ],
                hash_bucket_size=1000)
            wide_columns.append(start_distance_time_diff_feature)
            deep_columns.append(
                tf.feature_column.embedding_column(
                    start_distance_time_diff_feature,
                    dimension=embedding_dim(1000)))
        else:
            pass
        ###########################cross columns############################

    return wide_columns, deep_columns
Example #39
 def get(self, url, param, retry=3):
     logger.info('Crawl content url: %s, %s', url, str(param))
     if not url.startswith('http'):
         return utils.read_content(url)
     return utils.get_data(url, param, retry)
Example #40
def get_label(cnt, pct, labelList, n):
    '''
    Predict age, gender, zodiac sign and education level.
    :param score:
    :return: the n interests with the highest interest scores
    '''

    score = cal_label(cnt.astype('float64'), pct.astype('float64'))
    score = score.tolist()
    n_top = sorted(range(len(score)), key=lambda i: score[i], reverse=True)[:n]
    labels = set(labelList[index] for index in n_top)
    return labels


if __name__ == '__main__':
    name, age, gender, interests, edu, constellation = get_data(
        conf.aiqiyi_dir)
    genderList = ['男', '女']
    ageList = ['1-17', '18-24', '25-30', '31-35', '36-40', '40+']
    eduList = ['小学', '初中', '高中-中专', '大专', '本科', '硕士以上']
    constellationList = [
        '白羊座', '金牛座', '双子座', '巨蟹座', '狮子座', '处女座', '天秤座', '天蝎座', '射手座', '摩羯座',
        '水瓶座', '双鱼座'
    ]
    labelList = [(gender, genderList), (age, ageList), (edu, eduList),
                 (constellation, constellationList)]

    name_toidx, idx_toname = name_index(name)

    namelist = ['热血狂篮', '亲爱的活祖宗', '芈月传奇番外篇之邪恶游戏', '热血狂篮', 'others']
    namelists = [namelist, namelist]
    for each in labelList:
Example #41
for i in range(len(d1)):
    data.append((d1[i], d2[i]))
data = get_data(data)

lengths = []
for i in data:
    lengths.append(len(i[0].phrase.split()))
    lengths.append(len(i[1].phrase.split()))

if params.max > 0:
    random.shuffle(data)
    data = data[0:params.max]

if params.mode == "ppdb":
    d = utils.get_data("../data/ppdb-XL-ordered-data.txt")
    random.shuffle(d)
    ct = 0
    for i in data:
        ct += len(i[0].phrase.split()) + len(i[1].phrase.split())
    data = []
    print ct
    idx = 0
    ct2 = 0
    while ct > 0:
        dd = d[idx]
        data.append(dd)
        v = len(dd[0].phrase.split()) + len(dd[1].phrase.split())
        ct -= v
        ct2 += v
        idx += 1
Example #42
from utils import get_data


def solve(data, limit):
    last = {v: i for i, v in enumerate(data, 1)}
    prev = data[-1]
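    # Memory game: each turn says 0 if the previous number had not been spoken
    # before, otherwise the gap between its last two occurrences; `last` maps
    # a number to the most recent turn on which it was spoken.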
    for i in range(len(data) + 1, limit + 1):
        new = i - 1 - last[prev] if prev in last else 0
        last[prev] = i - 1
        prev = new

    return prev


if __name__ == '__main__':
    data = get_data("15")
    data = list(map(int, data.split(',')))

    print(solve(data, 2020))
    print(solve(data, 30000000))
Example #43
#coding=utf-8
import os
import difflib
import tensorflow as tf
import numpy as np
from utils import decode_ctc, GetEditDistance

# 0. Prepare the dictionaries needed for decoding; the parameters must match training. The dictionaries could also be saved locally and read back directly.
from utils import get_data, data_hparams
data_args = data_hparams()
train_data = get_data(data_args)

# 1. Acoustic model -----------------------------------
# from model_speech.cnn_ctc import Am, am_hparams
#
# am_args = am_hparams()
# am_args.vocab_size = len(train_data.am_vocab)
# am = Am(am_args)
# print('loading acoustic model...')
# am.ctc_model.load_weights('logs_am/model.h5')

# 2. Language model -------------------------------------------
from model_language.transformer import Lm, lm_hparams

lm_args = lm_hparams()
lm_args.input_vocab_size = len(train_data.pny_vocab)
lm_args.label_vocab_size = len(train_data.han_vocab)
lm_args.dropout_rate = 0.
print('loading language model...')
lm = Lm(lm_args)
sess = tf.Session(graph=lm.graph)
Example #44
from utils import get_data, output

if __name__ == "__main__":

    # 1.prepare data and define epochs
    epochs = 10
    optimizer = "adam"
    dropout = 0.5
    data_type = 'raw'

    if len(sys.argv) == 3:
        data_type = sys.argv[1]
        epochs = int(sys.argv[2])

    data, train, test = get_data(data_type)

    # 2.count #unique features for each sparse field,and record dense feature field name
    fixlen_feature_columns = [SparseFeat(feat, vocabulary_size=data[feat].nunique(), embedding_dim=4)
                              for i, feat in enumerate(sparse_features)] + [DenseFeat(feat, 1, )
                                                                            for feat in dense_features]
    dnn_feature_columns = fixlen_feature_columns
    linear_feature_columns = fixlen_feature_columns

    feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns)

    # 3.generate input data for model

    train_model_input = {name: train[name] for name in feature_names}
    test_model_input = {name: test[name] for name in feature_names}
Example #45
        73: "CDLTAKURI",
        74: "CDLTASUKIGAP",
        75: "CDLTHRUSTING",
        76: "CDLTRISTAR",
        77: "CDLUNIQUE3RIVER",
        78: "CDLUPSIDEGAP2CROWS",
        79: "CDLXSIDEGAP3METHODS",
        80: 'ema_12',
        81: 'ema_26',
        82: 'upper_list',
        83: 'lower_list',
        84: 'K_value',
        85: 'D_value',
        86: 'label'
    }
    df = utils.get_data(MYDB, SQL, NAME_DICT)
    if not isinstance(df, int) and len(df) != 0:
        df = utils.get_data(MYDB, SQL, NAME_DICT)

        param_grid = {
            'n_estimators': [100, 200, 300, 400, 500, 700, 800, 1000],
            'max_features': ['auto', 'sqrt', 'log2'],
            'max_depth': [4, 5, 6, 7, 8, 10, 15, 20, 50, 100],
            'criterion': ['gini', 'entropy']
        }
        scoring = ['precision_macro', 'recall_macro']
        feature_cols = [
            'volume', 'numberOfTrades', 'var_ema', 'var_bollinger',
            'var_stoch', 'rsi_indicator', 'stoch_indicator', 'RSI',
            'ema_indicator', 'bollinger_indicator', 'CDL2CROWS',
            'CDL3BLACKCROWS', 'CDL3INSIDE', 'CDL3LINESTRIKE', 'CDL3OUTSIDE',
Example #46
    workdir = '/home/miguel/Documents/tese/ViscoPlastic-ML/2D/butterfly/'
    abaqus_dir = '/home/miguel/Documents/tese/ViscoPlastic-ML/abaqus_deploy/butterfly/'
    workvid = '/home/miguel/Documents/tese/ViscoPlastic-ML/2D/butterfly/videos/'
    P = 'P_1'
    el_num = 75

    P = ['P_1', 'P_2', 'P_3']
    el_num = [75, 322, 229]

    for n in range(0, 3):
        data_dir = []
        data_dir.append(abaqus_dir + 'state_butterfly_hist.txt')
        data_dir.append(workdir + '/results/' + P[n] + '/data_' + P[n] +
                        '.csv')
        data_dir.append(abaqus_dir + 'deriv_butterfly_hist.txt')
        abaqus, chaboche = get_data(data_dir)
        chaboche = chaboche.drop(chaboche.index[len(chaboche) - 1])
        abaqus = rename_headers(abaqus)

        # Select from abaqus dataframe info about el_num and int_point
        fea = get_dataframe(abaqus, el_num[n], 2)
        exit()
        # remove repeated values
        # Plot and save graphs of Back stress
        title = 'Back Stress'
        Writer = animation.writers['ffmpeg']
        writer = Writer(fps=2, metadata=dict(artist='Me'), bitrate=-1)
        fig = plt.figure(figsize=(10, 6))
        plt.xlim(0, 0.05)
        plt.ylim(0, 75)
        ani = matplotlib.animation.FuncAnimation(fig,
Example #47

print("ENTER THE EXPERIMENT NUMBER: ")
print("1. Experiment 1")
print("2. Experiment 2")
print("3. Experiment 3")
print("4. Experiment 4")
print("5. ANN with layers 1024-512-256-32-1 and sigmoid")
print("6. ANN with layers 1024-512-256-32-1 and sigmoid with Adam")
print("7. CNN with tanh activation")
print("8. CNN with tanh activation and Adam")
print("9. ANN with original layers and relu as activation")

choice = int(input())
if (choice == 1):
    X, Y = utils.get_data('steering', 0)
    num_examples = X.shape[0]
    split = int(num_examples * 0.8)
    X_train, Y_train, means, stds = prepareTrainData(X[:split], Y[:split])
    X_test, Y_test = X[split:], Y[split:]
    X_val, Y_val = prepareTestData(X_test, Y_test, means, stds)
    experiment_one(X_train, Y_train, X_val, Y_val)
elif (choice == 2):
    X, Y = utils.get_data('steering', 0)
    num_examples = X.shape[0]
    split = int(num_examples * 0.8)

    X_train, Y_train, means, stds = prepareTrainData(X[:split], Y[:split])
    X_test, Y_test = X[split:], Y[split:]
    X_val, Y_val = prepareTestData(X_test, Y_test, means, stds)
    experiment_two_32(X_train, Y_train, X_val, Y_val)
Example #48
from utils import get_data

data = list(map(int, get_data().split()))


def fuel_req(mass):
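    # Fuel estimate: floor(mass / 3) minus 2, clamped at zero.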
    return max((mass // 3) - 2, 0)


print("Fuel requirement without fuel: ", sum([fuel_req(m) for m in data]))

tot_fuel = 0
for m in data:
    fuel = fuel_req(m)
    tot_fuel += fuel
    while fuel > 0:
        fuel = fuel_req(fuel)
        tot_fuel += fuel

print("Fuel requirement with fuel: ", tot_fuel)
Example #49
            elif instruction == "^":
                santa.y += 1
            elif instruction == "v":
                santa.y -= 1

            visited_locations.add((santa.x, santa.y))

        elif i % 2 == 1:
            if instruction == ">":
                robo_santa.x += 1
            elif instruction == "<":
                robo_santa.x -= 1
            elif instruction == "^":
                robo_santa.y += 1
            elif instruction == "v":
                robo_santa.y -= 1

            visited_locations.add((robo_santa.x, robo_santa.y))

    return len(visited_locations)


if __name__ == '__main__':
    data = get_data("03")

    p1_result = part_one(data)
    print(p1_result)

    p2_result = part_two(data)
    print(p2_result)
Example #50
# import matplotlib.pyplot as plt

# 0. Prepare the training data ------------------------------
data_args = data_hparams()
data_args.data_type = 'train'
data_args.data_path = '../../dataset/'
# data_args.data_path = '/mnt/data/wanli/dataset/data_thchs30/train/'
data_args.thchs30 = True
data_args.aishell = False
data_args.prime = False
data_args.stcmd = False
data_args.batch_size = 4
# data_args.data_length = 10
data_args.data_length = None
data_args.shuffle = True
train_data = get_data(data_args)

# 0. Prepare the validation data ------------------------------
data_args = data_hparams()
data_args.data_type = 'dev'
data_args.data_path = '../../dataset/'
# data_args.data_path = '/mnt/data/wanli/dataset/data_thchs30/test/'
data_args.thchs30 = True
data_args.aishell = False
data_args.prime = False
data_args.stcmd = False
data_args.batch_size = 4
data_args.data_length = None
# data_args.data_length = 10
data_args.shuffle = True
dev_data = get_data(data_args)
Example #51
            print('Epoch %05d: early stopping' % (self.stopped_epoch + 1))

    def get_monitor_value(self, logs):
        logs = logs or {}
        monitor_value = logs.get(self.monitor)
        return monitor_value


# -------------------------------------------------------------------------------------------------------------------
# Functions and classes end, training script start ------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------------

model = getattr(models, args.model)  # get model_fn

# get datasets for target / source models
target_train_set, target_test_set, source_train_set, source_test_set, input_dim, n_classes = utils.get_data(
    args.dataset, args.ndata)
regularization = "none"
if args.defense in ['l1', 'l2', 'dropout']:
    regularization = args.regularization
elif args.defense in ['', 'dp', 'advreg', 'fine-tune',
                      'whole']:  # last two are transfer learning
    regularization = 'none'  # these train the model differently, which we will do at training time below.
else:
    raise ValueError(f"Defense: {args.defense} not valid")

# define and compile model
target_model = model(input_dim, args.model_depth, regularization,
                     args.reg_constant, n_classes)
source_model = model(input_dim, args.model_depth, regularization,
                     args.reg_constant, n_classes)
t_optim = tf.keras.optimizers.Adam()
    res = sys_score / ideal_score
    return res


def save_ass(a):
    with open('cluster_assignment', 'wb') as f:
        pickle.dump(a, f)


def load_ass(f_name):
    with open(f_name, 'rb') as f:
        model = pickle.load(f)
    return model


docs, q_docs, mod = utils.get_data(mode='test', t_='lda', n_=64)
print 'Loading cluster'
with open('km_80', 'rb') as f:
    model = pickle.load(f)
# a = assign_cluster(docs,model,mod)
# print 'Saving assignments'
# save_ass(a)
a = load_ass('cluster_assignment')
avg = 0
n = len(q_docs)
print n
for q_id in q_docs:
    q = q_docs[q_id]
    rankings = get_top(q, docs, mod, model, a)
    val = get_ndcg(rankings, 10)
    print str(q_id), str(val)
Example #53
def main():
    logger.info("Logger is set - training start")

    # set default gpu device id
    torch.cuda.set_device(config.gpus[0])

    # set seed
    np.random.seed(config.seed)
    torch.manual_seed(config.seed)
    torch.cuda.manual_seed_all(config.seed)

    torch.backends.cudnn.benchmark = True

    # get data with meta info
    input_size, input_channels, n_classes, train_data, valid_data = utils.get_data(
        config.dataset,
        config.data_path,
        config.cutout_length,
        validation=True,
        autoaugment=config.autoaugment)

    if config.label_smooth != 0:
        criterion = utils.CrossEntropyLabelSmooth(
            10, config.label_smooth).to(device)
    else:
        criterion = nn.CrossEntropyLoss().to(device)
    use_aux = config.aux_weight > 0.
    if config.dataset in utils.LARGE_DATASETS:
        model = AugmentCNNImageNet(input_size, input_channels,
                                   config.init_channels, n_classes,
                                   config.layers, use_aux, config.genotype)
    else:
        model = AugmentCNN(input_size,
                           input_channels,
                           config.init_channels,
                           n_classes,
                           config.layers,
                           use_aux,
                           config.genotype,
                           SSC=config.SSC)
    model = nn.DataParallel(model, device_ids=config.gpus).to(device)
    # model size
    mb_params = utils.param_size(model)
    logger.info("Model size = {:.3f} MB".format(mb_params))

    # weights optimizer
    if config.p != 1:
        optimizer = torch.optim.SGD(model.parameters(),
                                    1.,
                                    momentum=config.momentum,
                                    weight_decay=config.weight_decay)
    else:
        optimizer = torch.optim.SGD(model.parameters(),
                                    config.lr,
                                    momentum=config.momentum,
                                    weight_decay=config.weight_decay)

    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=config.batch_size,
                                               shuffle=True,
                                               num_workers=config.workers,
                                               pin_memory=True)
    valid_loader = torch.utils.data.DataLoader(valid_data,
                                               batch_size=config.batch_size,
                                               shuffle=False,
                                               num_workers=config.workers,
                                               pin_memory=True)
    if config.p == 1:
        lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer, config.epochs)
    else:
        lr_cpa = utils.cosine_power_annealing_lr(nepochs=config.epochs,
                                                 min_lr=config.lr_min,
                                                 max_lr=config.lr,
                                                 p=config.p)
        lr_scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, [lr_cpa])
    best_top1 = 0.
    # training loop
    for epoch in range(config.epochs):
        lr_scheduler.step()
        drop_prob = config.drop_path_prob * epoch / config.epochs
        model.module.drop_path_prob(drop_prob)

        # training
        train(train_loader, model, optimizer, criterion, epoch)

        # validation
        cur_step = (epoch + 1) * len(train_loader)
        top1 = validate(valid_loader, model, criterion, epoch, cur_step)

        # save
        if best_top1 < top1:
            best_top1 = top1
            is_best = True
        else:
            is_best = False
        utils.save_checkpoint(model, config.path, is_best)

        print("")
    logger.info("Final best Prec@1 = {:.4%} for job {}".format(
        best_top1, config.name))
Example #54
import numpy as np
import torch
from torch import optim
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler

from utils import get_data, train, predict

sc = MinMaxScaler()
seq_length = 4
alpha_array = np.array([0.9])

Xtrain, Xtest, Ytrain, Ytest, dataX, dataY = get_data(alpha_array,
                                                      series_length=1440,
                                                      seq_length=seq_length,
                                                      sc=sc)

# The model will be a sequence of layers (?)
model = torch.nn.Sequential()
model.add_module('dense1', torch.nn.Linear(seq_length, 8))
model.add_module('relu1', torch.nn.Sigmoid())
model.add_module('dense2', torch.nn.Linear(8, 1))

loss = torch.nn.MSELoss(reduction='mean')
optimizer = optim.SGD(model.parameters(), lr=0.0611, momentum=0.9)

epochs = 200  # Increase amount of epochs for better accuracy.
batch_size = 32
n_batches = Xtrain.size()[0] // batch_size

costs = []
        top_k_corrs = [
            pair for pair in corrs[1:]
            if pair[0] in items_ranked_by_user and pair[1] > 0
        ][:self.k]

        #biases for the ranked items of user u
        B_ujs = [
            self.global_bias + self.user_biases[user] +
            self.item_biases[pair[0]] for pair in top_k_corrs
        ]
        N_uks = [
            pair[1] for pair in top_k_corrs
        ]  #the corr of k nearest neighbors of item i that are rated by user u
        R_ujs = [csr[user, pair[0]]
                 for pair in top_k_corrs]  #ratings of these items

        B_ujs = np.array(B_ujs)
        N_uks = np.array(N_uks)
        R_ujs = np.array(R_ujs)

        pred = (np.dot(N_uks, (R_ujs.T - B_ujs)) / N_uks.sum()) + b_ui
        return pred


if __name__ == '__main__':
    baseline_knn_config = Config(k=10)
    train, validation = get_data()
    knn_baseline = KnnBaseline(baseline_knn_config)
    knn_baseline.fit(train)
    print(knn_baseline.calculate_rmse(validation))
Example #56
from sklearn.externals import joblib
from sklearn import preprocessing
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import train_test_split, validation_curve
from matplotlib import pyplot as plt
import pickle
import sys
import numpy as np
from sklearn.utils import shuffle
from utils import get_data

workdir = '/home/miguel/Documents/tese/ViscoPlastic-ML/2D/train/'

# Load Dataset
X, y = get_data(workdir, 'training_data')

scaler_x = preprocessing.StandardScaler()
scaler_y = preprocessing.StandardScaler()

# Fit the scaler to transform features and targets
scaler_x.fit(X)
scaler_y.fit(y)

# Split Dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=.3,
                                                    random_state=42)

# Transform data to training and testing
X = scaler_x.transform(X)
Example #57
def get_item_by_title(item_title):
    for item in get_data():
        if item["title"] == item_title:
            return item
Example #58
import utils as utl
import pso

data = utl.get_data(file_path="assets/sprint7ToroideMixto.csv")
pso = pso.ClusteringPSO(data, 50, max_iter=100)
print(pso.search)
Example #59
from utils import get_data, answers, print_answers

data = get_data(2020, 11).split('\n')


def surrounding_seats(data, row, seat):
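    # Count occupied seats ('#') among the up-to-eight neighbours of
    # (row, seat), guarding against the edges of the grid.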
    full = 0
    if (row + 1) < len(data):
        if data[row + 1][seat] == "#":
            full += 1
        if (seat + 1) < len(data[row]):
            if data[row + 1][seat + 1] == "#":
                full += 1
        if (seat - 1) >= 0:
            if data[row + 1][seat - 1] == "#":
                full += 1
    if (row - 1) >= 0:
        if data[row - 1][seat] == "#":
            full += 1
        if (seat + 1) < len(data[row]):
            if data[row - 1][seat + 1] == "#":
                full += 1
        if (seat - 1) >= 0:
            if data[row - 1][seat - 1] == "#":
                full += 1
    if (seat + 1) < len(data[row]):
        if data[row][seat + 1] == "#":
            full += 1
    if (seat - 1) >= 0:
        if data[row][seat - 1] == "#":
            full += 1
Example #60
import utils
import loss

if __name__ == '__main__':
    # Make directory to save plots
    path = os.path.join(
        os.getcwd(), 'plots', args.loss + ("_top_k" if args.topk else "") +
        ("_sn" if args.spectral_norm else "") +
        ("_clip" if args.clip_weights else ""))
    os.makedirs(path, exist_ok=True)
    # Init hyperparameters
    fixed_generator_noise: torch.Tensor = torch.randn(
        [args.samples // 10, args.latent_size], device=args.device)
    # Get data
    data: torch.Tensor = utils.get_data(samples=args.samples).to(args.device)
    # Get generator
    generator: nn.Module = utils.get_generator(latent_size=args.latent_size)
    # Get discriminator
    discriminator: nn.Module = utils.get_discriminator(
        use_spectral_norm=args.spectral_norm)
    # Init Loss function
    if args.loss == 'standard':
        loss_generator: nn.Module = loss.GANLossGenerator()
        loss_discriminator: nn.Module = loss.GANLossDiscriminator()
    elif args.loss == 'non-saturating':
        loss_generator: nn.Module = loss.NSGANLossGenerator()
        loss_discriminator: nn.Module = loss.NSGANLossDiscriminator()
    elif args.loss == 'hinge':
        loss_generator: nn.Module = loss.HingeGANLossGenerator()
        loss_discriminator: nn.Module = loss.HingeGANLossDiscriminator()