def regex_findall(module_name, redis_key, regex, item_id, item_content, max_time=30, r_set=True):
    proc = Proc(target=_regex_findall, args=(redis_key, regex, item_content, r_set, ))
    try:
        proc.start()
        proc.join(max_time)
        if proc.is_alive():
            proc.terminate()
            Statistics.incr_module_timeout_statistic(module_name)
            err_mess = "{}: processing timeout: {}".format(module_name, item_id)
            print(err_mess)
            publisher.info(err_mess)
            return []
        else:
            if r_set:
                all_items = r_serv_cache.smembers(redis_key)
            else:
                all_items = r_serv_cache.lrange(redis_key, 0, -1)
            r_serv_cache.delete(redis_key)
            proc.terminate()
            return all_items
    except KeyboardInterrupt:
        print("Caught KeyboardInterrupt, terminating workers")
        proc.terminate()
        sys.exit(0)
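# NOTE (illustrative): the `_regex_findall` worker is referenced above but not
# defined in this excerpt. A minimal sketch of what such a worker could look
# like, assuming `r_serv_cache` is the same redis client used above and a
# group-free pattern (re.findall returns tuples when the regex has groups);
# the real helper may differ:
# def _regex_findall(redis_key, regex, item_content, r_set):
#     import re
#     for match in re.findall(regex, item_content):
#         if r_set:
#             r_serv_cache.sadd(redis_key, match)
#         else:
#             r_serv_cache.rpush(redis_key, match)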
def MultinomialNaiveBayesGridSearch_OLD():  # C=1 is best
    cs = 10.0**np.arange(-9, 2, 0.5)
    aucs = []
    for c in cs:
        clf = MultinomialNB(alpha=c).fit(f_train, y_train)
        probs = clf.predict_proba(f_test)
        fpr, tpr, _ = roc_curve(y_true=y_test, y_score=probs[:, 1])
        roc_auc = auc(fpr, tpr)
        cstr = '%0.2e' % c  # unused
        myplt = st.plotROC(fpr, tpr, roc_auc, figure=False, show=False, returnplt=True,
                           showlegend=False,
                           title='Grid Search - Multinomial Naive Bayes ROC Curve')
        aucs.append(roc_auc)
    best = 0
    for i in range(len(cs)):
        if aucs[i] > aucs[best]:
            best = i
    c = cs[best]
    clf = MultinomialNB(alpha=c).fit(f_train, y_train)
    probs = clf.predict_proba(f_test)
    fpr, tpr, _ = roc_curve(y_true=y_test, y_score=probs[:, 1])
    roc_auc = auc(fpr, tpr)  # recompute for the refit model (was reusing the loop's last value)
    myplt = st.plotROC(fpr, tpr, roc_auc, legendlabel='Best alpha = %0.2e' % c,
                       figure=False, show=False, returnplt=True, showlegend=True,
                       title='Grid Search - Multinomial Naive Bayes ROC Curve')
    myplt.show()
    return clf
def get_facebook_posts(self):
    for user_handle in self.user_handle_list:
        try:
            posts_info = self.graph.get_object(id=user_handle + "/posts")
            post_list = posts_info.get("data", [])
            for post in post_list:
                facebook_post_obj = {}
                facebook_post_obj["id"] = post.get("id", "")
                facebook_post_obj["url"] = ""
                facebook_post_obj["handle"] = user_handle
                facebook_post_obj["content"] = post.get("message", "").replace("'", "")
                facebook_post_obj["timestamp"] = post.get("created_time", "")
                facebook_post_obj["time_lookup"] = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                insert_in_facebook_post(self.conn, facebook_post_obj)
                # print(facebook_post_obj["id"], ":...inserted")
        except StandardError as e:
            # print user_handle + "\t" + str(e)
            Statistics.add_facebook_error_sites(user_handle + "\t" + str(e))
        except facebook.GraphAPIError as e:
            # print user_handle + "\t" + str(e)
            Statistics.add_facebook_error_sites(user_handle + "\t" + str(e))
def open(self, ga_engine):
    """ Open the database connection

    :param ga_engine: the GA Engine

    .. versionchanged:: 0.6
       The method now receives the *ga_engine* parameter.
    """
    if self.mysqldbmod is None:
        logging.debug("Loading MySQLdb module...")
        self.mysqldbmod = Util.importSpecial("MySQLdb")
    logging.debug("Opening database, host=%s", self.host)
    self.connection = self.mysqldbmod.connect(host=self.host, user=self.user,
                                              passwd=self.passwd, db=self.db,
                                              port=self.port)
    temp_stats = Statistics.Statistics()
    self.createStructure(temp_stats)
    if self.resetDB:
        self.resetStructure(Statistics.Statistics())
    if self.resetIdentify:
        self.resetTableIdentify()
def reportMeshLoadingFinished(self):
    print "Loading mesh from file finished..."
    print "Nodes: " + str(len(self.pslg.points))
    print "Segments: " + str(len(self.pslg.segments))
    print "Elements: " + str(len(self.pslg.elements))
    print ""
    print "Omega elements: " + str(len(self.parameters.omega))
    print "Omega area: " + str(self.parameters.omegaArea)
    print "Omega three elements: " + str(len(self.parameters.omegaThree))
    print "Omega three area: " + str(self.parameters.omegaThreeArea)
    print "Omega d segments: " + str(len(self.parameters.omegaD))
    print "Omega d length: " + str(self.parameters.omegaDLength)
    print ""
    print "Mesh statistics..."
    angleMinMax = Statistics.ComputeElementAngleRange(self.parameters)
    print "Element angle min: " + str(angleMinMax[0])
    print "Element angle max: " + str(angleMinMax[1])
    segmentLengthMinMax = Statistics.ComputeSegmentLengthRange(self.parameters)
    print "Segment length min: " + str(segmentLengthMinMax[0][0])
    print "Min segment: " + str(segmentLengthMinMax[0][1]) + " -> " + str(segmentLengthMinMax[0][2])
    print "Segment length max: " + str(segmentLengthMinMax[1][0])
    print "Max segment: " + str(segmentLengthMinMax[1][1]) + " -> " + str(segmentLengthMinMax[1][2])
    print ""
def getTodayTest():
    params = request.get_json()
    try:
        ticket = params['tic']
    except KeyError:  # was a bare except; only a missing 'tic' field is expected here
        return {'status': 'bad', 'reason': 'No tic field is found'}
    try:
        price = pickle.load(open('data/tmp_price_%s.pkl' % ticket, 'rb'))
    except Exception:  # cache miss or unreadable cache file: refetch
        # sym = SymbolHistory(ticket, StockAPI.getPriceHistory(ticket, 365*2+50))
        price = StockAPI.getPriceHistory(ticket, 200)
        pickle.dump(price, open('data/tmp_price_%s.pkl' % ticket, 'wb'))
    sym = SymbolHistory(ticket, price=price)
    df = pd.DataFrame(sym.ohcl)
    df['volumn'] = sym.volumn
    df['time'] = sym.time
    avg = df.close.rolling(window=10).mean()
    pf = Statistics.polyfit(df.close, 1, errAccept=0.008, avg=avg)
    pf2 = Statistics.polyfit(df.close, 2, errAccept=0.01, avg=avg)
    df['fitval'] = pf.fitval
    df['fitCurve'] = pf2.fitval
    daily = Statistics.dailyStat(sym)
    df['volRsi'] = daily.volRsi
    df['buyVol'] = daily.buyVol
    df['sellVol'] = daily.sellVol
    df['unkVol'] = df.volumn - daily.sellVol - daily.buyVol
    df['unkVol'][df.unkVol.isnull()] = df.volumn  # fill missing with the total volume
    df = df.replace({np.nan: None})
    return {'status': 'ok', 'payload': df.to_dict(orient='records')}
def LogisticGridSearch_OLD():  # C=1 is best
    cs = 10.0**np.arange(-1, 2, 0.25)
    aucs = []
    for c in cs:
        clf = LogisticRegression(penalty='l1', C=c).fit(f_train, y_train)
        probs = clf.predict_proba(f_test)
        fpr, tpr, _ = roc_curve(y_true=y_test, y_score=probs[:, 1])
        roc_auc = auc(fpr, tpr)
        cstr = '%0.2e' % c  # unused
        myplt = st.plotROC(fpr, tpr, roc_auc, figure=False, show=False, returnplt=True,
                           showlegend=False,
                           title='Grid Search - Logistic Regression ROC Curve')
        aucs.append(roc_auc)
    best = 0
    for i in range(len(cs)):
        if aucs[i] > aucs[best]:
            best = i
    c = cs[best]
    clf = LogisticRegression(penalty='l1', C=c).fit(f_train, y_train)
    probs = clf.predict_proba(f_test)
    fpr, tpr, _ = roc_curve(y_true=y_test, y_score=probs[:, 1])
    roc_auc = auc(fpr, tpr)  # recompute for the refit model (was reusing the loop's last value)
    myplt = st.plotROC(fpr, tpr, roc_auc, legendlabel='Best C = %0.2e' % c,
                       figure=False, show=False, returnplt=True, showlegend=True,
                       title='Grid Search - Logistic Regression ROC Curve')
    myplt.show()
    return clf
def GridSearch(data, params, classifier, classifier_name, paramname, probstype=1, clf_kwargs={}):
    f_train, f_test, y_train, y_test = data  # C=1 is best

    def getROC(clf, probstype):
        if probstype == 1:
            probs = clf.predict_proba(f_test)
            fpr, tpr, _ = roc_curve(y_true=y_test, y_score=probs[:, 1])
        else:
            probs = clf.decision_function(f_test)
            fpr, tpr, _ = roc_curve(y_true=y_test, y_score=probs)
        return fpr, tpr

    aucs = []
    mykwargs = clf_kwargs.copy()
    for c in params:
        mykwargs[paramname] = c
        clf = classifier(**mykwargs).fit(f_train, y_train)
        fpr, tpr = getROC(clf, probstype)
        roc_auc = auc(fpr, tpr)
        # cstr = '%0.2e' % c
        myplt = st.plotROC(fpr, tpr, roc_auc, figure=False, show=False, returnplt=True,
                           showlegend=False,
                           title='Grid Search: ' + classifier_name + ' ROC Curve')
        aucs.append(roc_auc)
    best = 0
    for i in range(len(params)):
        if aucs[i] > aucs[best]:
            best = i
    c = params[best]
    mykwargs[paramname] = c
    clf = classifier(**mykwargs).fit(f_train, y_train)
    fpr, tpr = getROC(clf, probstype)
    roc_auc = auc(fpr, tpr)  # recompute for the refit model (was reusing the loop's last value)
    myplt = st.plotROC(fpr, tpr, roc_auc, legendlabel='Best ' + paramname + ' = %0.2e' % c,
                       figure=False, show=False, returnplt=True, showlegend=True,
                       title='Grid Search: ' + classifier_name + ' ROC Curve')
    myplt.show()

    maxAUC = aucs[best]
    cs = params
    optC = params[best]
    plt.figure()
    maxauclabel = ("Max AUC = %0.2f, " % maxAUC) + paramname + (" =%s" % optC)
    plt.semilogx(cs, np.ones(len(cs)) * maxAUC, 'r', label=maxauclabel, linewidth=2, zorder=10)
    plt.semilogx(cs, aucs, zorder=1)
    plt.title('Grid Search: ' + classifier_name + ' AUC Scores')
    plt.xlabel(paramname)
    plt.ylabel('AUC Score')
    plt.legend(loc="lower right")
    # plt.legend(loc='lower left', bbox_to_anchor=(1, 0),
    #            ncol=1, fancybox=True, shadow=False)
    plt.show()
    return clf
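# Example (illustrative): the generic GridSearch above subsumes the *_OLD
# variants. Assuming the same globals (f_train, f_test, y_train, y_test) are
# packed into `data`, the logistic-regression search becomes:
# data = (f_train, f_test, y_train, y_test)
# clf = GridSearch(data, 10.0**np.arange(-1, 2, 0.25), LogisticRegression,
#                  'Logistic Regression', 'C', probstype=1,
#                  clf_kwargs={'penalty': 'l1'})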
def SGDGridSearch_OLD():  # C=1 is best
    cs = 10.0**np.arange(-9, 9, 1)
    aucs = []
    for c in cs:
        clf = SGDClassifier(penalty='l1', alpha=c).fit(f_train, y_train)
        probs = clf.decision_function(f_test)
        fpr, tpr, _ = roc_curve(y_true=y_test, y_score=probs)
        roc_auc = auc(fpr, tpr)
        cstr = '%0.2e' % c  # unused
        myplt = st.plotROC(fpr, tpr, roc_auc, figure=False, show=False, returnplt=True,
                           showlegend=False,
                           title='Grid Search - SGD Classifier ROC Curve')
        aucs.append(roc_auc)
    best = 0
    for i in range(len(cs)):
        if aucs[i] > aucs[best]:
            best = i
    c = cs[best]
    clf = SGDClassifier(penalty='l1', alpha=c).fit(f_train, y_train)
    probs = clf.decision_function(f_test)
    fpr, tpr, _ = roc_curve(y_true=y_test, y_score=probs)
    roc_auc = auc(fpr, tpr)  # recompute for the refit model (was reusing the loop's last value)
    myplt = st.plotROC(fpr, tpr, roc_auc, legendlabel='Best alpha = %0.2e' % c,  # was 'Best C'; the tuned parameter is alpha
                       figure=False, show=False, returnplt=True, showlegend=True,
                       title='Grid Search - SGD Classifier ROC Curve')
    myplt.show()
    return clf, aucs
def proposition_3():
    print()
    print(" ########## Proposition 3: ##########")
    s1 = Scenario(excel=True, excelName='./Scenarios_Propositions.xlsx', id="P3_1")
    s2 = Scenario(excel=True, excelName='./Scenarios_Propositions.xlsx', id="P3_2")
    s3 = Scenario(excel=True, excelName='./Scenarios_Propositions.xlsx', id="P3_3")
    data1 = s1.simulate()
    data2 = s2.simulate()
    data3 = s3.simulate()
    data1.plotMeanRPS(save=True)
    data2.plotMeanRPS(save=True)
    data3.plotMeanRPS(save=True)
    data1.computeData()
    data2.computeData()
    data3.computeData()
    print("UWM1 <-> UWM3 " + stats.getFullStats(data1.uwm, data3.uwm, inter=True))
    print("CWM1 <-> CWM3 " + stats.getFullStats(data1.cwm, data3.cwm, inter=True))
def married_women_vs_unmarried(data):
    """
    print summary statistics of earnings by married women vs. unmarried women
    :param data:
    :return: None
    """
    women_data = filter_by_features(data, 'female', {1})[0]
    married_women_data, unmarried_women_data = filter_by_features(women_data, 'marital', {1, 2, 3})
    population_to_data = {
        'Married Women': married_women_data,
        'Unmarried Women': unmarried_women_data
    }
    statistic_functions = [Statistics.mean, Statistics.median]
    education_scopes = [(0, 10), (11, 20)]
    order_of_print = ['Married Women', 'Unmarried Women']
    print('Question 2:')
    for scope in education_scopes:
        print("If {0}<=Y<={1}, then:".format(scope[0], scope[1]))
        for population in order_of_print:
            Statistics.population_statistics(population, population_to_data[population],
                                             'education', 'earnings', scope[0], scope[1],
                                             statistic_functions)
def learn(self):
    self.train_losses = []
    self.validation_losses = None if self.validation_set is None else []
    self.validation_accuracies = None if self.validation_set is None else []
    xtrain = self.train_set
    ytrain = self.train_labels
    for epoch in range(Params.MAX_EPOCH):
        loss = Statistics.MSELoss()
        # inner counter renamed from `i`, which shadowed the epoch counter
        for n, x, y in zip(range(1, len(xtrain) + 1), xtrain, ytrain):
            o = [np.random.normal(0, 1) for _ in range(self.outputDim)]
            loss.update(o, y)
        self.train_losses.append(loss.get())
        if self.validation_set is not None:
            loss = Statistics.MSELoss()
            accuracy = Statistics.MEELoss()
            for x, y in zip(self.validation_set, self.validation_labels):
                o = [np.random.normal(0, 1) for _ in range(self.outputDim)]
                loss.update(o, y)
                accuracy.update(o, y)
            self.validation_losses.append(loss.get())
            self.validation_accuracies.append(accuracy.get())
def binning_prof(raw, knum=200, NBmin=100, plot=True):
    """Do a binning analysis.
    knum: number of bin lengths to be simulated (k = bin size).
    NBmin: minimum number of bins.
    Return: [uncorrelated_data(NBmin)], [bin_size(knum)], [auto_correlation_time(knum)]"""
    uncorr_data, ks, corrtime, err = [], [], [], []
    # Calculate kmax and kmin; bin lengths grow as n*kmin, n = 1, 2, ...
    kmax = len(raw) // NBmin
    if kmax == 0:
        kmax = 1
    kmin = kmax // knum
    if kmin == 0:
        kmin = 1
    knum = kmax // kmin
    # Merge the bins with length k = kmin
    basedata = _merge_bin(kmin, raw)
    # Fill "ks" and "corrtime"; "uncorr_data" remains with the largest bin length.
    var0 = st.var(raw)
    for q in range(1, knum + 1):
        uncorr_data = _merge_bin(q, basedata)
        k, var = q * kmin, st.var(uncorr_data)
        err.append([k, st.err(uncorr_data)])
        ks.append(k)
        corrtime.append(_auto_corr_time(k, var, var0))
    if plot:
        pl.plot(ks, corrtime, marker='.')
        pl.xlabel('bin size')
        pl.ylabel('auto correlation time')
        pl.show()
    return uncorr_data, ks, corrtime
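# Example (illustrative): running the binning analysis on an AR(1) series; the
# autocorrelation-time curve should rise and then plateau once the bin size
# exceeds the correlation length. Assumes `st` provides var/err as used above.
# import numpy as np
# raw = [0.0]
# for _ in range(100000):
#     raw.append(0.9 * raw[-1] + np.random.normal())
# uncorr, ks, tau = binning_prof(raw, knum=100, NBmin=100, plot=True)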
def setup_display(self):
    self.FGStats.statsTable.insertRow(0)
    self.FGStats.statsTable.setItem(0, 0, QtWidgets.QTableWidgetItem(self.name))
    self.FGStats.statsTable.setItem(0, 1, QtWidgets.QTableWidgetItem(self.number))
    self.FGStats.statsTable.setItem(0, 2, QtWidgets.QTableWidgetItem(self.section))
    self.FGStats.statsTable.setItem(0, 3, QtWidgets.QTableWidgetItem(self.semester))
    self.FGStats.statsTable.setItem(0, 4, QtWidgets.QTableWidgetItem(str(Statistics.calculate_mean(self.studentGrades))))
    self.FGStats.statsTable.setItem(0, 5, QtWidgets.QTableWidgetItem(str(Statistics.calculate_median(self.studentGrades))))
    self.FGStats.statsTable.setItem(0, 6, QtWidgets.QTableWidgetItem(str(Statistics.calculate_mode(self.studentGrades))))
    self.FGStats.statsTable.setItem(0, 7, QtWidgets.QTableWidgetItem(str(Statistics.calculate_std_dev(self.studentGrades))))
    self.FGStats.statsTable.resizeColumnsToContents()
def random_spherical_samples(pipeline):
    """ generates samples from the tumor according to the specs """
    if not pipeline.tumor:
        raise Exception('Tumor does not exist')
    pipeline.tumor.cells  # kept from the original (moved after the existence check); presumably touches/loads the cell data
    samples = []
    sampler = SphericalSampler(pipeline.tumor)
    # rand_coordinate = generate_coordinate(np.max(sampler.cell_positions), np.min(sampler.cell_positions))
    rand_coordinate = sample_coordinate(sampler.cell_positions, deviate=True)
    pipeline.print2('Beginning Tumor Sampling')
    for radius in pipeline.specs['RADII']:
        pipeline.print2('Sampling radius:' + str(radius))
        for i in xrange(pipeline.specs.get('repeats', 25)):
            # generate a new random coordinate
            # centre = (rand_coordinate.next(), rand_coordinate.next(), rand_coordinate.next())
            centre = rand_coordinate.next()
            # conduct the sample
            sample = sampler.sample(radius=radius, centre=centre, with_genotypes=True)
            print Statistics.get_drivers_only(sample[1], sampler.tumor.drivers)
            # insert a tuple of (radius, centre, sample)
            samples.append((radius, centre, sample))
        pipeline.print2(str(pipeline.specs.get('repeats', 25)) + ' samples conducted')
    pipeline.print2('Sampling completed')
    pipeline.samples = samples
def main(self, argv):
    dataCleaner = CleanData()
    statsGenerator = Statistics()
    dataJoiner = JoinData()
    df_clean_data = dataCleaner.getCleanData(argv[1]).cache()
    statsGenerator.generateStats(df_clean_data)
    dataJoiner.joinData(df_clean_data)
def variability(x, y):
    # corrected sum of cross products: Sxy = sum(x*y) - sum(x)*sum(y)/n
    sumxy = Statistics.sumXY(x, y)
    sumx = Statistics.Sum(x)
    sumy = Statistics.Sum(y)
    size = Statistics.size(x)
    sxy = sumxy - (sumx * sumy) / size
    return sxy
def totalVariability(x):
    # corrected sum of squares: Sxx = sum(x^2) - (sum(x))^2 / n
    size = Statistics.size(x)
    sumsquare = Statistics.sumSquares(x)
    sumx = Statistics.Sum(x)
    totalvariability = sumsquare - (math.pow(sumx, 2) / size)
    return totalvariability
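# Example (illustrative): these two corrected sums are exactly what simple
# linear regression needs -- the least-squares slope is Sxy / Sxx. Assuming the
# hypothetical Statistics helpers used above behave as their names suggest:
# def slope(x, y):
#     return variability(x, y) / totalVariability(x)
# slope([1, 2, 3, 4], [2.1, 3.9, 6.2, 7.8])  # -> 9.7 / 5 = 1.94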
def checkMoneyFlow(symbol: SymbolHistory, lookback=20, intras=None, halfsess=False):
    # dates = symbol.time.iloc[-period:].reset_index(drop=True)
    rsi = symbol.rsi().iloc[-lookback:].reset_index(drop=True)
    priceChg = symbol.close.diff().iloc[-lookback:].reset_index(drop=True)
    date = pd.to_datetime(symbol.time.iloc[-lookback:].reset_index(drop=True), unit='s')
    dailyStat = Statistics.dailyStat(symbol, lookback=lookback)
    movingDir = Statistics.getMovingDirection(symbol.close.iloc[-lookback - 10:])
    signal = []
    for i in range(5, lookback):
        p = dailyStat.iloc[i]
        s = ''
        if priceChg[i] < 0 and p['volRsiChg'] >= 20:
            s += 'VolRsiBigIncWhenPriceDrop '
        if priceChg[i] < 0 and p['volRsiChg'] > 8 and p['volRsi'] > 60:
            s += 'VolRsiRecoverWhenPriceDrop '
        if p['volRsiChg'] >= 15 and p['volRsi'] > 50:
            s += 'VolRsiBigInc>50 '
        if all([p['volRsiChg'] > 0,
                dailyStat.iloc[i - 1]['volRsiChg'] > 0,
                priceChg[i] < 0,
                priceChg[i - 1] < 0]):
            s += 'VolRsiPosDiver '
        if p['volRsiChg'] - dailyStat.iloc[i - 1]['volRsiChg'] >= 20:
            s += 'VolRsiBigSwing '
        # if p['volRsi'] > 95:
        #     incCnt = 0
        #     for j in range(5):
        #         if dailyStat.loc[i-j]['volRsiChg'] > 0: incCnt += 1
        #     if incCnt > 3:
        #         s += 'IncrHighVolRSI '
        # if i > 10:
        #     incCnt = 0
        #     for j in range(10):
        #         if dailyStat.loc[i-j]['volRsiChg'] > 0: incCnt += 1
        #     if incCnt > 8:
        #         s += 'VolRSILongStreakInc '
        # if rsi.loc[i-1] < 30 and rsi.loc[i-1] < rsi.loc[i] and p['volRsiChg'] > 0:
        #     s += 'RSI<30ButRecover '
        # if p['volRsi'] > 85:
        #     s += 'VolRsi>85'
        if s != '':
            s = '%s : %s' % (date[i].strftime('%Y/%m/%d'), s)
            signal.append(s)
    return signal, dailyStat
def statistics_run(data, y_predicted, y_test, arguments):
    # Create statistics object
    statistics = Statistics(data)
    # Plot the genre distribution
    if "plot" in arguments:
        print("Plotting genre distribution")
        statistics.plot_frequency_of_genres()
    print("\n****RESULTS****\n")
    # Compare predicted values with true values (accuracy)
    for classifier, predicted_value in y_predicted.items():
        print("Accuracy " + classifier + ":")
        print(str(round(100 * metrics.accuracy_score(y_test, predicted_value), 2)) + "%\n")
    if "kpi" in arguments:
        statistics.get_KPI_for_each_genre(y_predicted, y_test)
    # Create and visualize confusion matrix
    if "cm" in arguments:
        statistics.calculate_and_plot_confusion_matrix(y_test, y_predicted)
def setup_display(self):
    row_count = self.gradesheetTable.rowCount()
    col_count = self.gradesheetTable.columnCount() - 2
    student_grades = []
    # Loop through our grade table
    for col in range(1, col_count):
        row_insert = self.AStats.statsTable.rowCount()
        self.AStats.statsTable.insertRow(row_insert)
        assignment_grades = []
        assignment_name = self.gradesheetTable.horizontalHeaderItem(col).get_assignment_name()
        assignment_points = self.gradesheetTable.horizontalHeaderItem(col).get_assignment_points()
        self.AStats.statsTable.setItem(row_insert, 0, QtWidgets.QTableWidgetItem(assignment_name))
        self.AStats.statsTable.setItem(row_insert, 1, QtWidgets.QTableWidgetItem(assignment_points))
        assignment_grades.append(assignment_name)
        assignment_grades.append(assignment_points)
        assignment_grades.append({})
        for row in range(1, row_count):
            student_id = self.gradesheetTable.verticalHeaderItem(row).get_student_uuid()
            grade = self.gradesheetTable.item(row, col).text()
            if grade == "" or grade == "-":
                grade = 0
            assignment_grades[2][student_id] = float(grade)
        student_grades.append(assignment_grades)
    for counter, assignment in enumerate(student_grades):
        mean = Statistics.calculate_mean(assignment[2].values())
        median = Statistics.calculate_median(assignment[2].values())
        mode = Statistics.calculate_mode(assignment[2].values())
        std_dev = Statistics.calculate_std_dev(assignment[2].values())
        self.AStats.statsTable.setItem(counter, 2, QtWidgets.QTableWidgetItem(str(mean)))
        self.AStats.statsTable.setItem(counter, 3, QtWidgets.QTableWidgetItem(str(median)))
        self.AStats.statsTable.setItem(counter, 4, QtWidgets.QTableWidgetItem(str(mode)))
        self.AStats.statsTable.setItem(counter, 5, QtWidgets.QTableWidgetItem(str(std_dev)))
    self.AStats.statsTable.resizeColumnsToContents()
def BiasScanAnalysis():
    dictOfBiasScanFiles = GetBiasResultFiles()
    numberOfTestedBoards = len(dictOfBiasScanFiles)
    for PowerBoardID in dictOfBiasScanFiles:
        # if not PowerBoardID == str(6):
        #     continue
        print "PowerBoardID " + str(PowerBoardID)
        for PowerUnitID in dictOfBiasScanFiles[PowerBoardID]:
            print "PowerUnitID " + str(PowerUnitID)
            for Load in dictOfBiasScanFiles[PowerBoardID][PowerUnitID]:
                bsData = st.BiasScan()
                bsData.readFile(resultsFolder + dictOfBiasScanFiles[PowerBoardID][PowerUnitID][Load])
                vint, vslope, iint, islope = bsData.visualizeAndCheck()
                dictOfBiasScanFiles[PowerBoardID][PowerUnitID][Load] = iint
                if Load == "High":
                    print "Offset " + str(iint)
        if dictOfBiasScanFiles[PowerBoardID]["Right"]["High"] > -0.0075 and \
           dictOfBiasScanFiles[PowerBoardID]["Left"]["High"] > -0.0075:
            print "Grade for this power board is: Inner Layers grade"
        else:
            print "Grade for this power board is: Outer Layers grade"
def stats(airports, translator):
    stats = Statistics.Statistics(airports, translator)
    # Loop through the options until the user selects 'q'
    while True:
        printStatsMenu()
        userInput = raw_input()
        if userInput == "1":
            print(stats.getLongestFlight())
        elif userInput == "2":
            print(stats.getShortestFlight())
        elif userInput == "3":
            print(stats.getAverageFlightDistance())
        elif userInput == "4":
            print(stats.getLargestCity())
        elif userInput == "5":
            print(stats.getSmallestCity())
        elif userInput == "6":
            print(stats.getAverageCitySize())
        elif userInput == "7":
            print(stats.getContinentsList())
        elif userInput == "8":
            print(stats.getHubCity())
        elif userInput == "q":
            return
def linurit4(self):
    stats = st.statistics()
    print(stats.getAvIncr(self.dfUtlendingarGisti))
    print(stats.getAvIncrMonth(self.dfUtlendingarGisti, 3))
    stats.plotAll('Útlendingar gistinætur', self.dfUtlendingarGisti, months=[8, 9, 10, 11])
def _merge_bin(k, dat):
    'Bin the data by length k. Return: a shorter data set.'
    shortdata, i = [], 0
    while i + k <= len(dat):
        shortdata.append(st.msum(dat[i:i + k]) / float(k))
        i += k
    return shortdata
def writeTSV(inPath, outPath):
    if not exists(outPath):
        makedirs(outPath)
    captions = ['link', 'argument_id', 'reply_id', 'type', 'question', 'content', 'source']
    for data, debate_id, section in Statistics.dataFiles(inPath):
        # NOTE: the second term repeats 'pro' in the original; it was likely
        # meant to count the opposing ('contra') arguments as well.
        if len(data['arguments']['pro']) + len(data['arguments']['pro']) >= 100:
            records = []
            for arg in argument_row(data):
                # arg['debate_id'] = debate_id
                arg['link'] = "https://www.openpetition.de/petition/argumente/" + debate_id
                records.append(arg)
            with open(join(outPath, section + '_' + debate_id + '.tsv'), 'w') as tsvFile:
                writer = Statistics.UnicodeDictWriter(tsvFile, fieldnames=captions,
                                                      delimiter='\t', lineterminator='\n')
                writer.writeheader()
                writer.writerows(records)
def proposition_4():
    print()
    print("########## Proposition 4: ##########")
    results = []
    results_full = []
    s = Scenario(excel=True, excelName='./Scenarios_Propositions.xlsx', id="P4_1")
    experts = copy.deepcopy(s.experts)
    for e in experts:
        epsilon = np.zeros(len(e.epsilon))
        info_index = int(np.random.uniform(0, len(epsilon), 1))
        epsilon[info_index] = 1
        e.epsilon = epsilon
    for i in range(2, len(s.experts) + 1):
        s_temp = copy.deepcopy(s)
        s_temp.experts = []
        for j in range(0, i):
            s_temp.experts.append(copy.deepcopy(experts[j]))
        data_temp = s_temp.simulate()
        data_temp.computeData()
        results_full.append(data_temp.uwm)
        results.append(data_temp.getMeanRPS()[1][0])
    for i in range(0, len(results_full) - 1):
        print("UWM (" + str(i + 2) + " Exp) <-> UWM (" + str(i + 3) + " Exp) " +
              stats.getFullStats(results_full[i], results_full[i + 1], inter=True))
    print(results)
def run(self):
    self.model_history = np.empty((self.n_periods, ), dtype=Model)
    for i in range(self.n_periods):
        sequence = self.ts[i:i + self.n_train]
        if i == 0:
            self.__fit_model_to_sequence(sequence, None)
        else:
            self.__fit_model_to_sequence(sequence, self.model_history[i - 1].params.to_array())
        self.model_history[i] = deepcopy(self.model)
        self.trade_logic.update_logic(self.model_history[i], sequence)
        if self._is_trade_open():
            # 'sequence' must be the log_return of the original TS!
            self.trade_history[-1].add_position(i, sequence[-1])
            if self.trade_logic.close_trade():
                self.trade_history[-1].close()
        else:
            if self.trade_logic.open_trade():
                # '+1' since the trade is actually opened the next period
                self.trade_history.append(Trade(i + 1))
    # Check if last 'Trade' is empty
    self.__compute_PnL()
    self.PnL_statistics = Statistics(self.PnL)
def get_description_information(self):
    for user_handle in self.user_handle_list:
        try:
            site_info = self.graph.get_object(
                id=user_handle,
                fields="id,name,fan_count,rating_count,website,overall_star_rating,username")
            facebook_general_obj = {}
            facebook_general_obj["id"] = site_info.get("id", "")
            facebook_general_obj["handle"] = site_info.get("username", "")
            facebook_general_obj["name"] = site_info.get("name", "")
            # string comparisons below changed from `is ""` to `== ""`: compare by equality, not identity
            if site_info.get("overall_star_rating", "") == "":
                facebook_general_obj["rating"] = 0
            else:
                facebook_general_obj["rating"] = site_info.get("overall_star_rating", "")
            if site_info.get("rating_count", "") == "":
                facebook_general_obj["count_reviews"] = 0
            else:
                facebook_general_obj["count_reviews"] = site_info.get("rating_count", "")
            if site_info.get("fan_count", "") == "":
                facebook_general_obj["count_likes"] = 0
            else:
                facebook_general_obj["count_likes"] = site_info.get("fan_count", "")
            facebook_general_obj["count_followers"] = 0
            facebook_general_obj["time_lookup"] = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            facebook_general_obj["url"] = "https://www.facebook.com/" + site_info.get("username", "")
            insert_in_facebook_handle_info(self.conn, facebook_general_obj)
        except StandardError as e:
            # print user_handle + "\t" + str(e)
            Statistics.add_facebook_error_sites(user_handle + "\t" + str(e))
        except facebook.GraphAPIError as e:
            # print user_handle + "\t" + str(e)
            Statistics.add_facebook_error_sites(user_handle + "\t" + str(e))
def stats(self):
    return Statistics._leaving_one_out("./toTrain/Malw", "./toTrain/NoMalw", 1)

# DIR_TO_PREDICT = "C:\\Users\\EstebanMontesMorales\\Desktop\\wmdGUI\\wmdLocalGUI\\Predicts\\"
# print WMD().train()
# print WMD().predict()
# print WMD().stats()
def MedianImageColor(InputImage, Ignore=None):
    InputImage = InputImage.convert('L')
    InputImagePixels = InputImage.load()
    # flatten the image so that we can take the median
    ColoredImage = PILToCV2(InputImage)
    GreyImage = cv2.cvtColor(ColoredImage, cv2.COLOR_BGR2GRAY)
    FlatImage = GreyImage.flatten()
    if Ignore is not None:
        PrunedImage = []
        for Item in FlatImage:
            if Item != Ignore:
                PrunedImage.append(Item)
        return Statistics.Median(PrunedImage)
    else:
        # take the median
        return Statistics.Median(FlatImage)
def calcRsi(tradingDays, index, numPeriods):
    if index == 0:
        tradingDays[index]['Gain'] = 0
        tradingDays[index]['Loss'] = 0
        tradingDays[index][f"{numPeriods}DayAvgGain"] = 0
        tradingDays[index][f"{numPeriods}DayAvgLoss"] = 0
        tradingDays[index][f"{numPeriods}DayRelativeStrength"] = 0
        tradingDays[index][f"{numPeriods}DayRSI"] = 0
    else:
        if tradingDays[index]['Close'] > tradingDays[index - 1]['Close']:
            tradingDays[index]['Gain'] = tradingDays[index]['Close'] - tradingDays[index - 1]['Close']
            tradingDays[index]['Loss'] = 0
        else:
            tradingDays[index]['Gain'] = 0
            tradingDays[index]['Loss'] = tradingDays[index - 1]['Close'] - tradingDays[index]['Close']
        if index < numPeriods:
            tradingDays[index][f"{numPeriods}DayAvgGain"] = 0
            tradingDays[index][f"{numPeriods}DayAvgLoss"] = 0
            tradingDays[index][f"{numPeriods}DayRelativeStrength"] = 0
            tradingDays[index][f"{numPeriods}DayRSI"] = 0
        elif index == numPeriods:
            # first averages are simple means over the first numPeriods gains/losses
            tradingDays[index][f"{numPeriods}DayAvgGain"] = Statistics.findSimpleAverage(tradingDays, index - (numPeriods - 1), index, 'Gain')
            tradingDays[index][f"{numPeriods}DayAvgLoss"] = Statistics.findSimpleAverage(tradingDays, index - (numPeriods - 1), index, 'Loss')
            if tradingDays[index][f"{numPeriods}DayAvgLoss"] > 0:
                tradingDays[index][f"{numPeriods}DayRelativeStrength"] = tradingDays[index][f"{numPeriods}DayAvgGain"] / tradingDays[index][f"{numPeriods}DayAvgLoss"]
                tradingDays[index][f"{numPeriods}DayRSI"] = 100 - 100 / (1 + tradingDays[index][f"{numPeriods}DayRelativeStrength"])
            else:
                tradingDays[index][f"{numPeriods}DayRelativeStrength"] = 1000000
                tradingDays[index][f"{numPeriods}DayRSI"] = 100 - 100 / (1 + tradingDays[index][f"{numPeriods}DayRelativeStrength"])
        else:
            # Wilder's smoothing of the running averages
            a = tradingDays[index - 1][f"{numPeriods}DayAvgGain"]
            b = tradingDays[index]['Gain']
            tradingDays[index][f"{numPeriods}DayAvgGain"] = ((numPeriods - 1) * a + b) / numPeriods
            tradingDays[index][f"{numPeriods}DayAvgLoss"] = ((numPeriods - 1) * tradingDays[index - 1][f"{numPeriods}DayAvgLoss"] + tradingDays[index]['Loss']) / numPeriods
            if tradingDays[index][f"{numPeriods}DayAvgLoss"] > 0:
                tradingDays[index][f"{numPeriods}DayRelativeStrength"] = tradingDays[index][f"{numPeriods}DayAvgGain"] / tradingDays[index][f"{numPeriods}DayAvgLoss"]
                tradingDays[index][f"{numPeriods}DayRSI"] = 100 - 100 / (1 + tradingDays[index][f"{numPeriods}DayRelativeStrength"])
            else:
                tradingDays[index][f"{numPeriods}DayRelativeStrength"] = 1000000
                tradingDays[index][f"{numPeriods}DayRSI"] = 100 - 100 / (1 + tradingDays[index][f"{numPeriods}DayRelativeStrength"])
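# Example (illustrative): filling a 14-day RSI column over a price series.
# Assumes `tradingDays` is a list of dicts with a 'Close' key, as used above,
# and that `closes` is a hypothetical list of closing prices:
# tradingDays = [{'Close': c} for c in closes]
# for i in range(len(tradingDays)):
#     calcRsi(tradingDays, i, 14)
# rsi = [d['14DayRSI'] for d in tradingDays]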
def get_reddit_general(self):
    try:
        print str(strftime("%H:%M:%S", gmtime())) + ": Checking Reddit General!"
        for user_handle in self.user_handle_list:
            subreddit_obj = self.r.subreddit(user_handle)
            reddit_general_obj = {}
            reddit_general_obj["id"] = subreddit_obj.id
            reddit_general_obj["count_readers"] = subreddit_obj.subscribers
            reddit_general_obj["name"] = subreddit_obj.display_name
            reddit_general_obj["information"] = subreddit_obj.description.replace("'", "")
            moderator_str = ""
            for moderator in subreddit_obj.moderator():
                moderator_str += "," + str(moderator)
            reddit_general_obj["moderators"] = moderator_str
            reddit_general_obj["time_lookup"] = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            reddit_general_obj["time_creation"] = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            insert_in_reddit_handle_info(self.conn, reddit_general_obj)
            Statistics.inc_reddit_descriptions()
    except StandardError as e:
        Statistics.add_reddit_error_sites(user_handle + "\t" + str(e))
def regex_search(module_name, redis_key, regex, item_id, item_content, max_time=30):
    proc = Proc(target=_regex_search, args=(redis_key, regex, item_content, ))
    try:
        proc.start()
        proc.join(max_time)
        if proc.is_alive():
            proc.terminate()
            Statistics.incr_module_timeout_statistic(module_name)
            err_mess = "{}: processing timeout: {}".format(module_name, item_id)
            print(err_mess)
            publisher.info(err_mess)
            return None
        else:
            first_occ = r_serv_cache.get(redis_key)
            r_serv_cache.delete(redis_key)
            proc.terminate()
            return first_occ
    except KeyboardInterrupt:
        print("Caught KeyboardInterrupt, terminating workers")
        proc.terminate()
        sys.exit(0)
def BiasScanAnalysis():  # the original was missing the parentheses
    # NOTE: `loads`, `resMeasured`, `resMean` and `resSigma` are assumed to be
    # defined at module level in the original file.
    for load in loads:
        listOfPURBiasScanFiles = GetResultFiles(PowerUnitID="Right", load=load, test="BiasScan")
        numberOfTestedBoards = len(listOfPURBiasScanFiles)
        for bscanFile in listOfPURBiasScanFiles:
            bsData = st.BiasScan()
            bsData.readFile(bscanFile)
            # unpacked names pluralized to match their use below (the original
            # unpacked `ivslope` but then indexed an undefined `ivslopes`)
            vints, vslopes, ivslopes, iints, islopes = bsData.visualizeAndCheck()
            resMeasured[load][0].append(ivslopes[0])
        listOfPULBiasScanFiles = GetResultFiles(PowerUnitID="Left", load=load, test="BiasScan")
        for bscanFile in listOfPULBiasScanFiles:
            bsData = st.VoltageScan()  # NOTE: the original uses VoltageScan here but BiasScan above; possibly a copy-paste slip
            bsData.readFile(bscanFile)
            vints, vslopes, ivslopes, iints, islopes = bsData.visualizeAndCheck()
            resMeasured[load][1].append(ivslopes[0])
        for i in range(2):
            resMean[load][i] = sum(resMeasured[load][i]) / len(resMeasured[load][i])
        for i in range(2):
            # population variance of the measured slopes (no sqrt, despite the name)
            resSigma[load][i] = sum([(resMeasured[load][i][j] - resMean[load][i])**2
                                     for j in range(len(resMeasured[load][i]))]) / len(resMeasured[load][i])
def get_description_information(self):
    print str(strftime("%H:%M:%S", gmtime())) + ": Checking Description!"
    conn = open_connection()
    user_handle_list = get_twitter_urls(conn)
    error_sites = []
    try:
        for user_handle in user_handle_list:
            # print "Checking " + user_handle
            user = self.api.get_user(user_handle)
            twitter_general_obj = {}
            twitter_general_obj["join_date"] = user.created_at
            twitter_general_obj["description"] = user.description
            twitter_general_obj["handle"] = user.screen_name
            twitter_general_obj["name"] = user.name
            twitter_general_obj["tweetcount"] = user.statuses_count
            twitter_general_obj["followercount"] = user.followers_count
            twitter_general_obj["location"] = user.location
            twitter_general_obj["desc_link"] = user.url
            twitter_general_obj["time_lookup"] = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            insert_in_handle_info(conn, twitter_general_obj)
            Statistics.inc_twitter_descriptions()
    except StandardError as e:
        Statistics.add_twitter_error_sites(user_handle + "\t" + str(e))
        # error_sites.append(user_handle)
    except tweepy.TweepError as e:
        # message = str(e.message[0]['code']) + "\n" + str(e.args[0][0]['code'])
        Statistics.add_twitter_error_sites(user_handle + "\t" + str(e.message))
    conn.close()
def get_reddit_posts(self):
    try:
        print str(strftime("%H:%M:%S", gmtime())) + ": Checking Reddit Posts!"
        for user_handle in self.user_handle_list:
            subreddit_obj = self.r.subreddit(user_handle)
            for submission in subreddit_obj.hot(limit=100):
                epoch = datetime.datetime.utcfromtimestamp(int(submission.created_utc))
                reddit_post_obj = {}
                reddit_post_obj["url_post"] = submission.url
                # Bug needs to be fixed:
                # if(submission.url == "https://www.sec.gov/news/press-release/2017-184"):
                #     print subreddit_obj
                #     break
                reddit_post_obj["id"] = submission.id
                # Check if username is available or deleted
                hasName = getattr(submission, "name", None)
                if hasName:
                    reddit_post_obj["name"] = submission.name
                else:
                    reddit_post_obj["name"] = "[deleted]"
                # Check if author is available or deleted
                # (the original tested "name" again here, which defeats the check)
                hasAuthor = getattr(submission, "author", None)
                if hasAuthor:
                    reddit_post_obj["poster"] = submission.author.name
                else:
                    reddit_post_obj["poster"] = "[deleted]"
                reddit_post_obj["title"] = submission.title.replace("'", "")
                reddit_post_obj["url_comments"] = "https://www.reddit.com/r/" + user_handle + "/comments/" + submission.id
                reddit_post_obj["time_lookup"] = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                reddit_post_obj["timestamp"] = epoch.strftime("%Y-%m-%d %H:%M:%S")
                insert_in_reddit_post(self.conn, reddit_post_obj)
                Statistics.inc_reddit_post()
                # print reddit_post_obj
                # break
    except StandardError as e:
        Statistics.add_reddit_error_sites(user_handle + "\t" + str(e) + " @ " + submission.url)
        # print "[get_reddit_posts()-ERROR at " + str(user_handle) + " @ " + submission.url + "\n" + str(e)
def initGraphMod(N, k, niter, alpha=None):
    """
    Init graph; assets are drawn from a power law distribution, other
    information is chosen at random in the ranges specified in the paper.
    N: number of nodes
    alpha: power law exponent; if None it is drawn at random in [1.5, 5.0]
    returns: graph whose nodes carry information sorted by decreasing assets
    """
    nodes = {}
    if alpha is None:
        alpha = np.random.uniform(1.5, 5)
    sample = Statistics.powerlaw_sample(100, 10**10, alpha, N)
    for i in range(N):
        equity = np.random.uniform(0, 0.25)
        cash = np.random.uniform(0, 0.25)
        # node information
        nodes[i] = {
            'ASSET': sample[i],
            'EQUITY': equity,
            'DEPOSITS': np.random.uniform(0, 1 - equity),
            'CASH': cash,
            # 'LOANS': np.random.uniform(k*(1-cash)/niter, (1-cash)),
            'LOANS': np.random.uniform(k * (1 - cash) / niter, (k + 1) * (1 - cash) / niter),
            # 'LOANS': np.random.uniform(0, 1-cash),
            # 0: False, 1: default, 2: failure, 3: exogenous
            'BANKRUPT': 0
        }
    # sorting by decreasing assets
    sort = sorted(nodes.values(), key=lambda n: n['ASSET'], reverse=True)
    # nodes as dictionary
    nodes = {i: sort[i] for i in range(len(sort))}
    # undirected edges: expected degree proportional to assets
    # (np.array so the elementwise scaling below works; the original applied `/` to a plain list)
    exp_degree = np.array([nodes[x]['ASSET'] for x in nodes])
    exp_degree = exp_degree / max(exp_degree)
    exp_degree = exp_degree * N
    g = nx.expected_degree_graph(exp_degree, selfloops=False)
    # remove cycles
    # g = nx.bfs_tree(g, 0)
    for i in g.nodes():
        g.node[i] = nodes[i]
    return g
# NOTE: this fragment starts mid-branch; the opening `if` on argv[1]
# (presumably the training option, given the Classifier._fit call) is not
# part of the excerpt.
    if len(argv) != 7:
        usage()
        exit()
    train_malware_path = argv[2]
    train_non_malware_path = argv[3]
    fprototypes = argv[4]
    f2wvmodel = argv[5]
    ffmodel = argv[6]
    Classifier._fit(train_malware_path, train_non_malware_path, True, fprototypes, f2wvmodel, ffmodel)
elif argv[1] == "--predict":
    if len(argv) != 7:
        usage()
        exit()
    exe_file, k, path_prototypes, path_w2v_model, path_f_model = argv[2], int(argv[3]), argv[4], argv[5], argv[6]
    it = clock()
    c_class = Classifier._predict(exe_file, k, path_prototypes, path_w2v_model, path_f_model)
    report(exe_file, c_class, clock() - it)
elif argv[1] == "--statistics":
    if len(argv) != 6:
        usage()
        exit()
    type_statistics = argv[2]
    k = int(argv[3])
    train_malware_path = argv[4]
    train_non_malware_path = argv[5]
    if type_statistics == "-lou":
        Statistics._leaving_one_out(train_malware_path, train_non_malware_path, k)
    # elif type_statistics == "-bfc": Statistics._cross_validation(...)
    # elif type_statistics == "-hdo": Statistics._hold_out(...)
    # elif type_statistics == "-prt": Statistics._partition(...)
    # elif type_statistics == "-rst": Statistics._resubstitution(...)
    else:
        usage()
def median(self, in_val):
    return Statistics.median(in_val)  # the original referenced an undefined `list_in_values`
def residual(self, theta):
    return stat.norm(theta - self.theta)
def binning(raw, k):
    uncorr = _merge_bin(k, raw)
    err = st.err(uncorr) * 2
    return st.mean(uncorr), err
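# Example (illustrative): estimating the mean of a correlated series with a
# bin size large enough to decorrelate the bins, e.g. one taken from the
# plateau of binning_prof above; assumes `st` provides mean/err as used above.
# mean, err = binning(raw, k=64)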
def stats(self):
    return Statistics._leaving_one_out(MALW_PATH, NOMALW_PATH, 1)
# NOTE: this fragment starts mid-function; the beginning of the train/test
# split (defining `train`, `features`, `approved`, `sel_bool_train`) is not
# part of the excerpt.
    sel_bool_test = train == 0
    sel_ind_train = np.where(sel_bool_train)[0]
    sel_ind_test = np.where(sel_bool_test)[0]
    f_train = features[sel_ind_train]
    f_test = features[sel_ind_test]
    # N.B. approved = 1 - rejected
    y_train = np.array(approved[sel_bool_train]).astype(int)
    y_test = np.array(approved[sel_bool_test]).astype(int)
    return f_train, f_test, y_train, y_test

# CLASSIFIERS
# (the disabled block below is cut off mid-way in this excerpt)
'''
clf1 = MultinomialNB().fit(f_train, y_train)
probs = clf1.predict_proba(f_test)
fpr, tpr, _ = roc_curve(y_true=y_test, y_score=probs[:, 1])
roc_auc = auc(fpr, tpr)
st.plotROC(fpr, tpr, roc_auc, "MultinomialNB")

clf2 = LogisticRegression(penalty='l1').fit(f_train, y_train)
probs = clf2.predict_proba(f_test)
fpr, tpr, _ = roc_curve(y_true=y_test, y_score=probs[:, 1])
roc_auc = auc(fpr, tpr)
st.plotROC(fpr, tpr, roc_auc, "LogReg")

clf3 = SGDClassifier(penalty='l1').fit(f_train, y_train)