import calendar
import math

def getDay(day, derived_day, month, year):
    # '*' is a wildcard: use the derived day if one exists,
    # otherwise fall back to the last day of the month.
    if day == '*':
        if not math.isnan(derived_day):
            return int(derived_day)
        return calendar.monthrange(year, month)[1]
    # Explicit days pass through unchanged.
    return day

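# Usage sketch (calling convention assumed from the signature above):
# with the '*' wildcard and no derived day, getDay falls back to the
# month's last day.
print(getDay('*', float('nan'), 2, 2024))  # 29 (2024 is a leap year)
print(getDay('*', 15.0, 2, 2024))          # 15 (derived day wins)
print(getDay(7, float('nan'), 2, 2024))    # 7 (explicit day passes through)
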
import math

import pandas as pd

def reposicionar_centroids(self, centroids: list, matriz: pd.DataFrame):
    print("===================")
    print(f"Centroids in: {centroids}")
    self.centroids: list = centroids
    for _indice in range(len(centroids)):
        # Filter the points assigned to this centroid's group.
        filtro = f"centroid_id == 'centroid_{_indice}'"
        sub_conjunto: pd.DataFrame = matriz.query(filtro)
        # Compute the numeric group means once; an empty group yields NaN
        # and leaves the centroid where it was.
        medias = sub_conjunto[["ponto_x", "ponto_y"]].mean()
        if math.isnan(medias.ponto_x) or math.isnan(medias.ponto_y):
            continue
        # self.centroids[_indice] = [round(medias.ponto_x), round(medias.ponto_y)]
        self.centroids[_indice] = [medias.ponto_x, medias.ponto_y]
    print(f"Centroids out: {self.centroids}")
    return self.centroids

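# Toy sketch (assumed context: the method belongs to a k-means-style
# class; the Stub class and the column values below are ours, only to
# make the method callable).
import pandas as pd

class Stub:
    reposicionar_centroids = reposicionar_centroids

matriz = pd.DataFrame({
    "centroid_id": ["centroid_0", "centroid_0", "centroid_1"],
    "ponto_x": [1.0, 3.0, 10.0],
    "ponto_y": [2.0, 4.0, 20.0],
})
Stub().reposicionar_centroids([[0, 0], [9, 9]], matriz)
# -> [[2.0, 3.0], [10.0, 20.0]]  (each group's mean point)
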
import math

def getYear(year, derived_year):
    # '*' is a wildcard: use the derived year if one exists.
    if year == '*':
        if not math.isnan(derived_year):
            return int(derived_year)
        # raise ValueError('Can not derive year')
        return -1
    return int(year)

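# Usage sketch (calling convention assumed from the signature above):
# -1 signals that no year could be derived from the wildcard.
print(getYear('*', 2021.0))           # 2021
print(getYear('*', float('nan')))     # -1
print(getYear('1999', float('nan')))  # 1999
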
import math

# 'df' is assumed to be a module-level DataFrame of records.
def getClosureForFolder(v):
    folder = v['folder']
    if folder == 'folder':
        piece = v['piece']
        item = v['item']
        # Rows for this piece (and item, when item is not NaN) whose
        # closure type starts with 'open_on_transfer'.
        s1 = df.loc[df['piece'].eq(piece)
                    & (df['item'].eq(item) if not math.isnan(item) else True)
                    & (df['closure_type'].str.startswith('open_on_transfer'))]
        if s1.empty:
            # Check if I have closed_until or unknown_status.
            s2 = df.loc[df['piece'].eq(piece)
                        & (df['item'].eq(item) if not math.isnan(item) else True)
                        & (df['closure_type'].str.startswith('closed_until'))]
            if s2.empty:
                return 'unknown_status'
            return 'closed_until'
        return 'open_on_transfer'
    return v['closure_type']

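# Toy sketch (schema assumed from the lookups above): a module-level df
# with piece/item/closure_type columns drives getClosureForFolder.
import pandas as pd

df = pd.DataFrame({
    "piece": [1, 1],
    "item": [1.0, float('nan')],
    "closure_type": ["closed_until_2030", "open_on_transfer"],
})
v = {"folder": "folder", "piece": 1, "item": 1.0, "closure_type": None}
print(getClosureForFolder(v))
# 'closed_until' -- no open_on_transfer row matches item 1.0, but a
# closed_until row does.
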
import math

import pandas as pd

def populateClosureStartDate(v):
    from datetime import datetime

    dateOfBirth = v['date_of_birth']
    piece = v['piece']
    item = v['item']
    closure_type = v['closure_type']
    if closure_type == 'closed_until':
        if pd.isnull(dateOfBirth):
            # If I don't have a date of birth, I must copy the latest
            # date of birth from the files.
            df1 = df.loc[df['piece'].eq(piece)
                         & (df['item'].eq(item) if not math.isnan(item) else True)]['date_of_birth'].copy()
            # Series.sort() no longer exists in pandas; sort_values
            # returns the sorted copy.
            df1 = df1.sort_values(ascending=False)
            latestDate = df1.iloc[0]
            return datetime.strftime(latestDate, '%Y-%m-%dT%H:%M:%S')
        return str(datetime.strftime(dateOfBirth, '%Y-%m-%dT%H:%M:%S'))

import math

import numpy as np

# unit_vector() is assumed to be defined alongside this helper.
def angle_between(v1, v2):
    """
    Returns the angle in radians between vectors 'v1' and 'v2'::

        >>> angle_between((1, 0, 0), (0, 1, 0))
        1.5707963267948966
        >>> angle_between((1, 0, 0), (1, 0, 0))
        0.0
        >>> angle_between((1, 0, 0), (-1, 0, 0))
        3.141592653589793
    """
    v1_u = unit_vector(v1)
    v2_u = unit_vector(v2)
    angle = np.arccos(np.dot(v1_u, v2_u))
    # Rounding can push the dot product just outside [-1, 1], making
    # arccos return NaN for parallel or antiparallel vectors.
    if math.isnan(angle):
        if (v1_u == v2_u).all():
            return 0.0
        return np.pi
    return angle

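# A common alternative sketch (not from the original; the name
# angle_between_clipped is ours): clipping the dot product into
# [-1.0, 1.0] keeps arccos from ever returning NaN, so no
# special-casing is needed.
def angle_between_clipped(v1, v2):
    v1_u = unit_vector(v1)
    v2_u = unit_vector(v2)
    return np.arccos(np.clip(np.dot(v1_u, v2_u), -1.0, 1.0))
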
import math

from deap import base, creator

def main():
    args = parse_args()
    creator.create("Maximum", base.Fitness, weights=(1.0, ))
    creator.create("Minimum", base.Fitness, weights=(-1.0, ))
    fitness = creator.Minimum if args.minimum else creator.Maximum
    creator.create("Particle", list, exemplar=list, best=None,
                   no_improvement_counter=0, fitness=fitness, fragrance=float)

    best_histories = []
    epochs = []
    accuracies = []
    best_values = []
    for i in range(args.iteration):
        result = run_boa(args)
        best_values.append(result[1])
        save_fitness_history("../results/" + args.logCatalog + "/", result[4])
        best_histories.append(result[3])
        accuracies.append(result[2])
        if accuracies[i] <= args.accuracy:
            epochs.append(result[0])
    save_best_fitness_history("../results/" + args.logCatalog + "/", best_histories)
    # Drop runs whose best value is infinite or NaN before reporting.
    filtered_best_values = [
        value for value in best_values
        if not (math.isinf(value) or math.isnan(value))
    ]
    display_and_save_results(epochs, filtered_best_values, accuracies,
                             args.accuracy, "../results/" + args.logCatalog + "/")

dict_data['fsa:CashAndCashEquivalents'] = datalisten[i, dataslut]
dict_data['fsa:CashAndCashEquivalents_prev'] = datalisten[i, dataslut - 1]
if datalisten[i, 1] == 20000:
    dict_data['fsa:Equity'] = datalisten[i, dataslut]
    dict_data['fsa:Equity_prev'] = datalisten[i, dataslut - 1]
if datalisten[i, 1] == 49500:
    dict_data['fsa:ProfitLoss'] = datalisten[i, dataslut]
    dict_data['fsa:ProfitLoss_prev'] = datalisten[i, dataslut - 1]
if datalisten[i, 1] == 49100:
    dict_data['fsa:ProfitLossFromOrdinaryOperatingActivities'] = datalisten[i, dataslut]
    dict_data['fsa:ProfitLossFromOrdinaryOperatingActivities_prev'] = datalisten[i, dataslut - 1]

# Drop NaN-valued entries: the downstream transformer and classifier
# cannot handle missing values.
slettes = []
for noegle in dict_data.keys():
    if math.isnan(dict_data[noegle]):
        slettes.append(noegle)
for noegle in slettes:
    del dict_data[noegle]

transform_spain = spain_to_dict()
transformed_data = transform_spain.transform([dict_data])
output.append(transformed_data[0])
print(clf_EW_spain.predict([dict_data]), clf_EW_spain.predict_proba([dict_data]))

import datetime
import math

import numpy
import pandas

# calDistance and calSectionId are helpers defined elsewhere in the module.
def sectionSpeed(numOfHours, numOfDays, trainData, firstTime, n,
                 minLon, lonLen, minLat, latLen):
    '''Compute the mean section speed, indexed [section][hour].
    n means the whole map is split into an n*n grid; the section id is
    the grid cell's one-dimensional index.'''
    # Default speed used where data is missing.
    defaultVel = 5.0
    # Initialise the 3-D structure speed[section][hour][day]: a list of
    # observed speeds in section i, hour j, day k.
    speed = []
    for i in range(n * n):
        li = []
        speed.append(li)
        for j in range(numOfHours):
            lj = []
            li.append(lj)
            for k in range(numOfDays):
                # Each cell must be a list, since speeds are appended below.
                lj.append([])
    for file in trainData:
        df = pandas.read_csv(file, header=None,
                             names=["taxiId", "lat", "lon", "busy", "time"],
                             dtype={"taxiId": numpy.int16, "lat": numpy.double,
                                    "lon": numpy.double, "busy": numpy.int8,
                                    "time": str})
        # Sort df by taxi id, then by timestamp, ascending.
        df.sort_values(by=["taxiId", "time"], axis=0,
                       ascending=[True, True], inplace=True)
        taxiId1 = -1
        lat1 = 0
        lon1 = 0
        sectionId1 = 0
        time1 = firstTime
        for row in df.itertuples(index=False):
            taxiId2 = row[0]
            lat2 = row[1]
            lon2 = row[2]
            time2 = datetime.datetime.strptime(row[4], "%Y/%m/%d %H:%M:%S")
            # Only consecutive pings of the same taxi within the same
            # hour contribute a speed sample.
            if taxiId1 == taxiId2 and time1.hour == time2.hour:
                v = calDistance(lon1, lat1, lon2, lat2) / ((time2 - time1).seconds)
                speed[sectionId1][time1.hour - firstTime.hour][(time1 - firstTime).days].append(v)
            taxiId1, lat1, lon1, time1 = taxiId2, lat2, lon2, time2
            sectionId1 = calSectionId(lon1, lat1, minLon, lonLen, minLat, latLen, n)
    res = []
    for i in range(n * n):
        li = []
        res.append(li)
        for j in range(numOfHours):
            temp = []
            for k in range(numOfDays):
                temp.append(numpy.mean(speed[i][j][k]))
            # Days with no observations average to NaN; nanmean skips
            # them, and the default speed is used only when every day
            # in the cell is empty.
            avg = numpy.nanmean(temp)
            if math.isnan(avg):
                li.append(defaultVel)
            else:
                li.append(avg)
    return res

import math

def is_nan(x):
    # Guard with isinstance: math.isnan raises TypeError for non-numeric
    # input, so strings and None simply return False here.
    return isinstance(x, float) and math.isnan(x)

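# Usage sketch: the isinstance guard makes this safe on mixed-type data
# (e.g. object-dtype pandas columns), unlike a bare math.isnan call.
print(is_nan(float('nan')))  # True
print(is_nan(1.0))           # False
print(is_nan('nan'))         # False (a string, not a float NaN)
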
import glob
import json
import math

import pandas as pd
import psycopg2

try:
    litindex_json_files = glob.glob("./rawdata/*.json")
    conn = psycopg2.connect(
        "dbname='litindex' user='******' host='0.0.0.0' password='******'")
    cur = conn.cursor()
    for onefile in litindex_json_files:
        json_records = pd.read_json(onefile, lines=True)
        for index, row in json_records.iterrows():
            print(row['id'])
            # print(row['institution_id'])
            # Replace NaN ids/years with zero placeholders before insert.
            rowId = row['id']
            if math.isnan(rowId):
                rowId = 0
                # print("NAN row_id =", rowId)
            rowInstitutionId = row['institution_id']
            if math.isnan(rowInstitutionId):
                rowInstitutionId = 0.0
                # print("NAN rowInstitutionId =", rowInstitutionId)
            year = row['year']
            if math.isnan(year):
                year = 0
                # print("NAN year =", year)
            cur.execute(
                "INSERT INTO open_syllabi(id, source_url, source_anchor, syllabus_probability, year, field_name, institution_id, grid_name, grid_country_code, text_md5, text) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);",
                (
                    rowId,
                    row['source_url'],

    ('fsa:ProfitLoss_prev', 92),
    ('fsa:ProfitLoss', 93),
):
    vaerdi = datalist[virksomhed][col]
    dict_input_poland[tekst] = vaerdi

# Total assets are the NaN-safe sum of the three asset components.
dict_input_poland['fsa:Assets'] = np.nan_to_num(
    dict_input_poland['fsa:NoncurrentAssets']) + np.nan_to_num(
        dict_input_poland['fsa:CurrentAssets']) + np.nan_to_num(
            dict_input_poland['fsa:Prepayments'])
dict_input_poland['fsa:Assets_prev'] = np.nan_to_num(
    dict_input_poland['fsa:NoncurrentAssets_prev']) + np.nan_to_num(
        dict_input_poland['fsa:CurrentAssets_prev']) + np.nan_to_num(
            dict_input_poland['fsa:Prepayments_prev'])

# Drop NaN-valued entries before transforming and classifying.
slettes = []
for noegle in dict_input_poland.keys():
    if math.isnan(dict_input_poland[noegle]):
        slettes.append(noegle)
for noegle in slettes:
    del dict_input_poland[noegle]

transform_poland = Polish_to_dict()
transformed_data = transform_poland.transform([dict_input_poland])
output.append(transformed_data[0])
# print(dict_input_poland)
print(virksomhed, clf_EW_poland.predict_proba([dict_input_poland]))

# df_out = pd.DataFrame.from_dict(output)
# import matplotlib.pyplot as plt
# print(df_out.columns)