def import_export():
    """Round-trip demo: load Data.csv from the dataset folder, echo it,
    then persist the same frame as Data.pickle."""
    csv_path = approot.get_dataset('Data.csv')
    frame = pd.read_csv(csv_path)
    print(frame)
    pickle_path = approot.get_dataset('Data.pickle')
    frame.to_pickle(pickle_path)
def relative_blocks(blockname):
    """Return the list of block names related to *blockname*.

    Reads the mapping stored in relativeBlockName.json (produced by
    readCSV2).  Returns an empty list when *blockname* has no entry.
    """
    relative_block_name = []
    relative_block_file = approot.get_dataset("relativeBlockName.json")
    # `with` guarantees the handle is closed; the original opened the file
    # and never closed it.
    with open(relative_block_file, 'r', encoding='utf-8') as fp:
        json_object = json.load(fp)
    if blockname in json_object:
        # iterating the inner dict yields its keys, i.e. the related names
        relative_block_name.extend(json_object[blockname])
    return relative_block_name
def readCSV2(filename):
    """Group the block-name CSV by page name, sort each group by count
    (descending), and dump the result to relativeBlockName.json.

    The JSON maps name -> {blockname: count, ...}, entries ordered by
    descending count within each name.
    """
    data = pd.read_csv(filename)
    result = {}
    for name, group in data.groupby("name"):
        ordered = group.sort_values(by="count", ascending=False)
        # fresh dict per group instead of the original's shared
        # tempdict.clear()/.copy() dance
        result[name] = {row["blockname"]: row["count"]
                        for _, row in ordered.iterrows()}
    out_path = approot.get_dataset("relativeBlockName.json")
    # the original never closed this handle, so the JSON could be left
    # unflushed/truncated; `with` closes and flushes it
    with open(out_path, 'w', encoding='utf-8') as fp:
        json.dump(result, fp, ensure_ascii=False)
def readCSV(filename):
    """Explode the children_json column of the crawl table into a flat
    (name, blockname, count) table and save it as blocknames.csv."""
    pd.set_option('display.max_rows', None)
    pd.set_option('display.max_columns', None)
    pd.set_option('display.width', 300)
    data = pd.read_csv(filename)
    data.columns = ['id', 'name', 'parent_json', 'children_json']
    rows = []
    for index, row in data.iterrows():
        children = json.loads(row['children_json'])
        print((row['id'], row['name']))
        for child in children:  # renamed from `object`, which shadowed the builtin
            rows.append({"name": row['name'],
                         "blockname": child['name'],
                         "count": child['count']})
    # Build the frame once: the original called DataFrame.append inside the
    # loop, which is O(n^2) and was removed entirely in pandas 2.0.
    blockname = pd.DataFrame(rows, columns=['name', 'blockname', 'count'])
    print(blockname)
    blockFile = approot.get_dataset("blocknames.csv")
    blockname.to_csv(blockFile)
def save_model():
    """Fit an SVC on the iris data, then load a previously saved
    classifier with joblib and print its prediction for the first sample.

    A plain-pickle round trip was prototyped here originally; the joblib
    variant is the one kept.  The dump call stays disabled — the pickle is
    assumed to already exist in the dataset folder.
    """
    clf = SVC()
    iris = datasets.load_iris()
    X, y = iris.data, iris.target
    clf.fit(X, y)
    pickle_file = approot.get_dataset('joblib.pickle')
    # joblib.dump(clf, pickle_file)
    restored = joblib.load(pickle_file)
    print(restored.predict(X[0:1]))
# NOTE(review): this chunk opens mid-expression -- the tail of a
# DataFrame.append(...) call whose enclosing `def` (a readCSV variant) lies
# outside this view -- so the span cannot be reformatted or rewritten safely
# without the missing header.  It then repeats readCSV2 (groups blocknames.csv
# by name, sorts each group by count descending, dumps the mapping to
# relativeBlockName.json; the output handle is never closed -- TODO: wrap in
# `with`) and a __main__ driver that runs readCSV2 on blocknames.csv.
# Code left byte-identical; only the inline Chinese comment was translated.
pd.DataFrame({"name": row['name'], "blockname": object['name'], "count": object['count']}, index=["0"]), ignore_index=True) print(blockname) blockFile = approot.get_dataset("blocknames.csv") blockname.to_csv(blockFile) # read the file, sort, then save def readCSV2(filename): data = pd.read_csv(filename) dataGroup = data.groupby("name") result = dict() tempdict = dict() for name, groupstack in dataGroup: tempdict.clear() temp = groupstack.sort_values(by="count", ascending=False) for index, row in temp.iterrows(): tempdict[row["blockname"]] = row["count"] result[name] = tempdict.copy() filename = approot.get_dataset("relativeBlockName.json") file = open(filename, 'w', encoding='utf-8') json.dump(result, file, ensure_ascii=False) if __name__ == '__main__': # filename = approot.get_dataset("SELECT_t___FROM_demo_crawlWeight2_t.csv") # readCSV(filename) filename = approot.get_dataset("blocknames.csv") readCSV2(filename)
# NOTE(review): this chunk opens mid-loop -- a reverse lookup (device id ->
# key in `relations`) whose enclosing `def` is outside this view -- so it
# cannot be reformatted without guessing the missing header.  It also holds
# test3() (a matplotlib bar chart of four weekday values, rendered with a
# bundled Chinese font file) and a __main__ driver that feeds a behavior-log
# CSV to readCsv, with several disabled experiments kept as comments.
# Code left byte-identical; only the inline Chinese comment was translated
# (the weekday string literals are runtime data and are untouched).
for key, value in relations.items(): if value == deviceId: flag = key break return flag def test3(): font = fm.FontProperties(fname='HYQiHei-25J.ttf') name_list = ['星期一', '星期二', '星期三', '星期四'] num_list = [1.5, 0.6, 7.8, 6] plt.bar(range(len(num_list)), num_list, color='rgb', tick_label=name_list) plt.xticks(fontproperties=font) plt.show() if __name__ == '__main__': # select_DEVICE_ID_CONTEXT_ID_from_DWB_DA_8_27.csv holds the behavior records of device id 864621038192553 file = approot.get_dataset( 'select_DEVICE_ID_CONTEXT_ID__from_DWB_DA_2018-9-4.csv') readCsv(file=file) # wordCloudDemo(' '.join(searchKey.values)) # contentIds = getContentNum(contentId) # df = searchUrl(contentIds) # drawPicture(df) # test2() # test() # test3()
def startup(phone):
    """Resolve the device id mapped to *phone* and load its history
    records from the behavior-log CSV."""
    history_file = approot.get_dataset(
        "select_DEVICE_ID_CONTEXT_ID__from_DWB_DA.csv")
    device_id = relation(phone)
    data = read_history2(history_file, str(device_id))
from app import approot
import pandas as pd
import numpy as np
# sklearn.preprocessing.Imputer and the sklearn.cross_validation module were
# removed from scikit-learn; SimpleImputer and model_selection are the
# official drop-in replacements.
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


def print_parameter(X, Y):
    """Echo the feature matrix and target vector (debugging aid)."""
    print(X)
    print(Y)


if __name__ == '__main__':
    path = approot.get_dataset('Data.csv')
    dataset = pd.read_csv(path)
    X = dataset.iloc[:, :-1].values
    Y = dataset.iloc[:, 3].values
    print_parameter(X, Y)

    # Fill NaNs in columns 1-2 with the column mean.  SimpleImputer works
    # column-wise by default (the old axis=0) and expects np.nan rather than
    # the string "NaN" the removed Imputer accepted.
    imputer = SimpleImputer(missing_values=np.nan, strategy="mean")
    imputer = imputer.fit(X[:, 1:3])
    X[:, 1:3] = imputer.transform(X[:, 1:3])
    print_parameter(X, Y)

    # Encode the categorical first column as integer labels.
    labelencoder_X = LabelEncoder()
    X[:, 0] = labelencoder_X.fit_transform(X[:, 0])
    print_parameter(X, Y)