def writer_moredata(self, filepath, filename, valuetypes, start, end, tablename):
    """
    Write one column slice of a ``.sav`` file into ``tablename``.

    Every row read from the file is cut down to ``row[start:end]``; values
    whose declared type is ``"DATETIME"`` or ``"DATE"`` are converted to
    strings with ``spss2strDate``; the row is then handed to
    ``writer_data_table().insert_sql``. All other types (including
    ``"VARCHAR"``) are stored as read.

    :param filepath: directory containing the file.
    :param filename: name of the ``.sav`` file inside ``filepath``.
    :param valuetypes: type names for the columns of the slice, in order.
    :param start: index of the first column of the slice.
    :param end: index one past the last column of the slice.
    :param tablename: passed through to ``insert_sql`` as its first argument.
    :return: None. Errors raised while reading/inserting rows are logged via
        ``my_log.error`` and not re-raised; errors raised while opening the
        file propagate to the caller.
    """
    # strftime-style output format for each date-like column type.
    date_formats = {
        "DATETIME": '%Y-%m-%d %H:%M:%S',
        "DATE": '%Y-%m-%d',
    }
    res = writer_data_table()
    try:
        # ioUtf8=True: per the original note, without it Chinese text comes
        # back as escaped hex, which is harder to deal with.
        with savReaderWriter.SavReader(os.path.join(filepath, filename),
                                       ioUtf8=True) as read:
            try:
                for row in read:
                    row = row[start:end]
                    for j, valuetype in enumerate(valuetypes):
                        fmt = date_formats.get(valuetype)
                        if fmt is not None:
                            row[j] = read.spss2strDate(row[j], fmt, None)
                    res.insert_sql(tablename, row)
            except Exception as e:
                my_log.error(e)
            else:
                # Only report success when no row failed; the previous
                # version logged this message even after an error.
                my_log.info("data write database success !!!")
    finally:
        # Always release the writer, including when the file cannot be opened.
        res.close()
def select_sql(self, sql):
    """
    Run ``sql`` through ``self.conn.select_sql`` and return its result.

    :param sql: statement forwarded unchanged to the connection.
    :return: whatever ``self.conn.select_sql`` returns, or ``None`` when it
        raises (the exception is logged via ``my_log.error``).
    """
    try:
        return self.conn.select_sql(sql)
    except Exception as err:
        my_log.error(err)
        return None
def insert_sql(self, sql, value):
    """
    Forward an insert to ``self.conn.insert_sql`` and return its result.

    :param sql: first argument forwarded to the connection.
    :param value: second argument forwarded to the connection.
    :return: the value returned by ``self.conn.insert_sql`` (named
        ``lastrowid`` by the original code), or ``None`` when it raises
        (the exception is logged via ``my_log.error``).
    """
    try:
        return self.conn.insert_sql(sql, value)
    except Exception as err:
        my_log.error(err)
        return None
def adu_sql(self, sql):
    """
    Execute ``sql`` through ``self.conn.run_manysql``.

    "adu" is shorthand for insert (add), delete, update.

    :param sql: forwarded unchanged to ``self.conn.run_manysql``.
    :return: ``2000`` when the call completes, ``None`` when it raises
        (the exception is logged via ``my_log.error``).
    """
    try:
        self.conn.run_manysql(sql)
    except Exception as err:
        my_log.error(err)
        return None
    else:
        return 2000
def MyVariance(df_dropna, variableOne, variableTwo):
    """
    Run a one-way ANOVA of ``variableOne`` grouped by ``variableTwo``.

    The model ``'<variableOne>~C(<variableTwo>)'`` is fitted with ``ols`` on
    ``df_dropna`` and the table is taken from ``sm.stats.anova_lm``.

    :param df_dropna: table indexed by column name and passed to ``ols`` as
        ``data``. Presumably a pandas DataFrame with missing rows already
        dropped (suggested by the name; confirm against the caller).
    :param variableOne: name of the response column.
    :param variableTwo: name of the grouping column.
    :return: dict with keys ``df``, ``sum_sq``, ``mean_sq`` (lists taken from
        the ANOVA table) and ``F``, ``P`` (first-row values; ``P`` is read
        from the last column of the table). Note the key ``P`` is uppercase.
        Every value is the string ``"NAN"`` when the grouping column is empty
        or every entry equals its first entry, or when any exception is
        raised (the exception is logged via ``my_log.error``).
    """
    # Fresh dict per call so callers may mutate the result safely.
    nan_result = {
        "df": "NAN",
        "sum_sq": "NAN",
        "mean_sq": "NAN",
        "F": "NAN",
        "P": "NAN"
    }
    try:
        group_col = df_dropna[variableTwo]
        # Skip the fit when there is nothing to compare: no rows at all, or
        # every entry equal to the first one (element-wise ==, same test the
        # previous per-row loop performed).
        if len(group_col) == 0 or bool(
                (group_col == group_col.iloc[0]).all()):
            return nan_result

        expr = '{}~C({})'.format(variableOne, variableTwo)
        mod = ols(expr, data=df_dropna).fit()
        anova_table = sm.stats.anova_lm(mod)
        return {
            'df': list(anova_table.df),
            'sum_sq': list(anova_table.sum_sq),
            'mean_sq': list(anova_table.mean_sq),
            'F': list(anova_table.F)[0],
            'P': list(anova_table.values.T[-1])[0]
        }
    except Exception as e:
        my_log.error(e)
        return nan_result
def main(self, filepath, filename, user_id, project_name):
    """
    Import one SPSS ``.sav`` file into the database.

    Steps, in order:
      1. Read the file metadata with ``self.read_sav`` and derive the column
         types and widths with ``self.get_spss_data`` / ``self.float_data``.
      2. Look up the project id for ``(user_id, project_name)``, inserting
         the project when no usable id is found, then compute the next
         dataset id for that project (1 when the project has none yet).
      3. Create the data table(s) and write the rows. Files with fewer than
         1024 variables go into a single table; wider files are split into
         tables of at most 800 columns each (original note: MySQL does not
         allow more than 1024 columns, so wider files are split).
      4. Record the per-variable information with ``self.insert_sub_table``.

    Sample shapes of the intermediate values (from the original notes):
        formats        {'Q8': 'A400', 'Q3R6': 'F5', 'Q5R3': 'F5'}
        varnames       ['ID', 'StartTime', 'EndTime', 'VerNo', 'Q1R3']
        varLabels      {'Q8': <label text>, 'Q3R6': <label text>}
        valueLabels    {'Q3R6': {1.0: <label>, 2.0: <label>}}
        my_vartypes    ['A20', 'DATETIME40', 'DATETIME40', ...]
        my_width       ['20', '40', '40', '5', '5', ...]
        my_valuetypes  ['VARCHAR', 'DATETIME', 'DATETIME', 'INT', ...]
        float_width    [0, 0, 0, ...]

    :param filepath: directory containing the file.
    :param filename: name of the ``.sav`` file inside ``filepath``.
    :param user_id: id used for the project lookup and the table name prefix.
    :param project_name: project name used for the project lookup/insert.
    :return: None. Errors during table creation, row writing and
        ``insert_sub_table`` are logged via ``my_log.error`` and not
        re-raised; errors from the metadata read and the project/dataset
        lookups propagate.
    """
    sav_path = os.path.join(filepath, filename)

    # File metadata.
    formats, varnames, varLabels, valueLabels = self.read_sav(sav_path)
    my_vartypes, my_width, my_valuetypes = self.get_spss_data(
        formats, varnames)
    # NOTE(review): this reads self.width, not the my_width returned above;
    # presumably get_spss_data sets it - confirm.
    float_width = self.float_data(self.width)

    # "F" formats without an explicit decimal part get ".0" appended.
    for idx, vartype in enumerate(my_vartypes):
        if vartype.startswith("F") and "." not in vartype:
            my_vartypes[idx] = vartype + ".0"

    insert_project = insert_project_infor()
    try:
        # Project table. Guard the lookup before indexing so that an empty
        # (or None) result falls through to insert_project instead of
        # raising, mirroring the dataset lookup below.
        project_rows = insert_project.select_project_id(
            user_id, project_name)
        project_id = project_rows[-1]["proj_id"] if project_rows else None
        if not project_id:
            project_id = insert_project.insert_project(user_id, project_name)

        # Next dataset id: one past the last existing id, else 1.
        dataset_rows = insert_project.select_dataset_id(project_id)
        if dataset_rows:
            dataset_id = dataset_rows[-1]["dataset_id"] + 1
        else:
            dataset_id = 1

        # Table names look like u<user_id>_<YYYYMMDD>_<epoch micros>_<n>.
        nowtime = datetime.datetime.now().strftime("%Y%m%d")
        # Epoch seconds with six decimals, decimal point removed.
        stamp = ("%.6f" % float(time.time())).replace(".", "")
        table_prefix = "u" + str(user_id) + "_" + str(nowtime) + "_" + stamp + "_"

        if len(varnames) < 1024:
            table_subname = table_prefix + "1"
            try:
                create_data_table(my_vartypes, my_width, my_valuetypes,
                                  formats, varnames, table_subname)
                self.writer_data(filepath, filename, my_valuetypes,
                                 table_subname)
                insert_project.insert_dataset(dataset_id, project_id,
                                              filename, table_subname,
                                              filepath, ".sav")
            except Exception as e:
                my_log.error(e)
        else:
            try:
                columns_per_table = 800
                table_count, remainder = divmod(len(varnames),
                                                columns_per_table)
                if remainder:
                    table_count += 1
                for num in range(1, table_count + 1):
                    table_subname = table_prefix + str(num)
                    insert_project.insert_dataset(dataset_id, project_id,
                                                  filename, table_subname,
                                                  filepath, ".sav")
                    start = (num - 1) * columns_per_table
                    end = num * columns_per_table
                    sub_varnames = varnames[start:end]
                    # Only the formats of the columns in this slice.
                    sub_formats = {}
                    for name in sub_varnames:
                        sub_formats[name] = formats[name]
                    create_data_table(my_vartypes[start:end],
                                      my_width[start:end],
                                      my_valuetypes[start:end],
                                      sub_formats,
                                      sub_varnames,
                                      table_subname)
                    self.writer_moredata(filepath, filename,
                                         my_valuetypes[start:end],
                                         start, end, table_subname)
            except Exception as e:
                my_log.error(e)
    finally:
        # Release the project handle even when a lookup above raises.
        insert_project.close()

    # Write the per-variable information (a single info table is used).
    try:
        self.insert_sub_table(filename, varnames, my_valuetypes, my_width,
                              float_width, varLabels, valueLabels,
                              my_vartypes, project_id, dataset_id)
    except Exception as e:
        my_log.error(e)