Пример #1
0
 def writer_moredata(self, filepath, filename, valuetypes, start, end,
                     tablename):
     """Copy columns ``start:end`` of every record of a .sav file into a table.

     The file ``os.path.join(filepath, filename)`` is opened with
     ``savReaderWriter.SavReader``. Each record is sliced to ``[start:end]``,
     date-like columns are turned into strings, and the resulting row is
     handed to ``insert_sql(tablename, row)`` of a ``writer_data_table``
     object.

     Args:
         filepath: Directory containing the .sav file.
         filename: Name of the .sav file.
         valuetypes: Per-column type names for the sliced columns; only
             ``"DATETIME"`` and ``"DATE"`` trigger a conversion, every other
             value is passed through unchanged.
         start: Index of the first column to copy.
         end: Index one past the last column to copy.
         tablename: Name passed to ``insert_sql`` as the target table.

     Errors raised while reading or inserting rows are logged through
     ``my_log.error`` and stop the copy; they are not re-raised. Errors raised
     while opening the file still propagate to the caller.
     """
     res = writer_data_table()
     try:
         # Original author's note: without ioUtf8 the Chinese text comes back
         # as escaped hex sequences, which is harder to deal with.
         with savReaderWriter.SavReader(os.path.join(filepath, filename),
                                        ioUtf8=True) as read:
             try:
                 for record in read:
                     row = record[start:end]
                     for j, valuetype in enumerate(valuetypes):
                         if valuetype == "DATETIME":
                             row[j] = read.spss2strDate(
                                 row[j], '%Y-%m-%d %H:%M:%S', None)
                         elif valuetype == "DATE":
                             row[j] = read.spss2strDate(
                                 row[j], '%Y-%m-%d', None)
                     res.insert_sql(tablename, row)
             except Exception as e:
                 my_log.error(e)
             else:
                 # Only report success when every row went through.
                 my_log.info("data write database success !!!")
     finally:
         # Release the writer even if the .sav file could not be opened.
         res.close()
Пример #2
0
 def select_sql(self, sql):
     """Run ``sql`` through ``self.conn.select_sql`` and return its result.

     Any exception raised by the connection is logged with ``my_log.error``
     and ``None`` is returned instead of propagating the error.
     """
     try:
         rows = self.conn.select_sql(sql)
     except Exception as err:
         my_log.error(err)
         return None
     else:
         return rows
Пример #3
0
 def insert_sql(self, sql, value):
     """Forward ``sql`` and ``value`` to ``self.conn.insert_sql``.

     Returns whatever the connection returns (stored by the original code
     under the name ``lastrowid``). On any exception the error is logged
     with ``my_log.error`` and ``None`` is returned.
     """
     try:
         new_row_id = self.conn.insert_sql(sql, value)
     except Exception as err:
         my_log.error(err)
         return None
     else:
         return new_row_id
Пример #4
0
 def adu_sql(self, sql):
     """Execute a write statement via ``self.conn.run_manysql``.

     "adu" is shorthand for insert (add), delete, update.

     Returns ``2000`` when the call completes. On any exception the error is
     logged with ``my_log.error`` and ``None`` is returned.
     """
     try:
         self.conn.run_manysql(sql)
     except Exception as err:
         my_log.error(err)
         return None
     else:
         return 2000
def MyVariance(df_dropna, variableOne, variableTwo):
    """Fit ``variableOne ~ C(variableTwo)`` and summarise the ANOVA table.

    Args:
        df_dropna: DataFrame holding both columns (presumably already
            stripped of missing values, going by the name; confirm with
            the caller).
        variableOne: Column name used on the left-hand side of the formula.
        variableTwo: Column name wrapped in ``C(...)`` on the right-hand side.

    Returns:
        A dict with keys ``df``, ``sum_sq``, ``mean_sq`` (lists taken from
        the table returned by ``sm.stats.anova_lm``), ``F`` (first entry of
        the ``F`` column) and ``P`` (first entry of the table's last column).
        Every value is the string ``"NAN"`` when ``variableTwo`` is empty,
        when all of its values equal the first one, or when any exception is
        raised; exceptions are logged with ``my_log.error``.
    """
    nan_result = {
        "df": "NAN",
        "sum_sq": "NAN",
        "mean_sq": "NAN",
        "F": "NAN",
        "P": "NAN"
    }  # note the upper-case "P" key

    try:
        factor = df_dropna[variableTwo]
        # Skip the model when every value matches the first one (or there
        # are no rows at all); one vectorised comparison replaces the
        # per-row ``iloc`` loop.
        if len(factor) == 0 or (factor == factor.iloc[0]).all():
            return nan_result

        expr = '{}~C({})'.format(variableOne, variableTwo)
        mod = ols(expr, data=df_dropna).fit()
        anova_table = sm.stats.anova_lm(mod)
        return {
            'df': list(anova_table.df),
            'sum_sq': list(anova_table.sum_sq),
            'mean_sq': list(anova_table.mean_sq),
            'F': list(anova_table.F)[0],
            'P': list(anova_table.values.T[-1])[0]
        }
    except Exception as e:
        my_log.error(e)
        return nan_result
Пример #6
0
    def main(self, filepath, filename, user_id, project_name):
        """Import one .sav file into data tables and register its metadata.

        Steps, in order:
          1. Read the file's metadata with ``self.read_sav`` and derive
             column types and widths with ``self.get_spss_data``.
          2. Look up the project for ``(user_id, project_name)``; create it
             when no id is found.
          3. Pick the next dataset id for that project.
          4. Create one data table (fewer than 1024 variables) or several
             tables of up to 800 columns each, and load the rows into them.
          5. Write the per-variable information with
             ``self.insert_sub_table``.

        Example shapes, taken from the original author's debug notes:
          formats        {'Q8': 'A400', 'Q3R6': 'F5', 'Q5R3': 'F5'}
          varnames       ['ID', 'StartTime', 'EndTime', 'VerNo', 'Q1R3']
          varLabels      {variable name: label text}
          valueLabels    {variable name: {1.0: label text, 2.0: label text}}
          my_vartypes    ['A20', 'DATETIME40', 'DATETIME40']
          width          ['20', '40', '40', '5', '5']
          my_valuetypes  ['VARCHAR', 'DATETIME', 'DATETIME', 'INT']
          float_width    [0, 0, 0, 0, 0]

        Args:
            filepath: Directory containing the .sav file.
            filename: Name of the .sav file.
            user_id: Owner id; also embedded in generated table names.
            project_name: Name of the project the dataset belongs to.

        Errors raised while creating or filling tables, and while writing
        the information rows, are logged with ``my_log.error`` and not
        re-raised.
        """
        # MySQL limits the number of columns per table, so wide files are
        # split across several tables.
        max_single_table_columns = 1024
        columns_per_table = 800

        sav_path = os.path.join(filepath, filename)
        # File metadata.
        formats, varnames, varLabels, valueLabels = self.read_sav(sav_path)
        my_vartypes, my_width, my_valuetypes = self.get_spss_data(
            formats, varnames)
        # NOTE(review): this reads self.width, not the local my_width;
        # presumably get_spss_data stores it on the instance - confirm.
        float_width = self.float_data(self.width)
        # Give numeric "F" formats an explicit ".0" when they carry no
        # decimal part.
        for i, vartype in enumerate(my_vartypes):
            if vartype.startswith("F") and "." not in vartype:
                my_vartypes[i] = vartype + ".0"

        insert_project = insert_project_infor()
        try:
            # Project table: indexing the lookup result directly would raise
            # on an empty result, so the "create the project" branch could
            # never run for a missing project. Guard it the same way the
            # dataset lookup below is guarded.
            project_rows = insert_project.select_project_id(
                user_id, project_name)
            project_id = project_rows[-1]["proj_id"] if project_rows else None
            if not project_id:
                project_id = insert_project.insert_project(
                    user_id, project_name)

            # Look up the latest dataset id first, then use the next one.
            dataset_id_group = insert_project.select_dataset_id(project_id)
            if not dataset_id_group:
                dataset_id = 1
            else:
                dataset_id = dataset_id_group[-1]["dataset_id"] + 1

            # Table names look like u<user>_<YYYYMMDD>_<epoch micros>_<n>.
            nowtime = datetime.datetime.now().strftime("%Y%m%d")
            new_time3 = ("%.6f" % time.time()).replace(".", "")
            table_prefix = "u{}_{}_{}_".format(user_id, nowtime, new_time3)

            if len(varnames) < max_single_table_columns:
                table_subname = table_prefix + "1"
                try:
                    create_data_table(my_vartypes, my_width, my_valuetypes,
                                      formats, varnames, table_subname)
                    self.writer_data(filepath, filename, my_valuetypes,
                                     table_subname)
                    insert_project.insert_dataset(
                        dataset_id, project_id, filename, table_subname,
                        filepath, ".sav")
                except Exception as e:
                    my_log.error(e)
            else:
                try:
                    starts = range(0, len(varnames), columns_per_table)
                    for num, start in enumerate(starts, 1):
                        end = start + columns_per_table
                        table_subname = table_prefix + str(num)
                        insert_project.insert_dataset(
                            dataset_id, project_id, filename, table_subname,
                            filepath, ".sav")
                        # Formats restricted to this chunk's variables.
                        sub_formats = {
                            name: formats[name]
                            for name in varnames[start:end]
                        }
                        create_data_table(
                            my_vartypes[start:end],
                            my_width[start:end],
                            my_valuetypes[start:end],
                            sub_formats,
                            varnames[start:end],
                            table_subname)
                        self.writer_moredata(
                            filepath, filename, my_valuetypes[start:end],
                            start, end, table_subname)
                except Exception as e:
                    my_log.error(e)
        finally:
            # Always release the project helper, even if a lookup failed.
            insert_project.close()

        # One set of information rows per file, whatever the table count.
        try:
            self.insert_sub_table(filename, varnames, my_valuetypes, my_width,
                                  float_width, varLabels, valueLabels,
                                  my_vartypes, project_id, dataset_id)
        except Exception as e:
            my_log.error(e)