Example #1
def write_to_mysqldb(df, result_tb='data_health_examination', user='******', \
                     psw='liangzhi123', host='192.168.1.22', db='datamining', \
                     if_exists='append', dtype={u'db_name':sqltypes.NVARCHAR(length=255),
                                                 u'table_name':sqltypes.NVARCHAR(length=255),
                                                 u'part_date':sqltypes.NVARCHAR(length=255),
                                                 u'create_date':sqltypes.DateTime(),
                                                 u'field_name':sqltypes.NVARCHAR(length=255),
                                                 u'field_type':sqltypes.NVARCHAR(length=255),
                                                 u'missing_value_num':sqltypes.BigInteger(),
                                                 u'missing_value_prop':sqltypes.Float(),
                                                 u'other_missing_value_num':sqltypes.BigInteger(),
                                                 u'other_missing_value_prop':sqltypes.Float(),
                                                 u'abnormal_value_index':sqltypes.Text(),
                                                 u'abnormal_value_num':sqltypes.BigInteger(),
                                                 u'abnormal_value_prop':sqltypes.Float(),
                                                 u'if_exist_probability_plot':sqltypes.Integer(),
                                                 u'probability_plot_result':sqltypes.NVARCHAR(length=255),
                                                 u'probability_plot_script':sqltypes.Text(),
                                                 u'if_exist_frequency_plot':sqltypes.Integer(),
                                                 u'frequency_plot_result':sqltypes.NVARCHAR(length=255),
                                                 u'frequency_plot_script':sqltypes.Text(),
                                                 u'if_exist_rules':sqltypes.Integer(),
                                                 u'show_Chn_rules':sqltypes.NVARCHAR(length=255),
                                                 u'show_Eng_rules':sqltypes.NVARCHAR(length=255),
                                                 u'rules_result':sqltypes.NVARCHAR(length=255)}):
    engine = sqlalchemy.create_engine("mysql+mysqldb://%s:%s@%s/%s?charset=utf8"
                                      % (user, psw, host, db))
    df.to_sql(result_tb, engine, if_exists=if_exists, index=False, dtype=dtype)
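A minimal usage sketch for the helper above; the sample frame, its columns, and the imports are assumptions made for illustration, since the snippet does not show them:

import pandas as pd
import sqlalchemy
from sqlalchemy import types as sqltypes  # alias assumed to match the snippet

# Hypothetical health-check results covering only a subset of the default columns.
df = pd.DataFrame({'db_name': ['datamining'],
                   'table_name': ['t_orders'],
                   'missing_value_num': [0],
                   'missing_value_prop': [0.0]})

# Override the default dtype map so it matches this smaller frame.
write_to_mysqldb(df, result_tb='demo_health_check',
                 dtype={'db_name': sqltypes.NVARCHAR(length=255),
                        'table_name': sqltypes.NVARCHAR(length=255),
                        'missing_value_num': sqltypes.BigInteger(),
                        'missing_value_prop': sqltypes.Float()})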
Example #2
def save_models_to_sql_helper(trained_models,
                              ablation_set,
                              prefix,
                              if_exists='replace'):
    method = 'default'
    dfs = []
    for model in trained_models:
        cv = trained_models[model]
        k_range = cv.best_k[method]['k_range']
        for metric in models.METRICS:
            if metric in cv.results[method].keys():
                results = cv.results[method][metric]
                df = pd.DataFrame(results, columns=k_range)
                df['metric'] = metric.decode('utf-8', 'ignore')
                df['model'] = model
                dfs += [df]

    name = "%s_%s" % (prefix, ablation_set)

    df = pd.concat(dfs, axis=0, ignore_index=True)
    typedict = {
        col_name: types.Float(precision=5, asdecimal=True)
        for col_name in df
    }
    typedict['metric'] = types.NVARCHAR(length=255)
    typedict['model'] = types.NVARCHAR(length=255)
    df.to_sql(name, cnx, if_exists=if_exists, dtype=typedict)
Example #3
    def __init__(self):
        """Map the dtypes of your database to our definitions."""
        # if you have unicode it's best to use NVARCHAR
        self._textshort = types.NVARCHAR(length=40)
        self._textmiddle = types.NVARCHAR(length=400)
        self._textlong = types.NVARCHAR(length=4000)  # NVARCHAR(4000) is the max
        self._floaty = types.Float
        self._inty = types.INTEGER
        self._datey = types.DATE
        self._datetimey = types.DATETIME
        self._timey = types.TIME
        self._booly = types.BOOLEAN

        # Connection data, see the central config.yaml file.
        self._conn_data = setup.configs['AzureSQL']
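A hedged sketch of how such a mapping class might be consumed as a pandas to_sql dtype dict; the class name, frame, and engine below are placeholders, not part of the original code:

mapper = AzureTypeMapper()  # hypothetical name for the class owning __init__ above
dtype = {'customer_name': mapper._textshort,   # NVARCHAR(40)
         'comment':       mapper._textlong,    # NVARCHAR(4000)
         'revenue':       mapper._floaty,      # Float
         'created_at':    mapper._datetimey}   # DATETIME
df.to_sql('customers', engine, if_exists='replace', index=False, dtype=dtype)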
Example #4
    def test_reflect_nvarchar(self, metadata, connection):
        Table(
            "tnv",
            metadata,
            Column("nv_data", sqltypes.NVARCHAR(255)),
            Column("c_data", sqltypes.NCHAR(20)),
        )
        metadata.create_all(connection)
        m2 = MetaData()
        t2 = Table("tnv", m2, autoload_with=connection)
        assert isinstance(t2.c.nv_data.type, sqltypes.NVARCHAR)
        assert isinstance(t2.c.c_data.type, sqltypes.NCHAR)

        if testing.against("oracle+cx_oracle"):
            assert isinstance(
                t2.c.nv_data.type.dialect_impl(connection.dialect),
                cx_oracle._OracleUnicodeStringNCHAR,
            )

            assert isinstance(
                t2.c.c_data.type.dialect_impl(connection.dialect),
                cx_oracle._OracleNChar,
            )

        data = "m’a réveillé."
        connection.execute(t2.insert(), dict(nv_data=data, c_data=data))
        nv_data, c_data = connection.execute(t2.select()).first()
        eq_(nv_data, data)
        eq_(c_data, data + (" " * 7))  # char is space padded
        assert isinstance(nv_data, str)
        assert isinstance(c_data, str)
Example #5
    def test_reflect_nvarchar(self):
        metadata = MetaData(testing.db)
        t = Table('t', metadata,
            Column('data', sqltypes.NVARCHAR(255))
        )
        metadata.create_all()
        try:
            m2 = MetaData(testing.db)
            t2 = Table('t', m2, autoload=True)
            assert isinstance(t2.c.data.type, sqltypes.NVARCHAR)

            if testing.against('oracle+cx_oracle'):
                # nvarchar returns unicode natively.  cx_oracle
                # _OracleNVarChar type should be at play here.
                assert isinstance(
                    t2.c.data.type.dialect_impl(testing.db.dialect), 
                    cx_oracle._OracleNVarChar)

            data = u'm’a réveillé.'
            t2.insert().execute(data=data)
            res = t2.select().execute().first()['data']
            eq_(res, data)
            assert isinstance(res, unicode)
        finally:
            metadata.drop_all()
Example #6
    def test_reflect_nvarchar(self):
        metadata = self.metadata
        Table(
            "tnv",
            metadata,
            Column("nv_data", sqltypes.NVARCHAR(255)),
            Column("c_data", sqltypes.NCHAR(20)),
        )
        metadata.create_all()
        m2 = MetaData(testing.db)
        t2 = Table("tnv", m2, autoload=True)
        assert isinstance(t2.c.nv_data.type, sqltypes.NVARCHAR)
        assert isinstance(t2.c.c_data.type, sqltypes.NCHAR)

        if testing.against("oracle+cx_oracle"):
            assert isinstance(
                t2.c.nv_data.type.dialect_impl(testing.db.dialect),
                cx_oracle._OracleUnicodeStringNCHAR,
            )

            assert isinstance(
                t2.c.c_data.type.dialect_impl(testing.db.dialect),
                cx_oracle._OracleNChar,
            )

        data = u("m’a réveillé.")
        with testing.db.connect() as conn:
            conn.execute(t2.insert(), dict(nv_data=data, c_data=data))
            nv_data, c_data = conn.execute(t2.select()).first()
            eq_(nv_data, data)
            eq_(c_data, data + (" " * 7))  # char is space padded
            assert isinstance(nv_data, util.text_type)
            assert isinstance(c_data, util.text_type)
Example #7
def save_domain_adapt_to_sql_helper(da, model_name, if_exists='replace'):
    dfs = []
    name = "%s_%s" % (DOMAIN_ADAPTATION_RESULTS_PREFIX, model_name)
    for method in da.methods:
        k_range = da.best_k[method]['k_range']
        # for metric in ['roc', 'fms', 'acc']:
        for metric in ['fms', 'acc']:
            if metric in da.results[method].keys():
                results = da.results[method][metric]
                df = pd.DataFrame(results, columns=k_range)
                df['metric'] = metric.decode('utf-8', 'ignore')
                df['method'] = method.decode('utf-8', 'ignore')
                dfs += [df]

    df = pd.concat(dfs, axis=0, ignore_index=True)
    typedict = {
        col_name: types.Float(precision=5, asdecimal=True)
        for col_name in df
    }
    typedict['metric'] = types.NVARCHAR(length=255)
    typedict['method'] = types.NVARCHAR(length=255)
    df.to_sql(name, cnx, if_exists=if_exists, dtype=typedict)
Example #8
def gen_types_from_pandas_to_sql(table):
    r"""
    Generate a dictionnary with the database types related to the dataframe dtypes
    """
    dtypedict = {}
    for i, j in zip(table.columns, table.dtypes):
        if 'object' in str(j):
            dtypedict.update({i: types.NVARCHAR(length=500)})
        if 'datetime' in str(j):
            dtypedict.update({i: types.DateTime()})
        if 'float' in str(j):
            dtypedict.update({i: types.Float(precision=3, asdecimal=True)})
            dtypedict.update({
                i: types.NVARCHAR(length=500)
            })  # deliberately overwrite the Float mapping to avoid "Out of range value for column" errors
        if 'int' in str(j):
            if max([l for l in table[i].tolist() if not pd.isnull(l)
                    ]) > cp.INT_LIMIT:
                dtypedict.update({i: types.BIGINT()})
            else:
                dtypedict.update({i: types.INT()})
    return dtypedict
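A usage sketch, assuming cp.INT_LIMIT is roughly the 32-bit integer bound (2**31 - 1); the sample frame is made up:

import pandas as pd

table = pd.DataFrame({'name': ['a', 'b'],
                      'ts': pd.to_datetime(['2021-01-01', '2021-01-02']),
                      'score': [0.5, 1.5],
                      'big_id': [1, 3_000_000_000]})
dtypedict = gen_types_from_pandas_to_sql(table)
# name   -> NVARCHAR(500)
# ts     -> DateTime
# score  -> NVARCHAR(500)  (the Float mapping is overwritten, see above)
# big_id -> BIGINT         (3_000_000_000 exceeds the assumed INT_LIMIT)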
Example #9
    def test_reflect_nvarchar(self):
        metadata = self.metadata
        Table('tnv', metadata, Column('data', sqltypes.NVARCHAR(255)))
        metadata.create_all()
        m2 = MetaData(testing.db)
        t2 = Table('tnv', m2, autoload=True)
        assert isinstance(t2.c.data.type, sqltypes.NVARCHAR)

        if testing.against('oracle+cx_oracle'):
            assert isinstance(t2.c.data.type.dialect_impl(testing.db.dialect),
                              cx_oracle._OracleUnicodeStringNCHAR)

        data = u('m’a réveillé.')
        t2.insert().execute(data=data)
        res = t2.select().execute().first()['data']
        eq_(res, data)
        assert isinstance(res, util.text_type)
Example #10
    def get_columns(self, connection, table_name, schema=None, **kwargs):
        schema = schema or self.default_schema_name

        result = connection.execute(
            sql.text(
                """SELECT COLUMN_NAME, DATA_TYPE_NAME, DEFAULT_VALUE, IS_NULLABLE, LENGTH, SCALE, COMMENTS FROM (
                   SELECT SCHEMA_NAME, TABLE_NAME, COLUMN_NAME, POSITION, DATA_TYPE_NAME, DEFAULT_VALUE, IS_NULLABLE,
                   LENGTH, SCALE, COMMENTS FROM SYS.TABLE_COLUMNS UNION ALL SELECT SCHEMA_NAME, VIEW_NAME AS TABLE_NAME,
                   COLUMN_NAME, POSITION, DATA_TYPE_NAME, DEFAULT_VALUE, IS_NULLABLE, LENGTH, SCALE, COMMENTS FROM
                   SYS.VIEW_COLUMNS) AS COLUMS WHERE SCHEMA_NAME=:schema AND TABLE_NAME=:table ORDER BY POSITION"""
            ).bindparams(
                schema=self.denormalize_name(schema),
                table=self.denormalize_name(table_name)
            )
        )

        columns = []
        for row in result.fetchall():
            column = {
                'name': self.normalize_name(row[0]),
                'default': row[2],
                'nullable': row[3] == "TRUE",
                'comment': row[6]
            }

            if hasattr(types, row[1]):
                column['type'] = getattr(types, row[1])
            elif hasattr(hana_types, row[1]):
                column['type'] = getattr(hana_types, row[1])
            else:
                util.warn("Did not recognize type '%s' of column '%s'" % (
                    row[1], column['name']
                ))
                column['type'] = types.NULLTYPE

            if column['type'] == types.DECIMAL:
                column['type'] = types.DECIMAL(row[4], row[5])
            elif column['type'] == types.VARCHAR:
                column['type'] = types.VARCHAR(row[4])
            elif column['type'] == types.NVARCHAR:
                column['type'] = types.NVARCHAR(row[4])

            columns.append(column)

        return columns
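The type-resolution step above can be exercised standalone; the reflected row below is a made-up example of HANA column metadata:

from sqlalchemy import types

row = ('CUSTOMER_NAME', 'NVARCHAR', None, 'TRUE', 100, None, None)
column_type = getattr(types, row[1])      # resolves to sqlalchemy.types.NVARCHAR
if column_type == types.NVARCHAR:
    column_type = types.NVARCHAR(row[4])  # apply the reflected LENGTH -> NVARCHAR(100)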
Example #11
    def column(self,tablename,mode=1,quotechar=False):
        cur = self.conn.cursor()
        if mode==1:
            sql = f" select COLUMN_NAME from user_tab_columns where table_name='{tablename.upper()}'"
            cur.execute(sql)
            columns = []
            for row in cur.fetchall():
                columns.append(row[0])

            if quotechar:
                columns = list(map(lambda x: f"'{x}'", columns))

        elif mode==2:
            from sqlalchemy import types
            sql = f" select COLUMN_NAME,DATA_TYPE,DATA_LENGTH,DATA_PRECISION,DATA_SCALE from user_tab_columns where table_name='{tablename.upper()}'"
            cur.execute(sql)
            columns={}
            for COLUMN_NAME,DATA_TYPE,DATA_LENGTH,DATA_PRECISION,DATA_SCALE in cur.fetchall():
                if DATA_TYPE=='VARCHAR2':
                    columns[COLUMN_NAME]=types.VARCHAR(DATA_LENGTH)
                elif DATA_TYPE=='NVARCHAR2':
                    columns[COLUMN_NAME] = types.NVARCHAR()
                elif DATA_TYPE=='CHAR':
                    columns[COLUMN_NAME] = types.CHAR
                elif DATA_TYPE=='DATE':
                    columns[COLUMN_NAME] = types.DateTime()
                elif DATA_TYPE=='NUMBER':
                    if DATA_SCALE is not None:
                        columns[COLUMN_NAME]=types.FLOAT(DATA_SCALE)
                    else :
                        columns[COLUMN_NAME] = types.INT
                else:
                    #TODO raise an exception here; unrecognized types still need more if-branches
                    pass

        cur.close()
        return columns
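A sketch of what mode=2 is for: feeding the resulting dict straight into pandas' to_sql. Here `db` is assumed to be an instance of the (unnamed) class defining `column` above, and `engine` a hypothetical SQLAlchemy Oracle engine:

dtype = db.column('SALES_FACT', mode=2)  # e.g. {'REGION': NVARCHAR(), 'AMT': FLOAT(2), ...}
df.to_sql('sales_fact_copy', engine, if_exists='replace', index=False, dtype=dtype)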
Example #12
def osm_delineation(param):
    """

    """
    osm.op_endpoint = param['osm']['op_endpoint']

    ########################################
    ### Load data

    ## Read in source data
    print('--Reading in source data...')

    json_lst = get_json_from_api(param['plan_limits']['api_url'], param['plan_limits']['api_headers'])
    json_lst1 = json_filters(json_lst, only_operative=True, only_reach_points=True)
    gjson1, hydro_units, pts_alt, sg1 = geojson_convert(json_lst1)

    combined_zones1 = [j for j in json_lst if j['id'] == param['other']['combined_zones_id']][0]
    combined_zones2 = [s['id'] for s in combined_zones1['spatialUnit']]

    no_limit1 = [j for j in json_lst if j['id'] == param['other']['no_limit_id']][0]
    no_limit2 = [s['id'] for s in no_limit1['spatialUnit']][0]

    pts = mssql.rd_sql(param['gis_waterdata']['server'], param['gis_waterdata']['database'], param['gis_waterdata']['pts']['table'], [param['gis_waterdata']['pts']['id']], where_in={param['gis_waterdata']['pts']['id']: pts_alt.id.unique().tolist()}, geo_col=True, rename_cols=[id_col])

    ## Point checks
    excluded_points = pts_alt[~pts_alt.id.isin(pts.SpatialUnitId)].copy()
    if not excluded_points.empty:
        print('These points are in the Plan Limits db, but have no GIS data:')
        print(excluded_points)

    bad_geo = pts[pts.geom_type != 'Point']
    if not bad_geo.empty:
        print('These points do not have a "Point" geometry (likely "MultiPoint"):')
        print(bad_geo)
        pts = pts[~pts.SpatialUnitId.isin(bad_geo.SpatialUnitId)].copy()

    cwms1 = mssql.rd_sql(param['gis_prod']['server'], param['gis_prod']['database'], param['gis_prod']['cwms']['table'], param['gis_prod']['cwms']['col_names'], rename_cols=param['gis_prod']['cwms']['rename_cols'], geo_col=True, username=param['gis_prod']['username'], password=param['gis_prod']['password'])

    zones3 = mssql.rd_sql(param['gis_waterdata']['server'], param['gis_waterdata']['database'], param['gis_waterdata']['allo_zones']['table'], [param['gis_waterdata']['allo_zones']['id']], where_in={param['gis_waterdata']['allo_zones']['id']: combined_zones2}, geo_col=True, rename_cols=[id_col])

    pts['geometry'] = pts.geometry.simplify(1)

    #######################################
    ### Run query
    print('--Pull out the waterways from OSM')

    pts1, bad_points = osm.get_nearest_waterways(pts, id_col, param['other']['search_distance'], 'all')

    waterways, nodes = osm.get_waterways(pts1, 'all')

    print('--Delineating Reaches from OSM')

    site_delin = osm.waterway_delineation(waterways, True)
    osm_delin = osm.to_osm(site_delin, nodes)
    gdf1 = osm.to_gdf(osm_delin)

    gdf2 = gdf1.to_crs(pts.crs)

    gdf3 = gdf2.merge(pts1.rename(columns={'id': 'start_node'})[['start_node', id_col]], on='start_node')

    print('--Pulling out all of Canterbury...')

    cant2 = osm.get_waterways_within_boundary(cwms1, buffer=0, waterway_type='all')

    combined1, poly1 = vector.pts_poly_join(cant2, zones3, id_col, op='intersects')
    gdf3 = gdf3[~gdf3.way_id.isin(combined1.way_id.unique())].copy()

    all_others1 = cant2[~cant2.way_id.isin(combined1.way_id)]
    all_others2 = all_others1[~all_others1.way_id.isin(gdf3.way_id.unique().tolist())].copy()
    all_others2[id_col] = no_limit2

    print('--Combine all reach data')

    gdf4 = pd.concat([gdf3, combined1, all_others2]).reset_index(drop=True)

    gdf4.rename(columns={'way_id': 'OSMWaterwayId', 'waterway': 'OSMWaterwayType', 'name': 'RiverName', 'start_node': 'StartNode'}, inplace=True)
    gdf4['OSMWaterwayId'] = gdf4['OSMWaterwayId'].astype('int64')

    print('--Compare existing reaches in the database')

    cols = gdf4.columns.drop('geometry').tolist()
    cols.extend(['OBJECTID'])

    old1 = mssql.rd_sql(param['gis_waterdata']['server'], param['gis_waterdata']['database'], param['gis_waterdata']['reaches']['table'], cols, geo_col=True)

    comp_dict = util.compare_dfs(old1.drop('OBJECTID', axis=1), gdf4, on=['SpatialUnitId', 'OSMWaterwayId'])
    new1 = comp_dict['new'].copy()
    diff1 = comp_dict['diff'].copy()
    rem1 = comp_dict['remove'][['SpatialUnitId', 'OSMWaterwayId']].copy()

    print('--Save to database')

    sql_dtypes = {'StartNode': types.BIGINT(), 'OSMWaterwayId': types.BIGINT(), 'RiverName': types.NVARCHAR(200), 'OSMWaterwayType': types.NVARCHAR(30), 'SpatialUnitId': types.NVARCHAR(8), 'SHAPE_': types.VARCHAR(), 'OBJECTID': types.INT(), 'ModifiedDate': types.DATETIME()}

    if not new1.empty:
        max_id = old1['OBJECTID'].max() + 1

        new1['ModifiedDate'] = today_str
        new1['OBJECTID'] = list(range(max_id, max_id + len(new1)))
        new1.rename(columns={'geometry': 'SHAPE'}, inplace=True)

        mssql.update_table_rows(new1, param['gis_waterdata']['server'], param['gis_waterdata']['database'], param['gis_waterdata']['reaches']['table'], on=['SpatialUnitId', 'OSMWaterwayId'], index=False, append=True, geo_col='SHAPE', clear_table=False, dtype=sql_dtypes)

    if not diff1.empty:
        diff2 = pd.merge(diff1, old1[['SpatialUnitId', 'OSMWaterwayId', 'OBJECTID']], on=['SpatialUnitId', 'OSMWaterwayId'])
        diff2['ModifiedDate'] = today_str
        diff2.rename(columns={'geometry': 'SHAPE'}, inplace=True)

        mssql.update_table_rows(diff2, param['gis_waterdata']['server'], param['gis_waterdata']['database'], param['gis_waterdata']['reaches']['table'], on=['SpatialUnitId', 'OSMWaterwayId'], index=False, append=True, geo_col='SHAPE', clear_table=False, dtype=sql_dtypes)

    if not rem1.empty:
        mssql.del_table_rows(param['gis_waterdata']['server'], param['gis_waterdata']['database'], param['gis_waterdata']['reaches']['table'], pk_df=rem1)

    return gdf4, excluded_points, bad_geo, bad_points
Example #13
"""샘플쿼리 모듈
:filename:          - query.py
:modified:          - 2017.08.24
:note:              - 이 모듈에서는 자주사용하는 샘플쿼리를 미리 정의함

"""
'''Load modules'''
from sqlalchemy import types  #ALCHEMY for engine
'''Import query instances'''
# Basic product information lookup
# Column data types
basic_col = \
    {'ISIN_NO'    : types.NVARCHAR(length=50),
     'STD_DATE'   : types.DateTime(),
     'FIRST_AMT'  : types.BigInteger(),
     'REMAIN_AMT' : types.BigInteger(),
     'EFF_DATE'   : types.DateTime(),
     'MAT_DATE'   : types.DateTime(),
     'PRSV_RATE'  : types.Float()}
# Query string
basic_sql = \
 (
    "select ISIN_NO,"
    "		to_date(STD_DATE,'yyyymmdd') STD_DATE, "
    "		FIRST_AMT, REMAIN_AMT, "
    "		EFF_DATE, MAT_DATE, PRSV_RATE "
    "from "
    "( "
    "	select 	tblLATEST.ISIN_NO, "                    #ISIN번호
    "			greatest(tblLATEST.STND_DATE, nvl(tblREFUND.STND_DATE,0)) STD_DATE, "      #처리일자
    "			tblLATEST.FIRST_AMT/1000000 FIRST_AMT, "              #최초발행금액 (백만원)
Example #14
def check_email(user, sender_email, path_input, path_output, receiver_emails):

    t_preliminary_0 = time.time()
    # Set user.
    # Create engine.
    engine = create_engine('hana://{user}@hananode1:30015'.format(user=user))

    df1 = pd.read_excel(path_input)

    df1.to_sql('vpp_churn_tom_from_python',
               engine,
               schema=user,
               if_exists='replace',
               dtype=types.NVARCHAR(length=255))

    t_preliminary_1 = time.time()

    # Optionally pause before querying (300 s = 5 minutes).
    # time.sleep(300)

    t_sql_code_0 = time.time()
    sql = """
    SELECT A."Inverter", A."POD (NMI)" NMI, A."*approved BP*" BP_VPP, C.BUSINESSPARTNER BP_Active,C.COMPANY,
    min(CASE WHEN C.BUSINESSPARTNER IS NULL THEN '3_LeftVPP_New_NonAGL_Customer' 
    WHEN C.BUSINESSPARTNER IS NOT NULL AND right(A."*approved BP*",9) <> right(C.BUSINESSPARTNER,9) and C.COMPANY != 'AGL' THEN '3_LeftVPP_New_NonAGL_Customer'
    WHEN C.BUSINESSPARTNER IS NOT NULL AND right(A."*approved BP*",9) <> right(C.BUSINESSPARTNER,9) THEN '4_LeftVPP_New_AGL_Customer'
    when C.BUSINESSPARTNER IS NOT NULL AND right(A."*approved BP*",9) = right(C.BUSINESSPARTNER,9) and C.COMPANY = 'PD' THEN '2_PowerDirect'
    ELSE '1_CURRENT' END) AS STATUS
    , CASE WHEN A."*approved BP*" IS NOT NULL THEN (SELECT max(MOVEINDATE) from "SP_CUSTOMER"."CIA_TheTruthAboutCustomer"D where right(D.BUSINESSPARTNER,9) = right(A."*approved BP*",9) and left(D.NMI,10)=left(A."POD (NMI)",10)) END VPP_MOVEIN
    , CASE WHEN A."*approved BP*" IS NOT NULL THEN (SELECT max(MOVEOUTDATE) from "SP_CUSTOMER"."CIA_TheTruthAboutCustomer"D where right(D.BUSINESSPARTNER,9) = right(A."*approved BP*",9) and left(D.NMI,10)=left(A."POD (NMI)",10)) END VPP_MOVEOUT
    ,CASE WHEN C.BUSINESSPARTNER IS NOT NULL THEN (SELECT max(MOVEINDATE) from "SP_CUSTOMER"."CIA_TheTruthAboutCustomer"D where right(D.BUSINESSPARTNER,9) = right(C.BUSINESSPARTNER,9)and left(D.NMI,10)=left(C.NMI,10)) END CURRENT_CUSTOMER_MOVEIN
    
    from
    	(SELECT * from "{user}"."VPP_CHURN_TOM_FROM_PYTHON") A
    
    left join
    
    	(SELECT * FROM "SP_CUSTOMER"."CIA_TheTruthAboutCustomer" B
    	WHERE FUEL = 'ELEC' AND STATUS = 'ACTIVE'
    	) C on left(A."POD (NMI)",10) = left(C.NMI,10)
    
    GROUP BY A."Inverter", A."POD (NMI)", A."*approved BP*", C.NMI, C.BUSINESSPARTNER, C.TYPE, C.STATE, C.STATUS, C.COMPANY
    order by STATUS
        """.format(user=user)

    df2 = pd.read_sql(sql, engine)
    t_sql_code_1 = time.time()

    t_exportfile_code_0 = time.time()
    today = datetime.today().date()

    path_output_file = path_output + "/Full VPPSA Site List V3 outputfile {datetime}.xlsx".format(
        datetime=today)

    df2.to_excel(path_output_file)
    t_exportfile_code_1 = time.time()

    category_all = df2['nmi'].nunique()
    category_1 = df2.groupby('status')['nmi'].nunique()['1_CURRENT']
    category_2 = df2.groupby('status')['nmi'].nunique()['2_PowerDirect']
    category_3 = df2.groupby(
        'status')['nmi'].nunique()['3_LeftVPP_New_NonAGL_Customer']
    category_4 = df2.groupby(
        'status')['nmi'].nunique()['4_LeftVPP_New_AGL_Customer']

    ##log
    f = open("P:/New Energy/Churn Moveout Report/LOG_RUN.txt", "a+")
    f.write("%s, %s, %s, %s, %s\n" %
            (time.strftime("%x, %X"), len(df2),
             t_preliminary_1 - t_preliminary_0, t_sql_code_1 - t_sql_code_0,
             t_exportfile_code_1 - t_exportfile_code_0))
    f.close()

    if category_2 + category_3 + category_4 > 0:

        message = mailer.Message()

        message.From = sender_email
        message.To = [receiver_emails]
        message.Subject = 'VPPSA move and Churn Report on {datetime}'.format(
            datetime=today)
        message.Body = '''Hi all,
            
            On {today_date}, from {category_all_num} unique NMIs in the VPP list, {category_2_num} NMIs are identified as 2_PowerDirect, {category_3_num} NMIs are identified as 3_LeftVPP_New_NonAGL_Customer, {category_4_num} NMIs are identified as 4_LeftVPP_New_AGL_Customer, and {category_1} NMIs are identified as 1_CURRENT.
            The report is attached to this email and can be found at {path_output_file_loc}.
            
            Definition of Flags:
            1_CURRENT: The Business partner ID in the VPPSA list is the same as the current active Business partner ID at that NMI.
            2_PowerDirect: The Business partner ID in the VPPSA list is the same as the current active Business partner ID at that NMI, but their COMPANY is power direct. 
            3_LeftVPP_New_NonAGL_Customer: The Business partner ID in the VPPSA list has left that NMI and the new occupant at that NMI is not an AGL customer.
            4_LeftVPP_New_AGL_Customer: The Business partner ID in the VPPSA list has left that NMI, but the new occupant at that NMI is still an AGL customer.
        
       
            If you have any questions please let me know.
            
            Kind regards,
            
            Javad'''.format(today_date=today,
                            category_all_num=category_all,
                            category_2_num=category_2,
                            category_3_num=category_3,
                            category_4_num=category_4,
                            category_1=category_1,
                            path_output_file_loc=path_output_file)

        message.attach(path_output_file)

        sender = mailer.Mailer('aglsmtp05.agl.com.au')

        sender.send(message)

    return
Example #15
def unicode(length, **kwargs):
    return _vary(types.NVARCHAR(length), unicode_map.copy(), kwargs, length)
Example #16
df1["AMOUNT"] = df1["AMOUNT"] * df1["TEMP"]
df_volume = pd.concat([df1, df2])
df_volume.drop("TEMP", axis=1, inplace=True)
df_combined = pd.concat([df, df_volume])
print(df_combined)

print("start importing...")
df_combined.to_sql(
    "data",
    con=engine,
    if_exists="replace",
    index=False,
    dtype={
        "DATE": t.DateTime(),
        "AMOUNT": t.FLOAT(),
        "TC I": t.NVARCHAR(length=200),
        "TC II": t.NVARCHAR(length=200),
        "TC III": t.NVARCHAR(length=200),
        "TC IV": t.NVARCHAR(length=200),
        "MOLECULE": t.NVARCHAR(length=250),
        "PRODUCT": t.NVARCHAR(length=250),
        "PACKAGE": t.NVARCHAR(length=250),
        "CORPORATION": t.NVARCHAR(length=250),
        "MANUF_TYPE": t.NVARCHAR(length=20),
        "FORMULATION": t.NVARCHAR(length=50),
        "STRENGTH": t.NVARCHAR(length=20),
        "UNIT": t.NVARCHAR(length=25),
        "PERIOD": t.NVARCHAR(length=3),
        "MOLECULE_TC": t.NVARCHAR(length=255),
        "PRODUCT_CORP": t.NVARCHAR(length=255),
    },
)
Example #17
def import_data(
    df: pd.DataFrame,
    engine: engine,
    table: str,
):
    print("start importing...")
    df.replace([np.inf, -np.inf], np.nan,
               inplace=True)  # replace the inf/-inf produced by zero division with NaN
    df["RSP"] = df["RSP"].apply(
        lambda x: ",".join(x.tolist())
        if type(x) != str and type(x) != float else x)  # store the RSP field as a comma-joined string
    df[["POTENTIAL_DOT", "MAT_SALES", "SHARE"]] = df[[
        "POTENTIAL_DOT", "MAT_SALES", "SHARE"
    ]].fillna(0)  # fill NaN in the numeric fields with 0

    df.to_sql(
        table,
        con=engine,
        if_exists="replace",
        index=False,
        dtype={
            "HP_ID": t.NVARCHAR(length=10),
            "HP_NAME": t.NVARCHAR(length=100),
            "HOSPITAL": t.NVARCHAR(length=110),
            "PROVINCE": t.NVARCHAR(length=3),
            "CITY": t.NVARCHAR(length=30),
            "COUNTY": t.NVARCHAR(length=30),
            "POTENTIAL_DOT": t.FLOAT(),
            "DATA_SOURCE": t.NVARCHAR(length=30),
            "HP_TYPE": t.NVARCHAR(length=4),
            "DECILE_HP": t.INTEGER(),
            "DECILE_CM": t.INTEGER(),
            "DECILE": t.INTEGER(),
            "DECILE_TOTAL": t.INTEGER(),
            "MAT_SALES": t.FLOAT(),
            "ANNUAL_TARGET": t.FLOAT(),
            "RSP": t.NVARCHAR(length=100),
            "BU": t.NVARCHAR(length=3),
            "RD": t.NVARCHAR(length=3),
            "RM": t.NVARCHAR(length=20),
            "AM": t.NVARCHAR(length=20),
            "STATUS": t.NVARCHAR(length=7),
            "SHARE": t.FLOAT(),
            "SHARE_GROUP": t.NVARCHAR(length=11),
        },
    )
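A usage sketch for import_data; the connection string, source file, and table name are placeholders, not taken from the original code:

import pandas as pd
from sqlalchemy import create_engine

engine = create_engine("mssql+pymssql://user:***@host/db")  # hypothetical DSN
df = pd.read_excel("hospital_universe.xlsx")                # hypothetical source
import_data(df, engine, "hospital_universe")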
Example #18
df1['TEMP'] = df1['TEMP'].apply(np.int64)
df1['AMOUNT'] = df1['AMOUNT'] * df1['TEMP']
df_volume = pd.concat([df1, df2])
df_volume.drop('TEMP', axis=1, inplace=True)
df_combined = pd.concat([df, df_volume])
print(df_combined)

print('start importing...')
df_combined.to_sql('data',
                   con=engine,
                   if_exists='replace',
                   index=False,
                   dtype={
                       'DATE': t.DateTime(),
                       'AMOUNT': t.FLOAT(),
                       'TC I': t.NVARCHAR(length=200),
                       'TC II': t.NVARCHAR(length=200),
                       'TC III': t.NVARCHAR(length=200),
                       'TC IV': t.NVARCHAR(length=200),
                       'MOLECULE': t.NVARCHAR(length=200),
                       'PRODUCT': t.NVARCHAR(length=200),
                       'PACKAGE': t.NVARCHAR(length=200),
                       'CORPORATION': t.NVARCHAR(length=200),
                       'MANUF_TYPE': t.NVARCHAR(length=20),
                       'FORMULATION': t.NVARCHAR(length=50),
                       'STRENGTH': t.NVARCHAR(length=20),
                       'UNIT': t.NVARCHAR(length=25),
                       'PERIOD': t.NVARCHAR(length=3),
                       'MOLECULE_TC': t.NVARCHAR(length=255),
                       'PRODUCT_CORP': t.NVARCHAR(length=255),
                   })
Example #19
df['DSM_POS_NAME'] = df['DSM'] + " " + df['DSM_NAME']
df['RSP_POS_NAME'] = df['RSP'] + " " + df['RSP_NAME']
print(df)

print("start importing...")
df.to_sql(
    "data",
    con=engine,
    if_exists="replace",
    index=False,
    dtype={
        "YEAR": t.INTEGER(),
        "DATE": t.DateTime(),
        "MONTH": t.INTEGER(),
        "QUARTER": t.INTEGER(),
        "HP_ID": t.NVARCHAR(length=10),
        "HP_NAME": t.NVARCHAR(length=100),
        "HOSPITAL": t.NVARCHAR(length=110),
        "STORE_ID": t.NVARCHAR(length=10),
        "STORE_NAME": t.NVARCHAR(length=100),
        "STORE": t.NVARCHAR(length=110),
        "PROVINCE": t.NVARCHAR(length=3),
        "CITY": t.NVARCHAR(length=30),
        "COUNTY": t.NVARCHAR(length=30),
        "LEVEL": t.NVARCHAR(length=4),
        "IF_COMMUNITY": t.Boolean(),
        "IF_DUALCALL": t.Boolean(),
        "PRODUCT": t.NVARCHAR(length=10),
        "STRENGTH": t.NVARCHAR(length=10),
        "TAG": t.NVARCHAR(length=4),
        "VOLUME": t.FLOAT(),