Exemplo n.º 1
0
def connectDb():
    try:
        print '1. Connecting to mysql ....'
        db = mconnect.connectMysql('', 'root', '', 'localhost')
        print '2. Creating db...'
        mconnect.checkDb('ods', db)
        print '3. Creating procuser...'
        mconnect.checkUser('procuser', db)
        print '4. Connecting to Mysql as procuser'
        db = mconnect.connectMysql('ods', 'procuser', 'c@llhome', 'localhost')

        cur = db.cursor()
        print '5. Show tables'
        cur.execute('SHOW TABLES')
        datList = cur.fetchall()

        tablenameList = {}
        if not datList:
            for dat in datList:
                tablenameList.append(dat[0])
        print '6. Connecting to oracle.......'
        constr = 'ods/ods@callhomeods:1521/callhomeods'
        oraconn = oracon.openconnect(constr)
        print '7. Creating table........'
        checkCreatetable(cur, tablenameList, oraconn, oracon)
        db.close()
        oraconn.close()
        return
    except:
        print "Error reported: " + str(sys.exc_info()[1])
Exemplo n.º 2
0
def firstInstall():
    machine_type = raw_input('Type of machine:')
    if machine_type == 'datamartextract':
        checkmakedir('/mysql/')
        sh.copyfile('/etc/my.cnf', '/etc/my.cnf.bak')
        orgfl = open('/etc/my.cnf', 'r')
        readLines = orgfl.readlines()
        newfl = open('/etc/my.cnf.new', 'w')
        newlines = []
        for ln in readLines:
            if string.find(ln, 'datadir') >= 0:
                newlines.append('datadir=/mysql/')
            else:
                newlines.append(string.replace(ln, '\n', ''))
        newfl.write(string.join(newlines, '\n'))
        newfl.close()
        sh.copyfile('/etc/my.cnf.new', '/etc/my.cnf')
        os.system('service mysqld stop')
        os.system('service mysqld start')
        dbname = 'ods'
        username = '******'
        passwd = 'datamart'
        host = 'localhost'

        cnx = mysqlcon.connectMysql('', 'root', '', 'localhost')
        mysqlcon.CheckCreateDb(dbname, cnx)
        mysqlcon.CheckCreateUser(dbname, username, passwd, cnx)
Exemplo n.º 3
0
def setconnection(dbhost, dbtype, dbsource, dbuser, sqlstmt):
    try:

        userList = [["produser", "pr0duser"], ["datamart", "datamart"],
                    ["dbadmin", "c@llhome"], ["datapulluser", "cAllhome"]]
        retArr = []
        usr = []
        for usr in userList:
            if usr[0] == dbuser:
                passwd = usr[1]

        if dbtype == 'oracle':

            connstr = dbuser + '/' + passwd + '@' + dbhost + '/' + dbsource + ':1521'
            oraconn = oracon.openconnect(connstr)
            dbresulSet = oracon.execSql(oraconn, sqlstmt)

        if dbtype == 'mysql':

            myconn = mysql.connectMysql(dbsource, dbuser, passwd, dbhost)
            cur_mysql = myconn.cursor()
            cur_mysql.execute(sqlstmt)
            dbresulSet = cur_mysql.fetchall()

        if dbtype == 'vertica':
            conn = vconn.vertica_connect(dbhost, 5433, dbuser, passwd,
                                         dbsource)
            vdat = vconn.vertica_sql_execute(conn, sqlstmt)

            dbresulSet = vdat.fetchall()

        retArr = redata(dbresulSet)

        if dbtype == 'oracle':
            dbresulSet.close()
            oraconn.close()
        if dbtype == 'mysql':
            cur_mysql.close()
        if dbtype == 'vertica':
            vdat.close()
        return retArr

    except:
        errlog = open('log/datamart_error.log', 'a')
        function = 'set connection'
        timestr = time.strftime('%m/%d/%Y %H:%M:%S') + ' ' + function
        errlog.write(timestr + '\t Error reported: ' + str(sys.exc_info()[1]) +
                     '\n')
        errlog.write(timestr + '\t SQL STMT: ' + sqlstmt + '\n')
        errlog.close()
Exemplo n.º 4
0
def splitFile(fileid, filename, filepath, datatype, inservserial, datafolder,
              machineid):
    lsconn = lconnect.connectMysql('ods', 'procuser', 'c@llhome', 'localhost')
    lscurr = lsconn.cursor()
    try:
        # Setting the path to the individual inserv
        splitfilepath = datafolder + '/' + str(inservserial) + '/'
        #   statslog.logwrite(fl,'Processing file ..'+ filename);

        inputfile = filepath + '/' + filename
        #   statslog.logwrite(fl,'Setup input file......'+inputfile);
        srcfl = open(inputfile)
        #   statslog.logwrite(fl,'Reading file ..'+ filename);
        readalllines = srcfl.readlines()
        srcfl.close
        sqlstmt = 'select SPLIT_FILE_TYPE, SPLIT_FILE_SEARCH_TAG, SPLIT_FILE_SKIP_LINES, ifnull(SPLIT_FILE_END_TAG,\'          \') SPLIT_FILE_END_TAG,'
        sqlstmt += 'SPLIT_FILE_LINE_SEPERATOR,STATSID from STAT_SPLIT_FILE_LOOKUP where datatype=\'' + datatype + '\''

        #   statslog.logwrite(fl,'Reading file Mysql structure for the input file...');
        #   statslog.logwrite(fl,sqlstmt);
        lscurr.execute(sqlstmt)
        #   statslog.logwrite(fl,'Received structure...');
        filenameList = filename.split('.')
        splitfllist = list()
        i = 0
        for splt in filenameList:
            if i > 0:
                splitfllist.append(splt)
            i += 1
        currlinenum = 0

        readfileln = readalllines
        #   statslog.logwrite(fl,'Reading os version...');
        for ln in readfileln:
            if string.find(ln, 'Release version') >= 0:
                osver = string.strip(ln[15:len(ln)])
    #  statslog.logwrite(fl,'Looping through the structure...');
        datrec = lscurr.fetchall()
        for dat in datrec:
            currspllist = list()
            readline = 0
            readfileln = readalllines
            cpstat = 0
            for ln in readfileln:
                if string.find(ln, dat[1]) >= 0:
                    if readline == 0:
                        spflnm = dat[0] + '.' + str(
                            inservserial) + '.' + string.join(
                                splitfllist, '.')
                    cpstat = 1
                if cpstat == 1:
                    currspllist.append(ln)
                    readline += 1

                    if string.strip(ln[0:len(dat[3])]) == string.strip(dat[3]):
                        cpstat = 0
                        break
                    if not ln:
                        print 'ln is null'
                        if currlinenum >= len(readfileln):
                            cpstat = 0
                            break
                        nextstr = readfileln[currlinenum + 1]
                        print 'nexstr:' + nextstr
                        if not nextstr or string.strip(
                                nextstr[0:len(dat[3])]) == string.strip(
                                    dat[3]) or len(
                                        string.strip(nextstr)
                                    ) == 0 or string.find(nextstr, '<') >= 0:
                            cpstat = 0
                            break

                    currlinenum += 1

            splafterskip = list()
            currline = 0
            splafterskip.append(osver + '\n')
            if len(currspllist) > 0:
                # statslog.logwrite(fl,'Creating directory......'+splitfilepath);
                checkmakedir(splitfilepath,
                             'log/splitter_' + filename + '.log')
                # statslog.logwrite(fl,'Writing file ....'+splitfilepath+'/'+spflnm);
                spfl = open(splitfilepath + '/' + spflnm, 'w')

                for curr in currspllist:
                    if currline >= dat[2] and currline < len(currspllist) - 1:
                        if len(string.strip(curr)) > 0:
                            if string.find(string.strip(curr), '-----') == -1:
                                if curr:
                                    splafterskip.append(curr)
                    currline += 1
                spfl.write(string.join(splafterskip))
                spfl.close()
                #sqlstmt='begin dataload.addsplitfile(inputfileid=>'+str(fileid)+',splitfiletype=>\''+lkp[0]+'\',splitfilename=>\''+spflnm+'\'';
                #sqlstmt+=',splitfilepath=>\''+splitfilepath+'\',statsid=>'+str(lkp[5])+'); end;';

                sqlstmt = 'INSERT INTO STATS_SPLIT_FILES(STATS_FILEID, STATS_SPLITFILE_NAME, STATS_SPLITFILE_PATH, STATSID, STATS_SPLIT_FILE_TYPE) values '
                sqlstmt += '(' + str(
                    fileid
                ) + ',\'' + spflnm + '\',\'' + splitfilepath + '\',' + str(
                    dat[5]) + ',\'' + dat[0] + '\')'
                # statslog.logwrite(fl,sqlstmt);
                lscurr.execute(sqlstmt)

        sqlstmt = 'Update STATSPROCESSTRANSACT set FILE_PROCESS_STATUS=1 where STATS_FILEID=' + str(
            fileid)
        lscurr.execute(sqlstmt)
        lscurr.close()
        lsconn.close()
        #os.remove('log/splitter_'+filename+'.log');
    except:
        fl = statslog.logcreate('log/splitter_' + filename + '.log')
        statslog.logwrite(fl, "Error reported: " + str(sys.exc_info()[1]))
        sqlstmt = 'Update STATSPROCESSTRANSACT set FILE_PROCESS_STATUS=3 where STATS_FILEID=' + str(
            fileid)
        lscurr.execute(sqlstmt)
        sqlstmt = 'delete from STATS_SPLIT_FILES where STATS_FILEID=' + str(
            fileid)
        lscurr.execute(sqlstmt)
        lscurr.close()
        lsconn.close()
Exemplo n.º 5
0
def splitter():
    oraconn = oracon.openconnect(constr)
    lconn = lconnect.connectMysql('ods', 'procuser', 'c@llhome', 'localhost')
    lcurr = lconn.cursor()
    fl = statslog.logcreate('log/splitter.log')

    try:
        statslog.logwrite(fl, 'Getting Host IP...\n')
        ipadd = hostlib.retHostIP()
        statslog.logwrite(fl, 'Getting number of threads...\n')
        sqlstmt = 'select machineid,numberofthreads,dataprocessing_folder,splitterloc from statsprocessingmachine where ipaddress=\'' + ipadd + '\' and enable =1'
        numthreadrec = oracon.execSql(oraconn, sqlstmt)
        for numthread in numthreadrec:
            numthreads = numthread[1]
            machineid = numthread[0]
            datafolder = numthread[3]

        numsplitterthreads = int(numthreads / 2)
        statslog.logwrite(fl, 'Getting number of datatypes...\n')
        sqlstmt = 'select distinct datatype from stat_split_file_lookup'
        datatyprec = oracon.execSql(oraconn, sqlstmt)
        dtct = 0
        for dt in datatyprec:
            dtct += 1
        numsplitthread = int(numsplitterthreads / dtct)
        datatyprec = oracon.execSql(oraconn, sqlstmt)

        lconn = lconnect.connectMysql('ods', 'procuser', 'c@llhome',
                                      'localhost')
        lcurr = lconn.cursor()

        for dtype in datatyprec:
            sqlstmt = 'SELECT COUNT(1) FROM STATSPROCESSTRANSACT WHERE FILE_PROCESS_STATUS=0 and DATATYPE=\'' + dtype[
                0] + '\''
            lcurr.execute(sqlstmt)
            reccount = lcurr.fetchall()
            numInMySql = 0
            for rec in reccount:
                numInMySql = rec[0]
            numsplitterthreads = numsplitthread - numInMySql
            if numsplitterthreads > 0:
                statslog.logwrite(fl, 'Processing for... :' + dtype[0])
                sqlstmt = 'SELECT STATS_FILEID,STATS_FILE_NAME,STATS_FILE_PATH,DATATYPE FROM STATSPROCESSTRANSACT '
                sqlstmt += 'WHERE FILE_PROCESS_STATUS=0 AND MACHINEID=' + str(
                    machineid)
                sqlstmt += ' AND ROWNUM <=' + str(
                    numsplitterthreads
                ) + ' AND DATATYPE= \'' + dtype[0] + '\' ORDER BY STATS_FILEID'
                filerec = oracon.execSql(oraconn, sqlstmt)

                statslog.logwrite(fl, 'Inserting data into mysql...\n')
                sqlstmt = 'INSERT INTO STATSPROCESSTRANSACT (STATS_FILEID,STATS_FILE_NAME,STATS_FILE_PATH,DATATYPE) VALUES '
                sql = []

                for flrec in filerec:
                    sql.append('(' + str(flrec[0]) + ',\'' + flrec[1] +
                               '\',\'' + flrec[2] + '\',\'' + flrec[3] + '\')')
                    sqlst = 'begin Update STATSPROCESSTRANSACT set FILE_PROCESS_STATUS=2 where STATS_FILEID=' + str(
                        flrec[0]) + '; commit; end;'
                    oracon.execSql(oraconn, sqlst)
                if len(sql) > 0:
                    sqlstmt = sqlstmt + string.join(sql, ',')
                    lcurr.execute(sqlstmt)
        lconn.close()
        oraconn.close()
        lconn = lconnect.connectMysql('ods', 'procuser', 'c@llhome',
                                      'localhost')
        lcurr = lconn.cursor()
        statslog.logwrite(fl, 'Querying mysql...')
        sqlstmt = 'SELECT STATS_FILEID,STATS_FILE_NAME,STATS_FILE_PATH,DATATYPE FROM STATSPROCESSTRANSACT where ifnull(FILE_PROCESS_STATUS,0)=0 LIMIT ' + str(
            numsplitthread)
        lcurr.execute(sqlstmt)
        ctr = 0
        statrow = lcurr.fetchall()
        pythonRuns = commands.getoutput(
            "ps -ef | grep python | grep -v grep | wc -l")
        if int(pythonRuns) - 3 < numsplitthread:
            residual = (numsplitthread - int(pythonRuns)) + 3
            sqlstmt = 'SELECT STATS_FILEID,STATS_FILE_NAME,STATS_FILE_PATH,DATATYPE FROM STATSPROCESSTRANSACT where ifnull(FILE_PROCESS_STATUS,0)=0 LIMIT ' + str(
                residual)
            lcurr.execute(sqlstmt)
            ctr = 0
            statrow = lcurr.fetchall()

            for statrec in statrow:
                statslog.logwrite(fl, 'Splitting file...:' + statrec[1])
                flpthlist = string.split(statrec[2], '/')
                inservserial = flpthlist[len(flpthlist) - 2]
                statslog.logwrite(fl, 'Starting process for ... ' + statrec[1])
                p = Process(target=splitFile,
                            args=(
                                statrec[0],
                                statrec[1],
                                statrec[2],
                                statrec[3],
                                inservserial,
                                datafolder + '/splitter',
                                machineid,
                            ))
                p.daemon = True
                p.start()

                sqlstmt = 'Update STATSPROCESSTRANSACT set FILE_PROCESS_STATUS=2 where STATS_FILEID=' + str(
                    statrec[0])
                lcurr.execute(sqlstmt)
                ctr += 1
        lcurr.close()
        lconn.close()
        os.remove('log/splitter.log')
    except:
        statslog.logwrite(fl, "Error reported: " + str(sys.exc_info()[1]))
Exemplo n.º 6
0
def splitter():
    oraconn = oracon.openconnect(constr)
    lconn = lconnect.connectMysql('ods', 'procuser', 'c@llhome', 'localhost')
    lcurr = lconn.cursor()
    try:
        sqlstmt = 'update STATSPROCESSTRANSACT set FILE_PROCESS_STATUS=0 where FILE_PROCESS_STATUS=2'
        lcurr.execute(sqlstmt)

        ipadd = hostlib.retHostIP()

        sqlstmt = 'select machineid,numberofthreads,dataprocessing_folder,splitterloc,number_of_files_per_run,delay_seconds from statsprocessingmachine where ipaddress=\'' + ipadd + '\' and enable =1'
        numthreadrec = oracon.execSql(oraconn, sqlstmt)
        for numthread in numthreadrec:
            numthreads = numthread[1]
            machineid = numthread[0]
            datafolder = numthread[3]
            files_perrun = numthread[4]
            delay_seconds = numthread[5]

        nummysqltreads = int(numthreads)
        numsplitterthreads = int(numthreads)
        sqlstmt = 'select distinct datatype from stat_split_file_lookup'
        datatyprec = oracon.execSql(oraconn, sqlstmt)
        #dtct=0;
        datatypes = []
        for dt in datatyprec:
            #dtct+=1;
            datatypes.append(dt[0])
        #numsplitthread=int(numsplitterthreads/dtct);
        #datatyprec=oracon.execSql(oraconn,sqlstmt);

        #lconn = lconnect.connectMysql('ods','procuser','c@llhome','localhost');
        #lcurr=lconn.cursor();
        sqlstmt = 'select count(1) from statsprocesstransact where machineid=' + str(
            machineid
        ) + ' and FILE_PROCESS_STATUS=0 and datatype in (\'' + string.join(
            datatypes, '\'' + ',' + '\'') + '\')'
        datatypct = oracon.execSql(oraconn, sqlstmt)
        for dtctrec in datatypct:
            tot = dtctrec[0]

        datatypelist = []
        sqlstmt = 'select datatype,count(1) from statsprocesstransact where machineid=' + str(
            machineid
        ) + ' and FILE_PROCESS_STATUS=0 and datatype in (\'' + string.join(
            datatypes, '\'' + ',' + '\'') + '\') group by datatype'
        datatyprec = oracon.execSql(oraconn, sqlstmt)
        for dt in datatyprec:
            recperdatatype = int(
                ((dt[1] * 1.0) / (tot * 1.0)) * numsplitterthreads)
            datatypelist.append(dt[0] + ',' + str(recperdatatype))

        for dtype in datatypelist:
            datatype = string.split(dtype, ',')
            sqlstmt = 'SELECT COUNT(1) FROM STATSPROCESSTRANSACT WHERE IFNULL(FILE_PROCESS_STATUS,0)=0 AND DATATYPE= \'' + datatype[
                0] + '\''
            lcurr.execute(sqlstmt)
            reccount = lcurr.fetchall()
            numInMySql = 0
            for rec in reccount:
                numInMySql = rec[0]
            numsplitterthreads = int(datatype[1]) - int(numInMySql)
            if numsplitterthreads > 0:
                sqlstmt = 'SELECT STATS_FILEID,STATS_FILE_NAME,STATS_FILE_PATH,DATATYPE FROM STATSPROCESSTRANSACT '
                sqlstmt += 'WHERE FILE_PROCESS_STATUS=0 AND MACHINEID=' + str(
                    machineid)
                sqlstmt += ' AND ROWNUM <=' + str(
                    numsplitterthreads) + ' AND DATATYPE= \'' + datatype[
                        0] + '\' ORDER BY STATS_FILEID'
                filerec = oracon.execSql(oraconn, sqlstmt)

                sqlstmt = 'INSERT INTO STATSPROCESSTRANSACT (STATS_FILEID,STATS_FILE_NAME,STATS_FILE_PATH,DATATYPE) VALUES '
                sql = []

                for flrec in filerec:
                    sql.append('(' + str(flrec[0]) + ',\'' + flrec[1] +
                               '\',\'' + flrec[2] + '\',\'' + flrec[3] + '\')')
                    sqlst = 'begin Update STATSPROCESSTRANSACT set FILE_PROCESS_STATUS=2 where STATS_FILEID=' + str(
                        flrec[0]) + '; commit; end;'
                    oracon.execSql(oraconn, sqlst)
                if len(sql) > 0:
                    sqlstmt = sqlstmt + string.join(sql, ',')
                    lcurr.execute(sqlstmt)
        lconn.close()
        oraconn.close()
        lconn = lconnect.connectMysql('ods', 'procuser', 'c@llhome',
                                      'localhost')
        lcurr = lconn.cursor()
        ctr = 0
        pythonRuns = commands.getoutput(
            "ps -ef | grep parser | grep -v grep | wc -l")

        if int(pythonRuns) - 3 < nummysqltreads:
            residual = (nummysqltreads - int(pythonRuns))
            sqlstmt = 'SELECT COUNT(1) FROM STATSPROCESSTRANSACT where ifnull(FILE_PROCESS_STATUS,0)=0'
            lcurr.execute(sqlstmt)
            totalcurr = lcurr.fetchall()
            for lcur in totalcurr:
                total_recs = lcur[0]
            sqlstmt = 'SELECT DATATYPE,COUNT(1) FROM STATSPROCESSTRANSACT where ifnull(FILE_PROCESS_STATUS,0)=0 GROUP BY DATATYPE'
            lcurr.execute(sqlstmt)
            datrec = lcurr.fetchall()
            for lcur in datrec:
                datatype = lcur[0]
                numrecs = int(
                    round((lcur[1] * 1.0 / total_recs * 1.0) * residual * 1.0,
                          0))
                sqlstmt = 'SELECT distinct STATS_FILEID,STATS_FILE_NAME,STATS_FILE_PATH,DATATYPE FROM STATSPROCESSTRANSACT where ifnull(FILE_PROCESS_STATUS,0)=0 and DATATYPE=\'' + datatype + '\' LIMIT ' + str(
                    numrecs)
                lcurr.execute(sqlstmt)
                ctr = 0
                statrow = lcurr.fetchall()

                for statrec in statrow:
                    flpthlist = string.split(statrec[2], '/')
                    inservserial = flpthlist[len(flpthlist) - 2]
                    p = Process(target=splitFile,
                                args=(
                                    statrec[0],
                                    statrec[1],
                                    statrec[2],
                                    statrec[3],
                                    inservserial,
                                    datafolder + '/splitter',
                                    machineid,
                                ))
                    p.daemon = True
                    p.start()

                    while ctr > files_perrun:
                        time.sleep(delay_seconds)
                        sqlstmt = 'SELECT COUNT(1) FROM STATSPROCESSTRANSACT WHERE FILE_PROCESS_STATUS=2'
                        lcurr.execute(sqlstmt)
                        rowRec = lcurr.fetchall()
                        for rec in rowRec:
                            ctr = rec[0]
                    sqlstmt = 'Update STATSPROCESSTRANSACT set FILE_PROCESS_STATUS=2 where STATS_FILEID=' + str(
                        statrec[0]) + ' and ifnull(FILE_PROCESS_STATUS,0)=0'
                    lcurr.execute(sqlstmt)
                    ctr += 1
        lcurr.close()
        lconn.close()
        #os.remove('log/splitter.log');
    except:
        fl = statslog.logcreate('log/splitter.log')
        statslog.logwrite(fl, "Error reported: " + str(sys.exc_info()[1]))
Exemplo n.º 7
0
def create_table(tablename, part_column, sourcedb, dbuser, dbtype, hostname):
    try:

        dbname = 'ods'
        username = '******'
        passwd = 'datamart'
        host = 'localhost'
        myconn = mysql.connectMysql(dbname, username, passwd, host)

        tableinsert = []
        datamartlogger = open('log/datamart_extract.log', 'a')
        cur_mysql = myconn.cursor()
        columnList = []

        sqlstmt = 'DROP TABLE IF EXISTS ' + tablename
        cur_mysql.execute(sqlstmt)
        timestr = time.strftime('%m/%d/%Y %H:%M:%S')
        datamartlogger.write(timestr + '\t' + sqlstmt + '\n')
        insert_stmt = 'insert into ' + tablename + '('
        createstmt = 'create table ' + tablename + '('
        selectstmt = 'select '
        collist = []
        colact = []
        insertact = []

        sqlstmt = 'select count(1) from ' + tablename
        OracleCount = setconnection(hostname, dbtype, sourcedb, dbuser,
                                    sqlstmt)

        for orct in OracleCount:
            reccount = orct[0]
        datamartlogger.write(timestr + '\t' + tablename + ' has ' +
                             str(reccount) + ' rows\n')

        sqlstmt = 'select COLUMN_NAME, lower(DATA_TYPE),DATA_LENGTH FROM USER_TAB_COLUMNS WHERE TABLE_NAME = \'' + tablename + '\' ORDER BY COLUMN_ID'

        OracleColResultset = setconnection(hostname, dbtype, sourcedb, dbuser,
                                           sqlstmt)

        #Creating table in MySQL based on different column datatype
        for col in OracleColResultset:
            collist.append(col[0])
            if col[1] == 'varchar2':  #converting varchar2 to varchar
                datatype = 'varchar(' + str(col[2]) + ')'
                colact.append('chr(39)||replace(' + col[0] +
                              ',\'|\',\' \')||chr(39)')
                insertact.append('none')
            if col[1] == 'number':  #converting number to decimal(20,2)
                datatype = 'decimal(20,2)'
                colact.append('to_char(nvl(' + col[0] + ',0))')
                insertact.append('none')
            if col[1] == 'date':  #converting date to datetime
                datatype = 'datetime'
                colact.append('chr(39)||to_char(' + col[0] +
                              ',\'YYYYMMDDHH24MISS\')||chr(39)')
                insertact.append('str_to_date(,\'%Y%m%D%H%i%S\'')
            if col[1] == 'char':  #converting char to varchar
                datatype = 'varchar(' + str(col[2]) + ')'
                colact.append('chr(39)||' + col[0] + '||chr(39)')
                insertact.append('none')
            columnList.append(col[0] + ' ' + datatype)
        selectstmt += string.join(colact, '||\'|\'||') + ' from ' + tablename
        partstmt = ' '
        if len(string.strip(part_column)) > 0:
            selectstmt += ' where ' + part_column + ' ='
            partstmt = 'select distinct ' + part_column + ' from ' + tablename + ' order by ' + part_column

        insert_stmt += string.join(collist, ',') + ') values '
        tableinsert.append([
            tablename, selectstmt, insert_stmt,
            string.join(insertact, ';'), partstmt, reccount
        ])
        sqlstmt = createstmt + string.join(columnList, ',') + ')'
        cur_mysql.execute(sqlstmt)
        cur_mysql.close()
        datamartlogger.close()
        return tableinsert
        #Tableinsert is the combined list of tablename,selectstmt,insertstmt
    except:
        errlog = open('log/datamart_error.log', 'a')
        function = 'create_table'
        timestr = time.strftime('%m/%d/%Y %H:%M:%S') + ' ' + function
        errlog.write(timestr + '\t Error reported for the table: ' +
                     tablename + '\n')
        timestr = time.strftime('%m/%d/%Y %H:%M:%S') + ' ' + function
        #errlog.write(timestr+'\t'+col[0]+'\n');
        errlog.write(timestr + '\t Error reported: ' + str(sys.exc_info()[1]) +
                     '\n')

        errlog.close()
Exemplo n.º 8
0
#!/usr/bin/env python
import oracleconnect as oracon
import mysqlconnect as mysqlconn
import string
import sys

constr = 'ods/ods@callhomeods:1521/callhomeods'
try:
    lc = mysqlconn.connectMysql('ods', 'procuser', 'c@llhome', 'localhost')
    curr = lc.cursor()
    sqlstmt = 'SELECT STATS_FILEID FROM STATSPROCESSTRANSACT WHERE IFNULL(FILE_PROCESS_STATUS,0) IN (0,2)'
    curr.execute(sqlstmt)
    fileidrec = curr.fetchall()
    fileidList = []
    print 'Getting filelist from mysql.........'
    recordsprocessed = 0
    totaldone = 0
    for idrec in fileidrec:
        fileidList.append(str(idrec[0]))
        recordsprocessed += 1
        if recordsprocessed > 999:
            fileids = string.join(fileidList, ',')
            sqlstmt = 'begin update STATSPROCESSTRANSACT SET FILE_PROCESS_STATUS=0 WHERE STATS_FILEID IN (' + fileids + '); commit; end;'
            oraconn = oracon.openconnect(constr)
            oracon.execSql(oraconn, sqlstmt)
            totaldone += recordsprocessed
            recordsprocessed = 0
            print 'Total records updated...:' + str(totaldone)
            fileidList = []

    if len(fileidList) > 0:
Exemplo n.º 9
0
#!/usr/bin/env python
import oracleconnect as oracon
import mysqlconnect as mconnect
import string
import time
import hostlib as hst
import os
import statslog
import sys

constr = 'ods/ods@callhomeods:1521/callhomeods'

lconn = mconnect.connectMysql('ods', 'procuser', 'c@llhome', 'localhost')
lcurr = lconn.cursor()
hstname = hst.retHostName()
ipadd = hst.retHostIP()

SplitConStr = 'ods/ods@callhomedb03:1521/callhomedb03'
ProcConStr = 'ods/ods@callhomedb04:1521/callhomedb04'

sqlArr = []
MysqlArr = []
oraconn = oracon.openconnect(constr)

sql = 'select purpose from statsprocessingmachine where ipaddress=\'' + ipadd + '\''
recon = oracon.execSql(oraconn, sql)
for rec in recon:
    purpose = rec[0]
oraconn.close()

try:
Exemplo n.º 10
0
def updateMetadata():
    try:
        lsconn = lconnect.connectMysql('ods', 'procuser', 'c@llhome',
                                       'localhost')
        lscurr = lsconn.cursor()

        oraconn = oracon.openconnect(constr)

        sqlstmt = 'SELECT STATSID, STATS_STRUCTURE_ID, STATSNAME, STATS_VERSION, STATS_FIRST_ROW_VERSION, STATS_SINGLE_ROW, STATS_END_OF_ROW FROM STATSSTRUCTURE ORDER BY 1'
        statrec = oracon.execSql(oraconn, sqlstmt)

        sqlstmt = 'SELECT DATATYPE, SPLIT_FILE_TYPE, SPLIT_FILE_SEARCH_TAG, SPLIT_FILE_SKIP_LINES, SPLIT_FILE_END_TAG, SPLIT_FILE_LINE_SEPERATOR, STATSID FROM STAT_SPLIT_FILE_LOOKUP'
        splitrec = oracon.execSql(oraconn, sqlstmt)

        sqlstmt = 'SELECT STATS_STRUCTURE_ID, PROCESS_SEQUENCE, FUNCTIONID FROM PROCESSLOGIC'
        processrec = oracon.execSql(oraconn, sqlstmt)

        sqlstmt = 'SELECT FUNCTIONID, FUNCTIONNAME, FUNCTIONPARAMETERS, FUNCTIONDESC FROM PROCESSFUNCTION'
        profunc = oracon.execSql(oraconn, sqlstmt)

        for rec in statrec:
            sqlstmt = 'SELECT COUNT(1) FROM STATSSTRUCTURE WHERE STATSID=' + str(
                rec[0]) + ' and STATS_STRUCTURE_ID=' + str(rec[1])
            lscurr.execute(sqlstmt)
            recData = lscurr.fetchall()
            recCount = 0
            for recdat in recData:
                recCount = recdat[0]
            if recCount == 0:
                if not rec[6]:
                    end_row = ''
                else:
                    end_row = rec[6]

                sqlstmt = 'INSERT INTO STATSSTRUCTURE (STATSID, STATS_STRUCTURE_ID, STATSNAME, STATS_VERSION, STATS_FIRST_ROW_VERSION, STATS_SINGLE_ROW, STATS_END_OF_ROW) VALUES '
                sqlstmt += ' (' + str(rec[0]) + ',' + str(
                    rec[1]
                ) + ',\'' + rec[2] + '\',\'' + rec[3] + '\',\'' + rec[
                    4] + '\',\'' + rec[5] + '\',\'' + end_row + '\')'
                lscurr.execute(sqlstmt)

        for rec in splitrec:
            sqlstmt = 'SELECT COUNT(1) FROM STAT_SPLIT_FILE_LOOKUP WHERE DATATYPE=\'' + str(
                rec[0]) + '\' and  SPLIT_FILE_TYPE=\'' + rec[1] + '\''
            lscurr.execute(sqlstmt)
            recData = lscurr.fetchall()
            recCount = 0
            for recdat in recData:
                recCount = recdat[0]
            if recCount == 0:
                if not rec[5]:
                    line_sep = ''
                else:
                    line_sep = rec[5]
                sqlstmt = 'INSERT INTO STAT_SPLIT_FILE_LOOKUP (DATATYPE, SPLIT_FILE_TYPE, SPLIT_FILE_SEARCH_TAG, SPLIT_FILE_SKIP_LINES, SPLIT_FILE_END_TAG, SPLIT_FILE_LINE_SEPERATOR, STATSID) '
                sqlstmt += 'VALUES (\'' + str(rec[0]) + '\',\'' + str(
                    rec[1]) + '\',\'' + rec[2] + '\',' + str(
                        rec[3]
                    ) + ',\'' + rec[4] + '\',\'' + line_sep + '\',' + str(
                        rec[6]) + ')'
                lscurr.execute(sqlstmt)

        for rec in processrec:
            sqlstmt = 'SELECT COUNT(1) FROM PROCESSLOGIC WHERE STATS_STRUCTURE_ID=' + str(
                rec[0]) + ' and PROCESS_SEQUENCE=' + str(
                    rec[1]) + ' and FUNCTIONID=' + str(rec[2])
            #sqlstmt='SELECT COUNT(1) FROM PROCESSLOGIC WHERE STATS_STRUCTURE_ID='+str(rec[0]);
            lscurr.execute(sqlstmt)
            recData = lscurr.fetchall()
            recCount = 0
            for recdat in recData:
                recCount = recdat[0]
            if recCount == 0:
                sqlstmt = 'INSERT INTO PROCESSLOGIC (STATS_STRUCTURE_ID, PROCESS_SEQUENCE, FUNCTIONID) '
                sqlstmt += 'VALUES (' + str(rec[0]) + ',' + str(
                    rec[1]) + ',' + str(rec[2]) + ')'
                lscurr.execute(sqlstmt)
            #else:
            ##fix logic if needed
            #    sqlstmt='select STATS_STRUCTURE_ID, PROCESS_SEQUENCE, FUNCTIONID FROM PROCESSLOGIC WHERE STATS_STRUCTURE_ID='+str(rec[0])+' and PROCESS_SEQUENCE='+str(rec[1]);
            #    lscurr.execute(sqlstmt);
            #    dat=lscurr.fetchall();
            #    for dt in dat:
            #        if dt[2] != rec[2]:
            #            sqlstmt='update PROCESSLOGIC SET FUNCTIONID='+str(rec[2])+' WHERE STATS_STRUCTURE_ID='+str(rec[0])+' and PROCESS_SEQUENCE='+str(rec[1]);
            #            lscurr.execute(sqlstmt);

        sqlstmt = 'SELECT STATS_STRUCTURE_ID, PROCESS_SEQUENCE, FUNCTIONID FROM PROCESSLOGIC'
        lscurr.execute(sqlstmt)
        mySqlDat = lscurr.fetchall()
        for myRec in mySqlDat:
            sqlstmt = 'SELECT COUNT(1) FROM PROCESSLOGIC where STATS_STRUCTURE_ID=' + str(
                myRec[0]) + ' AND PROCESS_SEQUENCE=' + str(
                    myRec[1]) + ' AND FUNCTIONID=' + str(myRec[2])
            processrec = oracon.execSql(oraconn, sqlstmt)
            for recdat in processrec:
                recCount = recdat[0]
            if recCount == 0:
                sqlstmt = 'delete from PROCESSLOGIC where STATS_STRUCTURE_ID=' + str(
                    myRec[0]) + ' and PROCESS_SEQUENCE =' + str(
                        myRec[1]) + ' and FUNCTIONID =' + str(myRec[2])
                lscurr.execute(sqlstmt)

        for rec in profunc:
            sqlstmt = 'SELECT COUNT(1) FROM PROCESSFUNCTION WHERE FUNCTIONID=' + str(
                rec[0]) + ' and FUNCTIONNAME=\'' + str(rec[1]) + '\''
            lscurr.execute(sqlstmt)
            recData = lscurr.fetchall()
            recCount = 0
            for recdat in recData:
                recCount = recdat[0]
            if recCount == 0:
                sqlstmt = 'INSERT INTO PROCESSFUNCTION (FUNCTIONID, FUNCTIONNAME, FUNCTIONPARAMETERS, FUNCTIONDESC) '
                sqlstmt += 'VALUES (' + str(rec[0]) + ',\'' + str(
                    rec[1]) + '\',\'' + str(rec[2]) + '\',\'' + str(
                        rec[3]) + '\')'
                lscurr.execute(sqlstmt)

        lscurr.close()
        lsconn.close()
        oraconn.close()
    except:
        fl = statslog.logcreate('log/copymeta.log')
        statslog.logwrite(fl, 'Error reported: ' + str(sys.exc_info()[1]))
import requests
import mysqlconnect
from urllib.parse import urlencode
from pyquery import PyQuery as pq
import time

urlSearch = 'https://www.zhihu.com/api/v4/search_v3?'
headers = {
    'cookie':
    'd_c0="AJDAEUUImwqPTkKyoYgudon8wR6W8cgZ_gM=|1474982733"; q_c1=23b9eda7df81459c9789a908930cca72|1507096790000|1474982733000; _zap=55c589c0-280d-4fe5-8a8d-f5aaa935ad0c; _xsrf=Gs0BGPtSCbvuI78Y3ma5t01tgIhum0RR; q_c1=23b9eda7df81459c9789a908930cca72|1533896590000|1474982733000; __utma=155987696.718434429.1525358318.1533909362.1533971909.3; __gads=ID=a5be66fc4c81514b:T=1554558539:S=ALNI_MZ2Wtus8SRwdPsdt5NIdcbqpzUDcg; tgw_l7_route=4860b599c6644634a0abcd4d10d37251; capsion_ticket="2|1:0|10:1555688182|14:capsion_ticket|44:ODgwMzMxNmYwOWFkNDllOWE1NmEzMzAxNjdkMGJkNDE=|6c9064c327dd03cc62dcbed0c978b3cbcfc872a83df6efef02724e6549cb879d"; z_c0="2|1:0|10:1555688239|4:z_c0|92:Mi4xOXhCWkF3QUFBQUFBa01BUlJRaWJDaWNBQUFDRUFsVk5MM2poWEFCUmpFRl9hSU12ckJPUU13Qk53WlRFWEFvX0tR|574bfa02342878f5394354e72dfc9ab57f324c1e1893d1ab531861041f70421d"; tst=r',
    'user-agent':
    'Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36'
}

#拿到数据库的游标
conn = mysqlconnect.connectMysql()
cur = conn.cursor()


#创建数据表,如果已存在就不建
def createTables():
    #存放知乎精选回答
    if not mysqlconnect.tableExsist(cur, 'searchanswer'):
        cur.execute('''
        create table searchanswer(
        id int unsigned primary key auto_increment,
        title text not null,
        author text,
        avatar text,
        article mediumtext
        )default charset=utf8mb4;
Exemplo n.º 12
0
def processFile(STATS_SPLITFILE_ID, STATS_SPLITFILE_NAME, STATS_SPLITFILE_PATH,
                STATS_SPLITFILE_TYPE, INSERVSERIAL, DATAFOLDER, MACHINEID):
    try:
        fl = statslog.logcreate("log/" + STATS_SPLITFILE_NAME)
        lc = mysqlconn.connectMysql('ods', 'procuser', 'c@llhome', 'localhost')
        curr = lc.cursor()

        errlog = 'log/' + STATS_SPLITFILE_NAME + '.err'
        if versionExist(STATS_SPLITFILE_TYPE) == 1:
            version = prclib.getVersion(STATS_SPLITFILE_NAME,
                                        STATS_SPLITFILE_PATH)
        else:
            version = '2.3.1'

        sqlstmt = 'SELECT COUNT(1) FROM STATSSTRUCTURE WHERE STATS_VERSION=\'' + version + '\' and STATSNAME=\'' + STATS_SPLITFILE_TYPE + '\''
        curr.execute(sqlstmt)
        dat = curr.fetchall()
        for datrec in dat:
            count = datrec[0]

        if count > 0:
            sqlstmt = 'SELECT STATS_STRUCTURE_ID FROM STATSSTRUCTURE WHERE STATS_VERSION=\'' + version + '\' and STATSNAME=\'' + STATS_SPLITFILE_TYPE + '\''
            curr.execute(sqlstmt)
            verdat = curr.fetchall()
            STATS_STRUCTURE_ID = 0
            for ver in verdat:
                STATS_STRUCTURE_ID = ver[0]

            if STATS_STRUCTURE_ID > 0:
                sqlstmt = 'SELECT PROCESS_SEQUENCE,FUNCTIONID,PARAMETER_VALUES FROM PROCESSLOGIC WHERE STATS_STRUCTURE_ID=' + str(
                    STATS_STRUCTURE_ID) + ' ORDER BY PROCESS_SEQUENCE'
                curr.execute(sqlstmt)
                prcdat = curr.fetchall()

                linearray = prclib.readFile(STATS_SPLITFILE_NAME,
                                            STATS_SPLITFILE_PATH)
                for prc in prcdat:
                    sqlstmt = 'SELECT FUNCTIONNAME FROM PROCESSFUNCTION WHERE FUNCTIONID=' + str(
                        prc[1])
                    curr.execute(sqlstmt)
                    funcNamerec = curr.fetchall()
                    for fu in funcNamerec:
                        functionName = fu[0]
                    func = getattr(prclib, functionName)
                    linearray = func(linearray)

                filepath = DATAFOLDER + '/' + str(INSERVSERIAL) + '/'
                checkmakedir(filepath, errlog)
                outputFl = open(filepath + STATS_SPLITFILE_NAME, 'w')
                outputFl.write(linearray)
                sqlstmt = 'insert into STATSOUTPUT(STATS_SPLITFILE_ID,STATS_OUTPUTFILE_NAME,STATS_OUTPUTFILE_PATH,STATS_FILE_CREATE_DATE,STATS_FILE_LOAD_STATUS,STATS_STRUCTURE_TYPE_ID) values '
                sqlstmt += '(' + str(
                    STATS_SPLITFILE_ID
                ) + ',\'' + STATS_SPLITFILE_NAME + '\',\'' + filepath + '\',sysdate(),0,' + str(
                    STATS_STRUCTURE_ID) + ')'
                curr.execute(sqlstmt)
                sqlstmt = 'Update STATS_SPLIT_FILES set FILE_PROCESS_STATUS=1 where STATS_SPLITFILE_ID=' + str(
                    STATS_SPLITFILE_ID)
                curr.execute(sqlstmt)
                curr.close()
                lc.close()
            else:
                sqlstmt = 'DELETE FROM STATS_SPLIT_FILES WHERE STATS_SPLITFILE_ID=' + str(
                    STATS_SPLITFILE_ID)
                curr.execute(sqlstmt)
                curr.close()
                lc.close()
        else:
            fl = statslog.logcreate("log/" + STATS_SPLITFILE_NAME)
            statslog.logwrite(
                fl,
                "Error reported: Version " + version + " not yet supported")
            sqlstmt = 'Update STATS_SPLIT_FILES set FILE_PROCESS_STATUS=3 where STATS_SPLITFILE_ID=' + str(
                STATS_SPLITFILE_ID)
            lc = mysqlconn.connectMysql('ods', 'procuser', 'c@llhome',
                                        'localhost')
            curr = lc.cursor()
            curr.execute(sqlstmt)
            curr.close()
            lc.close()
    except:
        fl = statslog.logcreate("log/" + STATS_SPLITFILE_NAME)
        statslog.logwrite(fl, "Error reported: " + str(sys.exc_info()[1]))
        sqlstmt = 'Update STATS_SPLIT_FILES set FILE_PROCESS_STATUS=3 where STATS_SPLITFILE_ID=' + str(
            STATS_SPLITFILE_ID)
        lc = mysqlconn.connectMysql('ods', 'procuser', 'c@llhome', 'localhost')
        curr = lc.cursor()
        curr.execute(sqlstmt)
        curr.close()
        lc.close()
Exemplo n.º 13
0
def procExec():
    try:
        oraconn = oracon.openconnect(constr)
        lconn = mysqlconn.connectMysql('ods', 'procuser', 'c@llhome',
                                       'localhost')

        lcurr = lconn.cursor()
        ipadd = hostlib.retHostIP()

        sqlstmt = 'select machineid,numberofthreads,processloc,number_of_files_per_run,delay_seconds from statsprocessingmachine where ipaddress=\'' + ipadd + '\' and enable =1'
        numthreadrec = oracon.execSql(oraconn, sqlstmt)
        for numthread in numthreadrec:
            numthreads = numthread[1]
            machineid = numthread[0]
            datafolder = numthread[2]
            files_per_run = numthread[3]
            delay_seconds = numthread[4]

        #statslog.logwrite(fl,str(numthreads)+':'+str(files_per_run)+':'+str(delay_seconds));
        numthreads = int(numthreads)

        sqlstmt = 'select distinct statsname from STATSSTRUCTURE a,PROCESSLOGIC b where a.STATS_STRUCTURE_ID = b.STATS_STRUCTURE_ID'

        lcurr.execute(sqlstmt)
        recc = lcurr.fetchall()
        statList = []
        for rec in recc:
            statList.append('\'' + rec[0] + '\'')
        statsStr = string.join(statList, ',')
        sqlstmt = 'update  STATS_SPLIT_FILES set FILE_PROCESS_STATUS=0 WHERE FILE_PROCESS_STATUS=2'
        lcurr.execute(sqlstmt)

        sqlstmt = 'select count(1) from STATS_SPLIT_FILES where ifnull(FILE_PROCESS_STATUS,0)=0 and STATS_SPLIT_FILE_TYPE in (' + statsStr + ')'

        lcurr.execute(sqlstmt)
        recc = lcurr.fetchall()

        numRec = 0
        for rec in recc:
            numRec = rec[0]

        numthreadstocopy = numthreads - numRec
        #statslog.logwrite(fl,str(numthreadstocopy));

        if numthreadstocopy > 0:
            sqlstmt = ''
            rows = 1000
            totrows = rows
            while totrows <= numthreadstocopy:
                sqlstmt = 'select STATS_FILEID,STATS_SPLITFILE_ID,STATS_SPLITFILE_NAME,STATS_SPLITFILE_PATH,STATSID,STATS_SPLIT_FILE_TYPE from STATS_SPLIT_FILES '
                sqlstmt += ' where FILE_PROCESS_STATUS=0 and MACHINEID=' + str(
                    machineid
                ) + ' and ROWNUM <=' + str(
                    rows
                ) + ' and STATS_SPLIT_FILE_TYPE in (' + statsStr + ') order by STATS_SPLITFILE_ID desc'
                #statslog.logwrite(fl,sqlstmt);
                FileRec = oracon.execSql(oraconn, sqlstmt)

                sqlstmt = 'INSERT INTO STATS_SPLIT_FILES (STATS_FILEID,STATS_SPLITFILE_ID,STATS_SPLITFILE_NAME,STATS_SPLITFILE_PATH,STATSID,STATS_SPLIT_FILE_TYPE) VALUES '
                sql = []

                for flrec in FileRec:
                    sql.append('(' + str(flrec[0]) + ',' + str(flrec[1]) +
                               ',\'' + flrec[2] + '\',\'' + flrec[3] + '\',' +
                               str(flrec[4]) + ',\'' + flrec[5] + '\')')
                    sqlst = 'begin Update STATS_SPLIT_FILES set FILE_PROCESS_STATUS=2 where STATS_SPLITFILE_ID=' + str(
                        flrec[1]) + '; commit; end;'
                    oracon.execSql(oraconn, sqlst)

                if len(sql) > 0:
                    sqlstmt += string.join(sql, ',')
                    lcurr.execute(sqlstmt)
                totrows = totrows + rows
        pythonRuns = commands.getoutput(
            "ps -ef | grep parsetabber | grep -v grep | wc -l")
        while int(pythonRuns) - 3 <= files_per_run:
            sqlstmt = 'SELECT a.STATS_SPLITFILE_ID,a.STATS_SPLITFILE_NAME,a.STATS_SPLITFILE_PATH,STATS_SPLIT_FILE_TYPE from  STATS_SPLIT_FILES a where ifnull(FILE_PROCESS_STATUS,0)=0  and  STATS_SPLIT_FILE_TYPE in (' + statsStr + ') LIMIT ' + str(
                files_per_run - int(pythonRuns))
            #statslog.logwrite(fl,sqlstmt);
            if (files_per_run - int(pythonRuns)) > 0:
                ctr = 0
                lcurr = lconn.cursor()
                lcurr.execute(sqlstmt)
                filelist = lcurr.fetchall()

                for flrec in filelist:
                    flpthlist = string.split(flrec[2], '/')
                    inservserial = flpthlist[len(flpthlist) - 2]
                    p = Process(target=processFile,
                                args=(
                                    flrec[0],
                                    flrec[1],
                                    flrec[2],
                                    flrec[3],
                                    inservserial,
                                    datafolder,
                                    machineid,
                                ))
                    p.daemon = True
                    p.start()
                    ctr += 1
                    sqlstmt = 'Update STATS_SPLIT_FILES set FILE_PROCESS_STATUS=2 where STATS_SPLITFILE_ID=' + str(
                        flrec[0])

                    lcurr.execute(sqlstmt)
                lcurr.close()
                if ctr == 0:
                    break
                pythonRuns = commands.getoutput(
                    "ps -ef | grep parsetabber | grep -v grep | wc -l")
                while int(pythonRuns) > files_per_run:
                    time.sleep(delay_seconds)
                    pythonRuns = commands.getoutput(
                        "ps -ef | grep parsetabber | grep -v grep | wc -l")
            pythonRuns = commands.getoutput(
                "ps -ef | grep parsetabber | grep -v grep | wc -l")

        lconn.close()
    except:
        fl = statslog.logcreate("log/parser.log")
        statslog.logwrite(fl, "Error reported: " + str(sys.exc_info()[1]))