Exemplo n.º 1
0
def getJoinCnt(session, params):
    """Run ``CNT_QUERY`` for a time window and return its rows as dicts.

    Args:
        session: Object whose ``execute(query=..., parameters=..., trace=...)``
            runs the query (presumably a Cassandra driver session -- confirm
            against the caller).
        params: Sequence whose first two items support ``strftime``; they are
            sent to the query as the lower and upper timestamp bounds.

    Returns:
        A list with one dict per returned row, holding the keys ``prodID``,
        ``consID``, ``topic`` and ``ts`` (``ts`` is always ``params[0]``, not
        a value read from the row), or ``None`` when ``ReadFailure`` is
        raised.
    """
    ts_format = '%Y-%m-%d %H:%M:%S'
    try:
        collected = []
        lower_bound = params[0].strftime(ts_format)
        upper_bound = params[1].strftime(ts_format)
        result = session.execute(query=CNT_QUERY,
                                 parameters=(lower_bound, upper_bound),
                                 trace=True)
        # The query is traced and the trace is dumped to stdout.
        print(result.get_query_trace())
        for record in result:
            collected.append(dict(prodID=record.prodid,
                                  consID=record.consid,
                                  topic=record.topic,
                                  ts=params[0]))
            log.info("cnt: {}, {}, {}".format(record.prodid, record.consid,
                                              record.topic))
        return collected
    except ReadFailure as rf:
        # Report the failure both to the logger and to the log file.
        message = "Error executing getJoinCnt to Cassandra: {}".format(rf.args)
        log.error(message)
        tools.logfile(message)
        return None
Exemplo n.º 2
0
def updateQuarter():
    """Aggregate one 15-minute window of ``cnt`` into ``quarter_cnt``.

    Works on the module globals ``last_update_quarter`` (start of the next
    window to process) and ``session`` (database session, opened on demand).

    On the first call the window start is taken from
    ``db.getMinTimestamp(session, "cnt")`` and floored to a 15-minute
    boundary. A window is only processed once its end is no later than
    ``datetime.datetime.now()``.

    Returns:
        ``False`` when no session could be opened, no minimum timestamp is
        available, the window is not complete yet, or ``db.getJoinCnt``
        returned ``None``. ``True`` otherwise; in that case the window start
        has been advanced by 15 minutes, even if the delete step failed.
    """
    global last_update_quarter, session

    step = datetime.timedelta(minutes=15)

    # First execution: derive the starting window from the stored data.
    if last_update_quarter is None:
        log.info("last_update_quarter is None")

        if session is None:
            session = db.connect()
        if session is None:
            return False

        last_update_quarter = db.getMinTimestamp(session, "cnt")
        if last_update_quarter is None:
            return False
        oldest = last_update_quarter
        # Floor to the quarter hour; seconds, microseconds and tzinfo are dropped.
        # NOTE(review): original comment says no UTC -> local conversion is
        # needed because the VM clock is UTC -- confirm for other hosts.
        last_update_quarter = datetime.datetime(oldest.year,
                                                oldest.month,
                                                oldest.day,
                                                oldest.hour,
                                                15 * (oldest.minute // 15))

    window_start = last_update_quarter
    window_end = window_start + step

    # NOTE(review): the log text says "1hour" but the check is 15 minutes.
    if window_start.replace(tzinfo=None) + step > datetime.datetime.now():
        log.info("1hour window not complete")
        return False

    log.info("window complete, executing update quarter")
    if session is None:
        session = db.connect()
    if session is None:
        return False

    rows = db.getJoinCnt(session, [window_start, window_end])
    if rows is None:  # None signals a query error
        return False

    frame = pandas.DataFrame(rows)
    if not frame.empty:
        # One output row per (prodID, consID, topic) with the number of
        # input rows in the 'cnt' column.
        counts = (frame.groupby(['prodID', 'consID', 'topic'])
                  .size()
                  .reset_index()
                  .rename(columns={0: 'cnt'}))

        if not db.deleteFromCNT(session, [window_start, window_end]):
            tools.logfile("Delete at update quarter failed at {} : {}\n".format(
                window_start, window_end))
        else:
            insert_statement = session.prepare(
                "INSERT INTO quarter_cnt (prodid, consid, topic, ts, cnt) "
                "VALUES (?, ?, ?, ?, ?)")
            db.insertCnt(session, insert_statement, counts, window_start)

    # An empty window is skipped; either way move on to the next one.
    last_update_quarter = window_end
    return True
Exemplo n.º 3
0
def connect():
    """Open a session on the configured Cassandra keyspace.

    Builds a ``Cluster`` from ``CASS_CONTACT_POINTS`` and connects it to
    ``CASS_KEYSPACE``.

    Returns:
        The connected session, or ``None`` when the connection attempt fails.
        A failure is reported through ``log.error`` and ``tools.logfile``.
    """
    # Local import: the file's import block is not changed by this function.
    from cassandra.cluster import NoHostAvailable

    try:
        cluster = Cluster(CASS_CONTACT_POINTS)
        session = cluster.connect(CASS_KEYSPACE)
    except (AttributeError, NoHostAvailable) as e:
        # NoHostAvailable is what the driver raises when none of the contact
        # points can be reached. Without catching it, callers' "session is
        # None" checks never trigger and the exception escapes instead.
        message = "Error connecting to Cassandra: {}".format(e.args)
        log.error(message)
        tools.logfile(message)
        return None
    else:
        log.info("Connected to Cassandra.")
        return session
Exemplo n.º 4
0
def updateHalf():
    """Aggregate one 30-minute window of ``quarter_cnt`` into ``half_cnt``.

    Works on the module globals ``last_update_half`` (start of the next
    window to process) and ``session`` (database session, opened on demand).

    On the first call the window start is taken from
    ``db.getMinTimestamp(session, "quarter_cnt")`` and floored to a 30-minute
    boundary. A window is only processed once its start lies at least three
    hours before ``datetime.datetime.now()``.

    Returns:
        ``False`` when no session could be opened, no minimum timestamp is
        available, the waiting period has not elapsed, or
        ``db.getJoinCntFromX`` returned ``None``. ``True`` otherwise; in that
        case the window start has been advanced by 30 minutes, even if the
        delete step failed.
    """
    global last_update_half, session

    step = datetime.timedelta(minutes=30)
    source_table = "quarter_cnt"

    # First execution: derive the starting window from the stored data.
    if last_update_half is None:
        log.info("last_update_half is None")

        if session is None:
            session = db.connect()
        if session is None:
            return False

        last_update_half = db.getMinTimestamp(session, source_table)
        if last_update_half is None:
            return False
        oldest = last_update_half
        # Floor to the half hour; seconds, microseconds and tzinfo are dropped.
        last_update_half = datetime.datetime(oldest.year,
                                             oldest.month,
                                             oldest.day,
                                             oldest.hour,
                                             30 * (oldest.minute // 30))

    window_start = last_update_half
    window_end = window_start + step

    # NOTE(review): the log text says "6 hours" but the check is 3 hours.
    ready_at = window_start.replace(tzinfo=None) + datetime.timedelta(hours=3)
    if ready_at > datetime.datetime.now():
        log.info("6 hours window not complete")
        return False

    log.info("window complete, executing update half")
    if session is None:
        session = db.connect()
    if session is None:
        return False

    rows = db.getJoinCntFromX(session, [window_start, window_end, source_table])
    if rows is None:  # None signals a query error
        return False

    frame = pandas.DataFrame(rows)
    if not frame.empty:
        # Sum the remaining columns per (prodID, consID, topic, ts) group.
        totals = frame.groupby(['prodID', 'consID', 'topic', 'ts']).sum()
        totals = totals.reset_index()

        if not db.deleteFromCNTX(session,
                                 [window_start, window_end, source_table]):
            tools.logfile("Delete at update half failed at {} : {} \n".format(
                window_start, window_end))
        else:
            insert_statement = session.prepare(
                "INSERT INTO half_cnt (prodid, consid, topic, ts, cnt) "
                "VALUES (?, ?, ?, ?, ?)")
            db.insertCnt(session, insert_statement, totals, window_start)

    # An empty window is skipped; either way move on to the next one.
    last_update_half = window_end
    return True
Exemplo n.º 5
0
def getMinTimestamp(session, tableName):
    """Return the ``mints`` field of the first row of ``MINTS_QUERY``.

    Args:
        session: Object whose ``execute(query=...)`` runs the statement
            (presumably a Cassandra driver session -- confirm against caller).
        tableName: Value substituted into ``MINTS_QUERY`` with ``str.format``.
            It is interpolated into the statement text, so it must come from
            trusted code only.

    Returns:
        The ``mints`` attribute of the first result row, or ``None`` when
        ``ReadFailure`` is raised.
    """
    try:
        statement = MINTS_QUERY.format(tableName)
        first_row = session.execute(query=statement)[0]
        oldest = first_row.mints
        log.info("Executed getMinTS query")
        return oldest
    except ReadFailure as rf:
        # Report the failure both to the logger and to the log file.
        message = "Error executing getMinTS to Cassandra: {}".format(rf.args)
        log.error(message)
        tools.logfile(message)
        return None
Exemplo n.º 6
0
def updateHour():
    """Aggregate one 1-hour window of ``half_cnt`` into ``hour_cnt``.

    Works on the module globals ``last_update_hour`` (start of the next
    window to process) and ``session`` (database session, opened on demand).

    On the first call the window start is taken from
    ``db.getMinTimestamp(session, "half_cnt")`` with minute, second and
    microsecond set to zero. A window is only processed once its start lies
    at least twelve hours before ``datetime.datetime.now()``.

    Returns:
        ``False`` when no session could be opened, no minimum timestamp is
        available, the waiting period has not elapsed, or
        ``db.getJoinCntFromX`` returned ``None``. ``True`` otherwise; in that
        case the window start has been advanced by one hour, even if the
        delete step failed.
    """
    global last_update_hour, session

    step = datetime.timedelta(hours=1)
    source_table = "half_cnt"

    # First execution: derive the starting window from the stored data.
    if last_update_hour is None:
        log.info("last_update_hour is None")

        if session is None:
            session = db.connect()
        if session is None:
            return False

        last_update_hour = db.getMinTimestamp(session, source_table)
        if last_update_hour is None:
            return False
        # Truncate to the full hour.
        last_update_hour = last_update_hour.replace(microsecond=0, second=0,
                                                    minute=0)

    window_start = last_update_hour
    window_end = window_start + step

    # Only roll up windows that started at least 12 hours ago.
    ready_at = window_start.replace(tzinfo=None) + datetime.timedelta(hours=12)
    if ready_at > datetime.datetime.now():
        log.info("12 hours window not complete")
        return False

    log.info("window complete, executing last_update_hour")
    if session is None:
        session = db.connect()
    if session is None:
        return False

    rows = db.getJoinCntFromX(session, [window_start, window_end, source_table])
    if rows is None:  # None signals a query error
        return False

    frame = pandas.DataFrame(rows)
    if not frame.empty:
        # Sum the remaining columns per (prodID, consID, topic, ts) group.
        totals = frame.groupby(['prodID', 'consID', 'topic', 'ts']).sum()
        totals = totals.reset_index()

        if not db.deleteFromCNTX(session,
                                 [window_start, window_end, source_table]):
            tools.logfile("Delete at UpdateHour Failed {} : {}\n".format(
                window_start, window_end))
        else:
            insert_statement = session.prepare(
                "INSERT INTO hour_cnt (prodid, consid, topic, ts, cnt) "
                "VALUES (?, ?, ?, ?, ?)")
            db.insertCnt(session, insert_statement, totals, window_start)

    # An empty window is skipped; either way move on to the next one.
    last_update_hour = window_end
    return True