Пример #1
0
def model2(data):
    """Return the index labels of the anomalies that detect_vec finds in *data*.

    detect_vec is run in both directions with a period of 14, at most 20% of
    the points allowed to be anomalies, and alpha=0.5.  The index of the
    resulting ``'anoms'`` table is returned as a NumPy array.
    """
    options = {
        'max_anoms': 0.2,
        'direction': 'both',
        'period': 14,
        'alpha': 0.5,
    }
    anomaly_table = detect_vec(data, **options)['anoms']
    return np.array(anomaly_table.index)
Пример #2
0
def run(period = None, longterm_period = None, link_data = None):
    """Flag anomalous rows of a CSV file using detect_vec.

    The CSV at *link_data* is loaded with pandas, its second column is passed
    to detect_vec (both directions, at most 2% anomalies), and a 0/1 column
    named ``'ano'`` is appended to the loaded table.

    Args:
        period: Seasonal period passed to detect_vec.  Defaults to 24.
        longterm_period: Long-term period passed to detect_vec.  When both
            *period* and *longterm_period* are omitted it defaults to
            ``24 * 30 + 1``; an explicitly supplied value is always kept.
        link_data: Path or URL handed to ``pd.read_csv``.

    Returns:
        The loaded table (index reset) with the extra ``'ano'`` column.
    """
    if period is None:
        period = 24
        # Only derive the long-term period when the caller did not give one;
        # previously an explicit value was silently discarded here.
        if longterm_period is None:
            longterm_period = period * 30 + 1

    twitter_example_data = pd.read_csv(link_data)

    data = twitter_example_data[twitter_example_data.columns[1]]
    results = detect_vec(data, max_anoms=0.02,
                         direction='both', period=period,
                         longterm_period=longterm_period)

    res_table = results['anoms']
    # NOTE(review): the second column of the anomaly table is compared against
    # row positions below -- confirm that this column really holds positions
    # rather than the anomalous values themselves.
    # A set makes each membership test O(1) instead of scanning a list.
    flagged = set(res_table[res_table.columns[1]].tolist())

    # Building the frame with the column name also works for an empty input.
    res = pd.DataFrame(
        {'ano': [1 if position in flagged else 0
                 for position in range(len(data))]})

    result_table = pd.concat(
        [twitter_example_data.reset_index(drop=True), res], axis=1)
    return result_table
Пример #3
0
 def test_both_directions_e_value_threshold_med_max(self):
     # Two-sided detection with the "med_max" threshold and expected values
     # requested: the anomaly table must have 3 columns and 6 rows.
     series = self.raw_data.iloc[:, 1]
     options = {
         'max_anoms': 0.02,
         'direction': 'both',
         'period': 1440,
         'threshold': "med_max",
         'e_value': True,
     }
     anoms = detect_vec(series, **options)['anoms']
     eq_(len(anoms.columns), 3)
     eq_(len(anoms.iloc[:, 1]), 6)
Пример #4
0
 def test_both_directions_e_value_longterm(self):
     # Two-sided detection with a 14-day long-term period and expected values
     # requested: the anomaly table must have 3 columns and 131 rows.
     minutes_per_day = 1440
     series = self.raw_data.iloc[:, 1]
     anoms = detect_vec(series,
                        max_anoms=0.02,
                        direction='both',
                        period=minutes_per_day,
                        longterm_period=minutes_per_day * 14,
                        e_value=True)['anoms']
     eq_(len(anoms.columns), 3)
     eq_(len(anoms.iloc[:, 1]), 131)
Пример #5
0
 def test_both_directions_with_plot(self):
     # Two-sided detection with only_last=True and plotting turned off:
     # the anomaly table must have 2 columns and 25 rows.
     series = self.raw_data.iloc[:, 1]
     options = {
         'max_anoms': 0.02,
         'direction': 'both',
         'period': 1440,
         'only_last': True,
         'plot': False,
     }
     anoms = detect_vec(series, **options)['anoms']
     eq_(len(anoms.columns), 2)
     eq_(len(anoms.iloc[:, 1]), 25)
Пример #6
0
 def test_both_directions_e_value_longterm(self):
     # With a long-term period of 14 days and e_value=True, detect_vec should
     # report 131 anomalies in a three-column table.
     period = 1440
     detect_options = dict(
         max_anoms=0.02,
         direction="both",
         period=period,
         longterm_period=period * 14,
         e_value=True,
     )
     anomaly_table = detect_vec(self.raw_data.iloc[:, 1], **detect_options)["anoms"]
     eq_(len(anomaly_table.columns), 3)
     eq_(len(anomaly_table.iloc[:, 1]), 131)
Пример #7
0
def s_h_esd_algo(total_daily):
    """Return the dates on which detect_vec reports anomalies.

    *total_daily* is iterated as a sequence of days, each day being a
    sequence of per-hour values (handling is hard-coded for one garage).
    Every value is paired with an hourly timestamp: the days come from
    ``bdate_range`` starting at ``smarking_globals.start_date`` and each day
    gets 24 hourly stamps.  The flattened values are run through detect_vec
    and the ``'timestamp'`` entry of each reported anomaly is used as a
    position into the list of hourly stamps.

    If detect_vec raises ``RRuntimeError`` a message is printed and the
    process exits with status 0.
    """
    day_starts = bdate_range(smarking_globals.start_date,
                             periods=len(total_daily))

    values = []
    stamps = []
    for day_no, day_values in enumerate(total_daily):
        # Hourly timestamps covering this day.
        day_hours = bdate_range(day_starts[day_no], periods=24, freq='H')
        for hour_no, reading in enumerate(day_values):
            stamps.append(day_hours[hour_no])
            values.append(reading)

    series = Series(values)

    try:
        detection = detect_vec(series,
                               period=120,
                               max_anoms=0.02,
                               direction='both')
    except RRuntimeError:
        # Something is wrong with the data; it may not be periodic.
        print("could not run anomaly detection due to bad data")
        sys.exit(0)

    # Positions of the anomalies within the flattened data.
    positions = [row['timestamp']
                 for _, row in detection['anoms'].iterrows()]

    # Map each position back to the calendar date of its hour.
    return [stamps[int(position)].date() for position in positions]
Пример #8
0
 def test_both_directions_e_value_threshold_med_max(self):
     # "med_max" thresholding with e_value=True should yield 6 anomalies in a
     # three-column table.
     detect_options = dict(
         max_anoms=0.02,
         direction="both",
         period=1440,
         threshold="med_max",
         e_value=True,
     )
     anomaly_table = detect_vec(self.raw_data.iloc[:, 1], **detect_options)["anoms"]
     eq_(len(anomaly_table.columns), 3)
     eq_(len(anomaly_table.iloc[:, 1]), 6)
Пример #9
0
 def test_both_directions_with_plot(self):
     # only_last=True with plotting disabled should yield 25 anomalies in a
     # two-column table.
     detect_options = dict(
         max_anoms=0.02,
         direction="both",
         period=1440,
         only_last=True,
         plot=False,
     )
     anomaly_table = detect_vec(self.raw_data.iloc[:, 1], **detect_options)["anoms"]
     eq_(len(anomaly_table.columns), 2)
     eq_(len(anomaly_table.iloc[:, 1]), 25)
Пример #10
0
 def test_check_constant_series(self):
     # A series of 1000 identical values must produce an empty two-column
     # anomaly table.
     constant = pd.Series([1] * 1000)
     anoms = detect_vec(constant,
                        direction='both',
                        period=14,
                        plot=False)['anoms']
     eq_(len(anoms.columns), 2)
     eq_(len(anoms.iloc[:, 1]), 0)
Пример #11
0
def _fetch_occupancy_payload(url):
    """GET *url* and return its decoded JSON payload, or None if unusable.

    The payload is unusable when the response body is empty, is not valid
    JSON, or does not contain a "value" entry.
    """
    # NOTE(review): network errors raised by requests are not caught here, so
    # they propagate to the caller exactly as before.
    response = requests.get(url, headers=headers)
    content = response.content
    # response.content is bytes on Python 3, where comparing it to "" is
    # never true; truthiness works for both bytes and str.
    if not content:
        return None
    try:
        payload = json.loads(content)
    except ValueError:
        return None
    if "value" not in payload:
        return None
    return payload


def _past_occupancy_url(garage_id, day, hours):
    """Build the hourly past-occupancy URL for *hours* hours from midnight of *day*."""
    return ("https://my.smarking.net/api/ds/v3/garages/" + str(garage_id) +
            "/past/occupancy/from/" + str(day.year) + "-" + str(day.month) +
            "-" + str(day.day) + "T00:00:00/" + str(hours) +
            "/1h?gb=User+Type")


def _append_user_type_values(payload, contract_values, transient_values,
                             contract_tail=None, transient_tail=None):
    """Extend the two lists with the per-group values found in *payload*.

    For every item whose group contains 'Contract' (resp. 'Transient') the
    item's values are appended to *contract_values* (resp.
    *transient_values*), followed by the matching *_tail* value when one is
    given.  Returns ``(saw_contract, saw_transient)``.
    """
    saw_contract = False
    saw_transient = False
    for item in payload["value"]:
        group = str(item.get("group"))
        if 'Contract' in group:
            contract_values.extend(item.get("value"))
            if contract_tail is not None:
                contract_values.append(contract_tail)
            saw_contract = True
        if 'Transient' in group:
            transient_values.extend(item.get("value"))
            if transient_tail is not None:
                transient_values.append(transient_tail)
            saw_transient = True
    return saw_contract, saw_transient


def _recent_anomaly_indices(signal, hour, period_length):
    """Return the anomaly indices that fall in the last two samples of *signal*.

    Between 01:00 and 05:59 there is not enough data for signal processing,
    so get_iqr_anomaly is used; otherwise detect_vec is.  Returns None when
    detect_vec raises RuntimeError (the data may not be periodic).
    """
    if 1 <= hour <= 5:
        indices = get_iqr_anomaly(signal)
    else:
        series = Series(signal)
        try:
            results = detect_vec(series,
                                 period=period_length,
                                 max_anoms=0.02,
                                 direction='both')
        except RuntimeError:
            return None
        indices = [row['timestamp']
                   for _, row in results['anoms'].iterrows()]

    cutoff = period_length * (days_window + 1) - 2
    return [idx for idx in indices if idx >= cutoff]


def _count_anomaly(counters, slot, garage_id, label, current_value):
    """Report once the counter in *slot* has reached 5, then reset it; otherwise bump it."""
    if counters[slot] == 5:
        print(garage_id, datetime.now(), counters[slot], label, current_value)
        counters[slot] = 0
    else:
        counters[slot] = counters[slot] + 1


def check_error_real_time():
    """Check every garage for occupancy anomalies, then re-schedule itself.

    For each garage in ``garage_dict``:

    1. fetch the current contract/transient occupancy;
    2. fetch the first ``hour + 1`` hourly values of each of the previous
       ``days_window`` days (oldest first);
    3. fetch today's hourly values so far and append the current reading;
    4. run anomaly detection separately on the contract and the transient
       signal and, if one of the last two samples is anomalous, update the
       per-garage counter in ``anomaly_count_con`` / ``anomaly_count_tran``.

    A garage is skipped whenever any request yields no usable data, and
    nothing is checked during hours 23 and 0.  The module-level
    ``anomalies`` list is overwritten with the most recent detection result.
    Finally a ``threading.Timer`` re-runs this function after 600 seconds.
    """
    global anomalies

    # enumerate keeps each garage tied to its own counter slot even when
    # earlier garages are skipped; a manual counter bumped at the end of the
    # loop body is bypassed by every `continue`.
    for line_index, i in enumerate(garage_dict):
        payload = _fetch_occupancy_payload(
            "https://my.smarking.net/api/ds/v3/garages/" + str(i) +
            "/current/occupancy?gb=User+Type")
        if payload is None:
            continue

        # Current (real-time) readings; stay 0.0 when a group is absent.
        current_contract = 0.0
        current_transient = 0.0
        for item in payload["value"]:
            group = str(item.get("group"))
            if 'Contract' in group:
                current_contract = float(item.get("value"))
            if 'Transient' in group:
                current_transient = float(item.get("value"))

        # The period length depends on how many hours of today have elapsed;
        # the same number of hours is downloaded for each of the previous
        # days_window days.
        current_t = datetime.now()
        next_time = current_t + timedelta(hours=1)

        # Skipping 11PM (and midnight) for now.
        if current_t.hour in (23, 0):
            continue
        period_length = current_t.hour + 1

        # Historical data, oldest day first.  A plain range is used because
        # timedelta(days=...) rejects NumPy integers on Python 3.
        contracts_hist = []
        transients_hist = []
        history_complete = True
        for days_back in range(days_window, 0, -1):
            day = next_time - timedelta(days=days_back)
            payload = _fetch_occupancy_payload(
                _past_occupancy_url(i, day, period_length))
            if payload is None:
                history_complete = False
                break
            saw_contract, saw_transient = _append_user_type_values(
                payload, contracts_hist, transients_hist)
            if not (saw_contract or saw_transient):
                history_complete = False
                break

        # If any day is missing, move on to the next garage.
        if not history_complete:
            continue

        # Today's data so far, with the real-time reading appended last.
        payload = _fetch_occupancy_payload(
            _past_occupancy_url(i, current_t, period_length - 1))
        if payload is None:
            continue

        contracts_real_time = []
        transients_real_time = []
        con, tran = _append_user_type_values(
            payload, contracts_real_time, transients_real_time,
            contract_tail=current_contract,
            transient_tail=current_transient)
        if not (con or tran):
            # No data received.
            continue

        if con:
            training_contract = contracts_hist + contracts_real_time

            log_file.write(
                str(i) + " running contract anomaly detection " +
                str(datetime.now()) + '\n')
            log_file.flush()

            found = _recent_anomaly_indices(training_contract,
                                            current_t.hour, period_length)
            if found is None:
                # NOTE(review): as before, a failed contract detection also
                # skips the transient check for this garage -- confirm that
                # this is intended.
                continue
            anomalies = found
            if anomalies:
                _count_anomaly(anomaly_count_con, line_index, i,
                               " Contract anomaly ", current_contract)

        if tran:
            training_transient = transients_hist + transients_real_time

            log_file.write(
                str(i) + " running transient anomaly detection " +
                str(datetime.now()) + '\n')
            log_file.flush()

            found = _recent_anomaly_indices(training_transient,
                                            current_t.hour, period_length)
            if found is None:
                continue
            anomalies = found
            if anomalies:
                _count_anomaly(anomaly_count_tran, line_index, i,
                               " Transient anomaly ", current_transient)

    threading.Timer(600, check_error_real_time).start()