Example #1
    def testRecordAPIs(self):
        self.sensor1 = Sensor.Create(self.e, "000-100", self.st.key().id())
        self.sensor1.put()
        now = datetime.now()
        r1_ts = tools.unixtime(now)
        r = Record.Create(tools.unixtime(now), self.sensor1,
                          {'location': '51.5033640,-0.1276250'})
        r2 = Record.Create(
            tools.unixtime(now) + 1000, self.sensor1,
            {'location': '51.5033640,-0.1276250'})
        db.put([r, r2])

        # Test list
        params = self.__commonParams()
        params.update({'sensor_kn': "000-100"})
        result = self.get_json("/api/data", params)
        self.assertTrue(result['success'])
        self.assertEqual(len(result['data']['records']), 2)

        # Test detail
        params = self.__commonParams()
        result = self.get_json("/api/data/%s/%s" % ("000-100", r1_ts), params)
        self.assertTrue(result['success'])
        _r = result['data']['record']
        self.assertEqual(_r['sensor_kn'], "000-100")
        self.assertEqual(_r['ts'], r1_ts)
        self.assertEqual(
            _r['kn'], "%s_%s_%s" %
            (self.e.key().id(), self.sensor1.key().name(), int(r1_ts)))
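A note on the helper these examples revolve around: judging from the calls shown throughout — tools.unixtime(now), tools.unixtime(dt=now) annotated as milliseconds, tools.unixtime(ms=False) for whole seconds, and bare tools.unixtime() for "now" — it converts a datetime to a Unix epoch timestamp, in milliseconds by default. A minimal sketch under those assumptions, not the project's actual implementation:

import time
from datetime import datetime

def unixtime(dt=None, ms=True):
    # Assumed behavior: use the current time when dt is None, return
    # milliseconds since the epoch by default, whole seconds when ms=False.
    if dt is None:
        dt = datetime.now()
    epoch_secs = time.mktime(dt.timetuple()) + dt.microsecond / 1e6
    return int(epoch_secs * 1000) if ms else int(epoch_secs)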
Example #2
    def testGeoJsonIn(self):
        uri = "/%s/inbox/json/%s" % (self.e.key().id(), TEST_SENSOR_ID)
        lat = 1.3
        lon = 36.9
        MOVE_SIZE = 0.01
        MAX_ACCEL = 10
        N_POINTS = 10
        DELAY_SECS = 1
        now = datetime.now() - timedelta(seconds=60)

        # Populate dummy data with random moves
        data = []
        target_accel_mags = []
        for x in range(N_POINTS):
            now += timedelta(seconds=DELAY_SECS)
            lat += (random.random() - 0.5) * MOVE_SIZE
            lon += (random.random() - 0.5) * MOVE_SIZE
            loc = "%s,%s" % (lat, lon)
            ax = (random.random() * MAX_ACCEL) - MAX_ACCEL / 2
            ay = (random.random() * MAX_ACCEL) - MAX_ACCEL / 2
            az = (random.random() * MAX_ACCEL) - MAX_ACCEL / 2
            accel_mag = math.sqrt(pow(ax, 2) + pow(ay, 2) + pow(az, 2))
            target_accel_mags.append(accel_mag)
            data.append({
                'timestamp': tools.unixtime(dt=now),  # milliseconds
                'location': loc,
                'ax': ax,
                'ay': ay,
                'az': az
            })
        last_loc = loc
        body = json.dumps(data)
        response = self.post(uri, body)
        self.assertEqual(response.status_int, 200)
        content = json.loads(response.normal_body)
        self.assertTrue(content['success'])
        self.assertEqual(content['data']['count'], N_POINTS)

        # Fetch created records from db
        records = Record.Fetch(self.geosensor1)
        self.assertEqual(len(records), N_POINTS)
        last_r = records[0]
        self.assertEqual(tools.unixtime(last_r.dt_recorded),
                         tools.unixtime(now))

        accel_mags = [r.columnValue('accel_mag') for r in records]
        self.assertListEqual(accel_mags, list(reversed(target_accel_mags)))

        # Confirm sensor state update
        self.geosensor1 = Sensor.get(self.geosensor1.key())  # Refetch from db
        self.assertEqual(self.geosensor1.location, db.GeoPt(last_loc))
Example #3
    def testGeoJsonIn(self):
        uri = "/%s/inbox/json/%s" % (self.e.key().id(), TEST_SENSOR_ID)
        lat = 1.3
        lon = 36.9
        MOVE_SIZE = 0.01
        MAX_ACCEL = 10
        N_POINTS = 10
        DELAY_SECS = 1
        now = datetime.now()

        # Populate dummy data with random moves
        data = []
        target_accel_mags = []
        for x in range(N_POINTS):
            now += timedelta(seconds=DELAY_SECS)
            lat += (random.random() - 0.5) * MOVE_SIZE
            lon += (random.random() - 0.5) * MOVE_SIZE
            loc = "%s,%s" % (lat, lon)
            ax = (random.random() * MAX_ACCEL) - MAX_ACCEL / 2
            ay = (random.random() * MAX_ACCEL) - MAX_ACCEL / 2
            az = (random.random() * MAX_ACCEL) - MAX_ACCEL / 2
            accel_mag = math.sqrt(pow(ax, 2) + pow(ay, 2) + pow(az, 2))
            target_accel_mags.append(accel_mag)
            data.append({
                'timestamp': tools.unixtime(dt=now),  # milliseconds
                'location': loc,
                'ax': ax,
                'ay': ay,
                'az': az
            })
        last_loc = loc
        body = json.dumps(data)
        response = self.post(uri, body)
        self.assertEqual(response.status_int, 200)
        content = json.loads(response.normal_body)
        self.assertTrue(content['success'])
        self.assertEqual(content['data']['count'], N_POINTS)

        # Fetch created records from db
        records = Record.Fetch(self.geosensor1)
        self.assertEqual(len(records), N_POINTS)
        last_r = records[0]
        self.assertEqual(tools.unixtime(last_r.dt_recorded), tools.unixtime(now))

        accel_mags = [r.columnValue('accel_mag') for r in records]
        self.assertListEqual(accel_mags, list(reversed(target_accel_mags)))

        # Confirm sensor state update
        self.geosensor1 = Sensor.get(self.geosensor1.key())  # Refetch from db
        self.assertEqual(self.geosensor1.location, db.GeoPt(last_loc))
Example #4
    def __init__(self, rkey, start_att="__key__", start_att_direction=""):
        self.report = Report.get(rkey)

        if not self.report:
            logging.error("Error retrieving report [ %s ] from db" % rkey)
            return
        self.report.status = REPORT.GENERATING
        self.report.put()

        self.counters = {'run': 0, 'skipped': 0}
        self.worker_start = tools.unixtime()
        self.cursor = None
        self.start_att = start_att
        self.start_att_direction = start_att_direction
        self.worker_cancelled = False
        self.prefetch_props = []
        self.date_columns = []
        self.headers = []
        self.date_att = None
        self.projection = None
        self.query = None
        self.batch_size = 300
        self.report_prog_mckey = MC_EXPORT_STATUS % self.report.key()
        self.setProgress({'val': 0, "status": REPORT.GENERATING})
        self.gcs_file = gcs.open(self.getGCSFilename(), 'w')
        self.setup()

        # From: https://code.google.com/p/googleappengine/issues/detail?id=8809
        logservice.AUTOFLUSH_ENABLED = True
        logservice.AUTOFLUSH_EVERY_BYTES = None
        logservice.AUTOFLUSH_EVERY_SECONDS = 1
        logservice.AUTOFLUSH_EVERY_BYTES = 1024
        logservice.AUTOFLUSH_EVERY_LINES = 1
Example #5
 def last_deactivation_ts(self, rule_index):
     alarms = self.recent_alarms[rule_index]
     if alarms:
         last_alarm = alarms[0]
         if last_alarm:
             return tools.unixtime(last_alarm.dt_end)
     return None
Example #6
 def __createNewRecords(self,
                        data,
                        first_dt=None,
                        interval_secs=3,
                        sensor=None):
     if not sensor:
         sensor = self.vehicle_1
     now = first_dt if first_dt else datetime.now()
     records = []
     N = len(data.values()[0])
     for i in range(N):
         _r = {}
         for column, vals in data.items():
             _r[column] = vals[i]
         now += timedelta(seconds=interval_secs)
         r = Record.Create(tools.unixtime(now),
                           sensor,
                           _r,
                           allow_future=True)
         records.append(r)
     db.put(records)
     sensor.dt_updated = datetime.now()
     sensor.put()
     logging.debug("Created %d records" % len(records))
     if records:
         return records[-1].dt_recorded  # Datetime of last record created
     else:
         return None
Example #7
    def writeData(self):
        total_i = self.counters['run']
        while True:
            self.query = self._get_gql_query()
            if self.query:
                entities, self.cursor, more = self.KIND.gql(self.query).fetch_page(self.batch_size, start_cursor=self.cursor)
                if not entities:
                    logging.debug("No rows returned by query -- done")
                    return
                else:
                    logging.debug("Got %d rows" % len(entities))
                for entity in entities:
                    if entity:
                        ed = self.entityData(entity)
                    else:
                        continue
                    if self.report.ftype == REPORT.CSV:
                        csv.writer(self.gcs_file).writerow(tools.normalize_list_to_ascii(ed))
                    self.gcs_file.flush()

                    total_i += 1
                    self.counters['run'] += 1
                    if total_i % 100 == 0:
                        cancelled = self.updateProgressAndCheckIfCancelled()
                        if cancelled:
                            self.report.CleanDelete()
                            logging.debug("Worker cancelled by user, report deleted.")
                            return

                logging.debug("Batch of %d done" % len(entities))
                elapsed_ms = tools.unixtime() - self.worker_start
                elapsed = elapsed_ms / 1000
                if elapsed >= MAX_REQUEST_SECONDS or (tools.on_dev_server() and TEST_TOO_LONG_ON_EVERY_BATCH):
                    logging.debug("Elapsed %ss" % elapsed)
                    raise TooLongError()
Example #8
 def __createNewRecords(self,
                        data,
                        first_dt=None,
                        interval_secs=3,
                        sensor=None):
     if not sensor:
         sensor = self.vehicle_1
     now = first_dt if first_dt else datetime.now()
     records = []
     N = len(data.values()[0])
     for i in range(N):
         _r = {}
         for column, vals in data.items():
             _r[column] = vals[i]
         if 'ts' in data:
             # If ts passed in record, overrides
             now = util.ts_to_dt(data['ts'])
         else:
             now += timedelta(seconds=interval_secs)
         r = Record.Create(tools.unixtime(now),
                           sensor,
                           _r,
                           allow_future=True)
         records.append(r)
     db.put(records)
     sensor.dt_updated = datetime.now()
     sensor.put()
     logging.debug("Created %d records" % len(records))
     return records[-1]
Example #9
    def run(self, start_cursor=None):
        self.worker_start = tools.unixtime()
        self.cursor = start_cursor

        if not start_cursor:
            self.writeHeaders()

        try:
            # This is heavy
            self.writeData()
        except TooLongError:
            logging.debug("TooLongError: Going to the next batch")
            if self.report:
                self.finish(reportDone=False)
                tools.safe_add_task(self.run,
                                    start_cursor=self._get_cursor(),
                                    _queue="report-queue")
        except Exception, e:  # including DeadlineExceededError
            traceback.print_exc()
            logging.error("Error: %s" % e)
            self.setProgress({
                'error': "Error occurred: %s" % e,
                'status': REPORT.ERROR
            })
            return
Example #10
def update_article(access_token, item_id, action='favorite'):
    '''
    Favorite or archive (mark read) an article
    '''
    actions = json.dumps(
        [
            {
                "action": action,
                "item_id": item_id,
                "time": str(int(tools.unixtime(ms=False)))
            }
        ]
    )
    data = urllib.urlencode({
        'access_token': access_token,
        'consumer_key': POCKET_CONSUMER_KEY,
        'actions': actions
    })
    logging.debug(data)
    res = urlfetch.fetch(
        url=MODIFY_ENDPOINT + "?" + data,
        method=urlfetch.GET,
        validate_certificate=True)
    logging.debug(res.content)
    if res.status_code == 200:
        result = json.loads(res.content)
        ok = result.get('status', 0) == 1
        return ok
    else:
        logging.debug(res.headers)
    return False
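A hypothetical call to the helper above, with placeholder values (the item id mirrors one from the Pocket sample payloads further down):

ok = update_article('pocket-access-token', '1576987151', action='archive')  # placeholder token and item id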
Example #11
 def run(self):
     self.start = datetime.now()
     self.setup()
     logging.debug("Starting run %s" % self)
     try:
         while True:
             batch = self.fetchBatch()
             if batch:
                 self.runBatch(batch)
                 self.checkDeadline()
             else:
                 self.finish()
                 break
     except (TooLongError, DeadlineExceededError):
         logging.debug("Deadline expired, creating new request... Records: %s, Continuations: %s, Last record: %s" % (self.records_processed, self.continuations, self.last_record))
         self.continuations += 1
         task_name = self.sensorprocess.process_task_name(subset="cont_%s" % tools.unixtime())
         tools.safe_add_task(self.run, _name=task_name, _queue="processing-queue-new")
     except (Shutdown):
         logging.debug("Finishing because instance shutdown...")
         self.finish(result=PROCESS.ERROR, narrative="Instance shutdown")
     except Exception, e:
         logging.error("Uncaught error: %s" % e)
         traceback.print_exc()
         self.finish(result=PROCESS.ERROR, narrative="Processing Error: %s" % e)
Example #12
    def run(self, start_cursor=None):
        self.worker_start = tools.unixtime()
        if self.has_section_files() and len(self.section_gcs_files) != len(self.repeat_sections):
            for section_name, section_questions in self.repeat_sections:
                self.section_gcs_files.append(gcs.open(self.getGCSFilename(suffix=section_name), 'w'))

        self.cursor = start_cursor
        self.setProgress({'max': self.count(), 'report': self.report.json()})
        
        if not start_cursor:
            self.writeHeaders()
        
        try:
            # This is heavy
            self.writeData()
        except TooLongError:
            logging.debug("TooLongError: Going to the next batch")
            if self.report:
                self.finish(reportDone=False)
                tools.safe_add_task(self.run, start_cursor=self._get_cursor(), _queue="worker-queue")
        except Exception, e:  # including DeadlineExceededError
            traceback.print_exc()
            logging.error("Error: %s" % e)
            self.setProgress({'error': "Error occurred: %s" % e, 'status': REPORT.ERROR})
            return
Example #13
    def testNonSaneFutureRecords(self):
        uri = "/%s/inbox/json/%s" % (self.e.key().id(), TEST_SENSOR_ID)
        lat = 1.3
        lon = 36.9
        MOVE_SIZE = 0.01
        N_POINTS = 10
        DELAY_SECS = 1
        now = datetime.now()

        # Populate dummy data with random moves
        data = []
        for x in range(N_POINTS):
            now += timedelta(seconds=DELAY_SECS)
            lat += (random.random() - 0.5) * MOVE_SIZE
            lon += (random.random() - 0.5) * MOVE_SIZE
            loc = "%s,%s" % (lat, lon)
            data.append({
                'timestamp':
                tools.unixtime(dt=now) +
                1000 * 60 * 60 * 24 * 30,  # Non-sane (1 month in future)
                'location':
                loc
            })
        last_loc = loc
        body = json.dumps(data)
        response = self.post(uri, body)
        self.assertEqual(response.status_int, 200)
        content = json.loads(response.normal_body)
        self.assertTrue(content['success'])
        self.assertEqual(content['data']['count'], N_POINTS)

        # Fetch created records from db
        records = Record.Fetch(self.geosensor1)
        self.assertEqual(len(records), 0)  # No records saved, all non-sane
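testNonSaneFutureRecords expects the inbox to drop records stamped a month in the future, while the __createNewRecords helpers elsewhere pass allow_future=True to bypass that check. A hedged sketch of what such a guard might look like; the actual cutoff used by Record.Create is not shown in these excerpts:

def timestamp_is_sane(ts_ms, now_ms, allow_future=False, max_future_ms=60 * 60 * 1000):
    # Hypothetical guard: accept past timestamps, tolerate some clock skew
    # (one hour here, an assumed value), and only accept far-future
    # timestamps when allow_future is explicitly set.
    return allow_future or ts_ms <= now_ms + max_future_ms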
Example #14
 def finish(self, reportDone=True):
     """Called when the worker has finished, to allow for any final work to be done."""
     progress = None
     if reportDone:
         self.gcs_file.close()
         self.report.status = REPORT.DONE
         self.report.dt_generated = datetime.now()
         self.report.put()
         duration = self.report.get_duration()
         logging.debug("GCSReportWorker finished. Counters: %s. Report ran for %d seconds." % (self.counters, duration))
         progress = {
             "status": REPORT.DONE,
             "resource": self.report.get_gcs_file(),
             "generated": tools.unixtime(dt=self.report.dt_generated),
             "report": self.report.json(),
             "duration": duration
         }
     else:
         logging.debug("Batch finished. Counters: %s" % (self.counters))
     p = {
         'val': self.counters['run'],
         "filename": self.report.title
     }
     if progress:
         p.update(progress)
     self.setProgress(p)
     gc.collect()  # Garbage collector
Example #15
def update_article(access_token, item_id, action='favorite'):
    '''
    Favorite or archive (mark read) an article
    '''
    actions = json.dumps(
        [
            {
                "action": action,
                "item_id": item_id,
                "time": str(int(tools.unixtime(ms=False)))
            }
        ]
    )
    data = urllib.urlencode({
        'access_token': access_token,
        'consumer_key': POCKET_CONSUMER_KEY,
        'actions': actions
    })
    logging.debug(data)
    res = urlfetch.fetch(
        url=MODIFY_ENDPOINT + "?" + data,
        method=urlfetch.GET,
        validate_certificate=True)
    logging.debug(res.content)
    if res.status_code == 200:
        result = json.loads(res.content)
        ok = result.get('status', 0) == 1
        return ok
    else:
        logging.debug(res.headers)
    return False
Example #16
    def writeData(self):
        total_i = self.counters['run']
        while True:
            self.query = self._get_gql_query()
            if self.query:
                entities, self.cursor, more = self.KIND.gql(self.query).fetch_page(self.batch_size, start_cursor=self.cursor)
                if not entities:
                    logging.debug("No rows returned by query -- done")
                    return
                else:
                    logging.debug("Got %d rows" % len(entities))
                for entity in entities:
                    if entity:
                        ed = self.entityData(entity)
                    else:
                        continue
                    if self.report.ftype == REPORT.CSV:
                        csv.writer(self.gcs_file).writerow(tools.normalize_list_to_ascii(ed))
                    self.gcs_file.flush()

                    total_i += 1
                    self.counters['run'] += 1
                    if total_i % 100 == 0:
                        cancelled = self.updateProgressAndCheckIfCancelled()
                        if cancelled:
                            self.report.CleanDelete()
                            logging.debug("Worker cancelled by user, report deleted.")
                            return

                logging.debug("Batch of %d done" % len(entities))
                elapsed_ms = tools.unixtime() - self.worker_start
                elapsed = elapsed_ms / 1000
                if elapsed >= MAX_REQUEST_SECONDS or (tools.on_dev_server() and TEST_TOO_LONG_ON_EVERY_BATCH):
                    logging.debug("Elapsed %ss" % elapsed)
                    raise TooLongError()
Example #17
    def runBatch(self, records):
        '''Run processing on a batch of records.

        Processing has two main steps:
            1) Processing each record and firing alarms if any of the tasks'
                rule conditions are met.
            2) For each processer defined, calculate the value as defined by
                the expressions, and update an analysis object with the specified
                key name.

        '''
        # Standard processing (alarms)
        self.new_alarms = []
        for record in records:
            new_alarm = self.processRecord(record)
            if new_alarm:
                self.new_alarms.append(new_alarm)

        # Analysis processing
        if self.processers:
            for processer in self.processers:
                run_ms = tools.unixtime(records[-1].dt_recorded) if records else 0
                self._run_processer(processer, records=records, run_ms=run_ms)

        # TODO: Can we do this in finish?
        db.put(self.analyses.values())

        logging.debug("Ran batch of %d." % (len(records)))
        self.records_processed += len(records)
Example #18
    def processRecord(self, record):
        # Listen for alarms
        # TODO: delays between two data points > NO DATA
        alarm = None
        for i, rule in enumerate(self.rules):
            activate, deactivate, value = self.__update_condition_status(
                i, record)
            if activate:
                alarm, alarm_processers = Alarm.Create(self.sensor, rule,
                                                       record)
                alarm.put()
                if alarm_processers:
                    for processer in alarm_processers:
                        self._run_processer(processer,
                                            run_ms=tools.unixtime(
                                                alarm.dt_start))
                self.active_rules[i] = alarm
                self.recent_alarms[i].insert(0, alarm)  # Prepend
            elif deactivate:
                ar = self.active_rules[i]
                if ar:
                    ar.deactivate()
                    self.updated_alarm_dict[str(ar.key())] = ar
                    self.active_rules[i] = None

        self.last_record = record
        return alarm
Example #19
    def testUserGoogleSimpleAccountLinking(self):
        import jwt
        user = User.Create(email="*****@*****.**", g_id=USER_GOOGLE_ID)
        user.put()

        creation = int(tools.unixtime(ms=False))
        payload = {
            'iss': 'https://accounts.google.com',
            'aud': secrets.GOOGLE_CLIENT_ID,
            'sub': USER_GOOGLE_ID,
            'email': "*****@*****.**",
            'locale': "en_US",
            "iat": creation,
            "exp": creation + 60 * 60
        }
        params = {
            'grant_type':
            'urn:ietf:params:oauth:grant-type:jwt-bearer',
            'intent':
            'get',
            'assertion':
            jwt.encode(payload,
                       secrets.GOOGLE_CLIENT_SECRET,
                       algorithm='HS256')
        }
        response = self.post_json("/api/auth/google/token", params)
        token_type = response.get('token_type')
        self.assertEqual(token_type, 'bearer')
Example #20
 def finish(self, reportDone=True):
     """Called when the worker has finished, to allow for any final work to be done."""
     progress = None
     if reportDone:
         self.gcs_file.close()
         self.report.status = REPORT.DONE
         self.report.dt_generated = datetime.now()
         self.report.put()
         duration = self.report.get_duration()
         logging.debug(
             "GCSReportWorker finished. Counters: %s. Report ran for %d seconds."
             % (self.counters, duration))
         progress = {
             "status": REPORT.DONE,
             "resource": self.report.get_gcs_file(),
             "generated": tools.unixtime(dt=self.report.dt_generated),
             "report": self.report.json(),
             "duration": duration
         }
     else:
         logging.debug("Batch finished. Counters: %s" % (self.counters))
     p = {'val': self.counters['run'], "filename": self.report.title}
     if progress:
         p.update(progress)
     self.setProgress(p)
     gc.collect()  # Garbage collector
Example #21
 def last_deactivation_ts(self, rule_index):
     alarms = self.recent_alarms[rule_index]
     if alarms:
         last_alarm = alarms[0]
         if last_alarm:
             return tools.unixtime(last_alarm.dt_end)
     return None
Example #22
 def testInSamePeriod(self):
     from constants import RULE
     volley = [
         # dt1, dt2, period_type, expect same (bool)
         (datetime(2016, 3, 31, 12, 15), datetime(2016, 3, 31, 12, 55), RULE.HOUR, True),
         (datetime(2016, 3, 31, 11, 58), datetime(2016, 3, 31, 12, 2), RULE.HOUR, False),
         (datetime(2016, 3, 31, 11, 58, 59), datetime(2016, 3, 31, 11, 58, 13), RULE.MINUTE, True),
         (datetime(2016, 3, 29), datetime(2016, 4, 1), RULE.WEEK, True),
         (datetime(2016, 3, 29), datetime(2016, 4, 4), RULE.WEEK, False),
         (datetime(2016, 1, 2), datetime(2016, 1, 28), RULE.MONTH, True),
         (datetime(2016, 1, 29), datetime(2015, 1, 4), RULE.MONTH, False)
     ]
     for v in volley:
         dt1, dt2, period_type, same = v
         ms1, ms2 = tools.unixtime(dt1), tools.unixtime(dt2)
         out = tools.in_same_period(ms1, ms2, period_type)
         self.assertEqual(out, same)
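The cases above pin down what tools.in_same_period must do: compare two millisecond timestamps at minute, hour, ISO-week or month granularity. A self-contained sketch consistent with those cases, using stand-in strings for the project's RULE constants:

from datetime import datetime

MINUTE, HOUR, WEEK, MONTH = 'minute', 'hour', 'week', 'month'  # stand-ins for RULE.*

def in_same_period(ms1, ms2, period_type):
    # Compare two millisecond timestamps by calendar period.
    dt1 = datetime.fromtimestamp(ms1 / 1000.0)
    dt2 = datetime.fromtimestamp(ms2 / 1000.0)
    if period_type == MINUTE:
        return dt1.replace(second=0, microsecond=0) == dt2.replace(second=0, microsecond=0)
    if period_type == HOUR:
        return dt1.replace(minute=0, second=0, microsecond=0) == dt2.replace(minute=0, second=0, microsecond=0)
    if period_type == WEEK:
        return dt1.isocalendar()[:2] == dt2.isocalendar()[:2]  # same ISO year and week
    if period_type == MONTH:
        return (dt1.year, dt1.month) == (dt2.year, dt2.month)
    raise ValueError("Unknown period type: %s" % period_type)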
Example #23
 def __evalAggregateColumn(self, toks):
     column = toks[0]
     if not self.record_list:
         raise Exception("Can't evaluate aggregate column without record list")
     if column == 'ts':
         res = [tools.unixtime(r.dt_recorded) for r in self.record_list]
     else:
         res = [r.columnValue(column, 0) for r in self.record_list]
     return [res]
Example #24
    def post(self, d):
        user = None
        message = email = None
        auth = self.request.get('auth')
        pw = self.request.get('_pw')
        _login = self.request.get('_login')
        token = self.request.get('_token') # Google ID Token
        name = self.request.get('name')
        custom_attrs = self.request.get('custom_attrs')
        if custom_attrs:
            custom_attrs = custom_attrs.split(',')
        else:
            custom_attrs = None
        error_code = 0
        ok = False
        user = User.FuzzyGet(_login)
        if user:
            ok = False
            if (pw and user.validatePassword(pw)):
                ok = True
            elif token:
                ok = services.VerifyGoogleJWT(token, email=email)
                if ok:
                    user.session_id_token = str(token)
                    logging.debug("User token is now: %s" % user.session_id_token)
                else:
                    logging.debug("JWT invalid")
                    # Assume Google certs expired and retry
                    services.UpdateGoogleKeyCerts()
                    error_code = 2 # Bad token
            if ok:
                message = "Successful Login"
                self.session['user'] = user
                self.session['enterprise'] = user.enterprise
            else:
                user = None
                error_code = 1 # Unauthorized
                message = "Login / password mismatch"
        elif token:
            # No user, but this is an authenticated G+ login, so let's create the account
            ok = services.VerifyGoogleJWT(token, email=email)
            if ok:
                user = User.Create(email=email, name=name)
                if user:
                    user.session_id_token = str(token)
                    user.put()
        else:
            message = "User not found"
            error_code = 3

        data = {
            'ts': tools.unixtime(),
            'user': user.json(custom_attrs=custom_attrs) if user else None,
            'password': pw
        }
        self.json_out(data, message=message, error=error_code)
Example #25
    def writeData(self):
        total_i = self.counters['run']
        while True:
            self.query = self._get_query()
            if self.query:
                entities = self.query.fetch(limit=self.batch_size)
                self.cursor = self._get_cursor()
                if not entities:
                    logging.debug("No rows returned by query -- done")
                    return
                else:
                    logging.debug("Got %d rows" % len(entities))
                if entities and self.prefetch_props:
                    entities = tools.prefetch_reference_properties(
                        entities, *self.prefetch_props, missingRefNone=True)
                for entity in entities:
                    if entity:
                        ed = self.entityData(entity)
                    else:
                        continue
                    string = '?'
                    if self.report.ftype == REPORT.CSV:
                        csv.writer(self.gcs_file).writerow(
                            tools.normalize_list_to_ascii(ed))
                    elif self.report.ftype == REPORT.XLS:
                        self.gcs_file.write(json.dumps(ed) + "\n")
                        if total_i > REPORT.XLS_ROW_LIMIT:
                            self.setProgress({
                                'error':
                                "XLS row limit (%d) exceeded!" %
                                REPORT.XLS_ROW_LIMIT,
                                'status':
                                REPORT.ERROR
                            })
                            return
                    self.gcs_file.flush()

                    total_i += 1
                    self.counters['run'] += 1
                    if total_i % 100 == 0:
                        cancelled = self.updateProgressAndCheckIfCancelled()
                        if cancelled:
                            self.report.CleanDelete()
                            logging.debug(
                                "Worker cancelled by user, report deleted.")
                            return

                logging.debug("Batch of %d done" % len(entities))
                elapsed_ms = tools.unixtime() - self.worker_start
                elapsed = elapsed_ms / 1000
                if elapsed >= MAX_REQUEST_SECONDS or (
                        tools.on_dev_server()
                        and TEST_TOO_LONG_ON_EVERY_BATCH):
                    logging.debug("Elapsed %ss" % elapsed)
                    raise TooLongError()
Example #26
 def json(self):
     return {
         'id': self.key.id(),
         'ts': tools.unixtime(self.date),
         'host': self.host,
         'path': self.path,
         'method': self.method,
         'status': self.status,
         'request': self.request,
         'success': self.success,
         'message': self.message
     }
Example #27
 def testInSamePeriod(self):
     from constants import RULE
     volley = [
         # dt1, dt2, period_type, expect same (bool)
         (datetime(2016, 3, 31, 12, 15), datetime(2016, 3, 31, 12,
                                                  55), RULE.HOUR, True),
         (datetime(2016, 3, 31, 11, 58), datetime(2016, 3, 31, 12,
                                                  2), RULE.HOUR, False),
         (datetime(2016, 3, 31, 11, 58,
                   59), datetime(2016, 3, 31, 11, 58,
                                 13), RULE.MINUTE, True),
         (datetime(2016, 3, 29), datetime(2016, 4, 1), RULE.WEEK, True),
         (datetime(2016, 3, 29), datetime(2016, 4, 4), RULE.WEEK, False),
         (datetime(2016, 1, 2), datetime(2016, 1, 28), RULE.MONTH, True),
         (datetime(2016, 1, 29), datetime(2015, 1, 4), RULE.MONTH, False)
     ]
     for v in volley:
         dt1, dt2, period_type, same = v
         ms1, ms2 = tools.unixtime(dt1), tools.unixtime(dt2)
         out = tools.in_same_period(ms1, ms2, period_type)
         self.assertEqual(out, same)
Example #28
    def testAggregatedExpressionParsing(self):
        from models import Record
        record_list = []
        start_ms = tools.unixtime()
        ts_data = [
            long(start_ms + x) for x in range(0, 10 * 10 * 1000, 10 * 1000)
        ]  # 10 sec apart
        x_data = [4, 5, 6, 7, 5, 2, 1, 0, 1, 4]
        y_data = [0, 0, 1.0, 1.0, 1.0, 1, 0, 0, 0, 0]
        for i, ts, x, y in zip(range(10), ts_data, x_data, y_data):
            r = Record()
            r.setColumnValue("_ts", ts)
            r.setColumnValue("x", x)
            r.setColumnValue("y", y)
            record_list.append(r)
        now_ms = tools.unixtime()
        import numpy as np
        volley = [
            ["DOT({_ts},{y})", np.dot(ts_data, y_data)],
            ["MAX({y})", max(y_data)],
            ["MIN({y})", 0],
            ["AVE({x})", tools.average(x_data)],
            ["COUNT({y})", 10],
            ["DOT(DELTA({_ts}), {y}) / 1000", 40]  # 40 secs
        ]

        for v in volley:
            expr = v[0]
            target = v[1]
            tick = datetime.now()
            ep = ExpressionParser(expr, verbose=True, run_ms=now_ms)
            result = ep.run(record_list=record_list)
            tock = datetime.now()
            diff = tock - tick
            ms = diff.microseconds / 1000
            logmessage = "%s took %d ms" % (expr, ms)
            if ms > 100:
                logmessage += " <<<<<<<<<<<<<<<<<<<<<<<<<<< SLOW OP!"
            print logmessage
            self.assertEqual(result, target)
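The last expression in the volley only works out to 40 if DELTA keeps the input length by prepending a zero: the timestamps are 10,000 ms apart and y is 1 for four of the ten points, so DOT(DELTA({_ts}), {y}) = 4 * 10000 = 40000 ms, i.e. 40 after dividing by 1000. A sketch of a DELTA aggregate under that assumption:

def delta(values):
    # Hypothetical DELTA aggregate: consecutive differences, padded with a
    # leading 0 so the output aligns element-wise with the other columns.
    # delta([0, 10000, 20000]) -> [0, 10000, 10000]
    return [0] + [b - a for a, b in zip(values, values[1:])]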
Example #29
    def finish(self, reportDone=True):
        """Called when the worker has finished, to allow for any final work to be done."""
        progress = None
        if reportDone:
            if self.report.ftype == REPORT.XLS:
                self.gcs_file.close()
                readable_gcs_file = gcs.open(self.gcs_file.name, 'r')
                data = readable_gcs_file.read().split("\n")
                readable_gcs_file.close()
                self.gcs_file = gcs.open(self.gcs_file.name, 'w')
                y = 0
                for r in data:
                    if not r:
                        continue
                    if y > REPORT.XLS_ROW_LIMIT:
                        logging.warning("Excel report exceeded row limit and was truncated")
                        break
                    y += 1
                    row = []
                    try:
                        row = json.loads(r)
                    except Exception, ex:
                        logging.error("Unable to json load row: %s (%s)" % (r, ex))
                    else:
                        for x, cell in enumerate(row):
                            if cell:
                                if x in self.report.date_columns:
                                    self.ws.write(y, x, cell, self.xls_styles['datetime'])
                                else:
                                    self.ws.write(y, x, cell)            
                        if self.make_sub_reports:
                            #TODO: Write section_work_sheet, survey to excel is not enabled for now though
                            pass
                self.wb.save(self.gcs_file)

            self.gcs_file.close()            
            if self.has_section_files():
                for section_gcs_file in self.section_gcs_files:
                    section_gcs_file.close()

            self.report.status = REPORT.DONE
            self.report.dt_generated = datetime.now()
            self.report.put()
            duration = self.report.getDuration()
            logging.debug("GCSReportWorker finished. Counters: %s. Report ran for %d seconds." % (self.counters, duration))
            progress = {
                "status": REPORT.DONE,
                "resource":self.report.getGCSFile(),
                "generated": tools.unixtime(dt=self.report.dt_generated),
                "report": self.report.json(),
                "duration": duration
            }
Example #30
    def finish(self, reportDone=True):
        """Called when the worker has finished, to allow for any final work to be done."""
        progress = None
        if reportDone:
            if self.report.ftype == REPORT.XLS:
                self.gcs_file.close()
                readable_gcs_file = gcs.open(self.gcs_file.name, 'r')
                data = readable_gcs_file.read().split("\n")
                readable_gcs_file.close()
                self.gcs_file = gcs.open(self.gcs_file.name, 'w')
                y = 0
                for r in data:
                    if not r:
                        continue
                    if y > REPORT.XLS_ROW_LIMIT:
                        logging.warning(
                            "Excel report exceeded row limit and was truncated"
                        )
                        break
                    y += 1
                    row = []
                    try:
                        row = json.loads(r)
                    except Exception, ex:
                        logging.error("Unable to json load row: %s (%s)" %
                                      (r, ex))
                    else:
                        for x, cell in enumerate(row):
                            if cell:
                                if x in self.report.date_columns:
                                    self.ws.write(y, x, cell,
                                                  self.xls_styles['datetime'])
                                else:
                                    self.ws.write(y, x, cell)
                self.wb.save(self.gcs_file)

            self.gcs_file.close()
            self.report.status = REPORT.DONE
            self.report.dt_generated = datetime.now()
            self.report.put()
            duration = self.report.getDuration()
            logging.debug(
                "GCSReportWorker finished. Counters: %s. Report ran for %d seconds."
                % (self.counters, duration))
            progress = {
                "status": REPORT.DONE,
                "resource": self.report.getGCSFile(),
                "generated": tools.unixtime(dt=self.report.dt_generated),
                "report": self.report.json(),
                "duration": duration
            }
Example #31
 def json(self):
     data = {
         'id': self.key.id(),
         'level': self.level,
         'level_name': self.print_level(),
         'name': self.name,
         'email': self.email,
         'phone': self.phone,
         'location_text': self.location_text,
         'ts_created': tools.unixtime(self.dt_created),
         'services_enabled': self.services_enabled,
         'service_settings': tools.getJson(self.service_settings)
     }
     credentials = tools.getJson(self.credentials)
     if credentials:
         data['scopes'] = credentials.get('scopes')
     return data
Example #32
    def writeData(self):
        total_i = self.counters['run']
        while True:
            self.query = self._get_query()
            if self.query:
                entities = self.query.fetch(limit=self.batch_size)
                self.cursor = self._get_cursor()                
                if not entities:
                    logging.debug("No rows returned by query -- done")
                    return
                if entities and self.prefetch_props:
                    entities = tools.prefetch_reference_properties(entities, *self.prefetch_props, missingRefNone=True)
                for entity in entities:
                    if entity:
                        ed = self.entityData(entity)
                    else:
                        continue
                    string = '?'
                    if self.report.ftype == REPORT.CSV:
                        csv.writer(self.gcs_file).writerow(tools.normalize_list_to_ascii(ed))
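                        # NOTE: sections_data is not defined in this excerpt; presumably it is built per entity elsewhere in the full worker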
                        if sections_data and self.has_section_files():
                            for section_gcs_file, sd in zip(self.section_gcs_files, sections_data):
                                for sd_rows in zip(*sd):
                                    csv.writer(section_gcs_file).writerow(tools.normalize_list_to_ascii(sd_rows))
                    elif self.report.ftype == REPORT.XLS:          
                        self.gcs_file.write(json.dumps(ed)+"\n")
                        if total_i > REPORT.XLS_ROW_LIMIT:
                            self.setProgress({'error': "XLS row limit (%d) exceeded!" % REPORT.XLS_ROW_LIMIT, 'status': REPORT.ERROR})
                            return
                    self.gcs_file.flush()

                    total_i += 1
                    self.counters['run'] += 1
                    if total_i % 100 == 0:
                        cancelled = self.updateProgressAndCheckIfCancelled()
                        if cancelled:
                            self.report.CleanDelete()
                            logging.debug("Worker cancelled by user, report deleted.")
                            return

                logging.debug("Batch of %d done" % len(entities))
                elapsed = tools.unixtime(local=False) - self.worker_start
                if elapsed >= MAX_REQUEST_SECONDS or (tools.on_dev_server() and TEST_TOO_LONG_ON_EVERY_BATCH):
                    logging.debug("Elapsed %ss" % elapsed)
                    raise TooLongError()
Example #33
    def __init__(self,
                 rkey,
                 start_att="__key__",
                 start_att_desc=False,
                 title="Report"):
        self.report = rkey.get()
        if not self.report:
            logging.error("Error retrieving report [ %s ] from db" % rkey)
            return
        self.start_att = start_att
        self.start_att_desc = start_att_desc
        self.FILTERS = []
        self.report.status = REPORT.GENERATING
        self.specs = self.report.get_specs()
        self.start_ts = self.specs.get('start', 0)
        self.end_ts = self.specs.get('end', 0)
        self.report.generate_title(title,
                                   ts_start=self.start_ts,
                                   ts_end=self.end_ts)
        self.report.put()
        self.add_date_filters(start=self.start_ts, end=self.end_ts)
        self.user = self.report.key.parent().get()
        self.ancestor = self.user
        self.counters = {'run': 0, 'skipped': 0}
        self.worker_start = tools.unixtime()
        self.cursor = None
        self.worker_cancelled = False
        self.prefetch_props = []
        self.date_columns = []
        self.headers = []
        self.projection = None
        self.cursor = None
        self.query = None
        self.batch_size = 1000
        self.report_prog_mckey = MC_EXPORT_STATUS % self.report.key
        self.setProgress({'val': 0, "status": REPORT.GENERATING})
        self.gcs_file = gcs.open(self.get_gcs_filename(), 'w')

        # From: https://code.google.com/p/googleappengine/issues/detail?id=8809
        logservice.AUTOFLUSH_ENABLED = True
        logservice.AUTOFLUSH_EVERY_BYTES = None
        logservice.AUTOFLUSH_EVERY_SECONDS = 1
        logservice.AUTOFLUSH_EVERY_BYTES = 1024
        logservice.AUTOFLUSH_EVERY_LINES = 1
Example #34
    def run(self, start_cursor=None):
        self.worker_start = tools.unixtime()
        self.cursor = start_cursor

        if not start_cursor:
            self.writeHeaders()

        try:
            # This is heavy
            self.writeData()
        except TooLongError:
            logging.debug("TooLongError: Going to the next batch")
            if self.report:
                self.finish(reportDone=False)
                tools.safe_add_task(self.run, start_cursor=self._get_cursor(), _queue="report-queue")
        except Exception, e:  # including DeadlineExceededError
            traceback.print_exc()
            logging.error("Error: %s" % e)
            self.setProgress({'error': "Error occurred: %s" % e, 'status': REPORT.ERROR})
            return
Example #35
 def pocket_sync(self, d):
     '''
     Sync from pocket since last sync
     '''
     from services import pocket
     TS_KEY = 'pocket_last_timestamp'  # Seconds
     access_token = self.user.get_integration_prop('pocket_access_token')
     init_sync_since = tools.unixtime(datetime.now() - timedelta(days=7), ms=False)
     last_timestamp = self.user.get_integration_prop(TS_KEY, init_sync_since)
     readables = []
     if access_token:
         self.success, readables, latest_timestamp = pocket.sync(self.user, access_token, last_timestamp)
         self.user.set_integration_prop(TS_KEY, latest_timestamp)
         self.user.put()
         self.update_session_user(self.user)
     else:
         self.message = "Please link your Pocket account from the integrations page"
     self.set_response({
         'readables': [r.json() for r in readables]
     })
Example #36
    def __init__(self, rkey, start_att="__key__", start_att_desc=False, title="Report"):
        self.report = rkey.get()
        if not self.report:
            logging.error("Error retrieving report [ %s ] from db" % rkey)
            return
        self.start_att = start_att
        self.start_att_desc = start_att_desc
        self.FILTERS = []
        self.report.status = REPORT.GENERATING
        self.specs = self.report.get_specs()
        self.start_ts = self.specs.get('start', 0)
        self.end_ts = self.specs.get('end', 0)
        self.report.generate_title(title, ts_start=self.start_ts, ts_end=self.end_ts)
        self.report.put()
        self.add_date_filters(start=self.start_ts, end=self.end_ts)
        self.user = self.report.key.parent().get()
        self.ancestor = self.user
        self.counters = {
            'run': 0,
            'skipped': 0
        }
        self.worker_start = tools.unixtime()
        self.cursor = None
        self.worker_cancelled = False
        self.prefetch_props = []
        self.date_columns = []
        self.headers = []
        self.projection = None
        self.cursor = None
        self.query = None
        self.batch_size = 1000
        self.report_prog_mckey = MC_EXPORT_STATUS % self.report.key
        self.setProgress({'val': 0, "status": REPORT.GENERATING})
        self.gcs_file = gcs.open(self.get_gcs_filename(), 'w')

        # From: https://code.google.com/p/googleappengine/issues/detail?id=8809
        logservice.AUTOFLUSH_ENABLED = True
        logservice.AUTOFLUSH_EVERY_BYTES = None
        logservice.AUTOFLUSH_EVERY_SECONDS = 1
        logservice.AUTOFLUSH_EVERY_BYTES = 1024
        logservice.AUTOFLUSH_EVERY_LINES = 1
Example #37
 def __createNewRecords(self, data, first_dt=None, interval_secs=3, sensor=None):
     if not sensor:
         sensor = self.vehicle_1
     now = first_dt if first_dt else datetime.now()
     records = []
     N = len(data.values()[0])
     for i in range(N):
         _r = {}
         for column, vals in data.items():
             _r[column] = vals[i]
         if 'ts' in data:
             # If ts passed in record, overrides
             now = util.ts_to_dt(data['ts'])
         else:
             now += timedelta(seconds=interval_secs)
         r = Record.Create(tools.unixtime(now), sensor, _r, allow_future=True)
         records.append(r)
     db.put(records)
     sensor.dt_updated = datetime.now()
     sensor.put()
     logging.debug("Created %d records" % len(records))
Example #38
 def __init__(self, rkey, start_att="__key__", start_att_direction="", make_sub_reports=False):
     self.report = Report.get(rkey)
     
     if not self.report:
         logging.error("Error retrieving report [ %s ] from db" % rkey)
         return                
     self.report.status = REPORT.GENERATING
     self.report.put()
     
     self.counters = {
         'run': 0,
         'skipped': 0
     }
     self.worker_start = tools.unixtime()
     self.cursor = None
     self.start_att = start_att
     self.start_att_direction = start_att_direction
     self.worker_cancelled = False
     self.prefetch_props = []
     self.date_columns = []
     self.headers = []
     self.date_att = None
     self.projection = None
     self.query = None
     self.batch_size = 300
     self.make_sub_reports = make_sub_reports
     self.report_prog_mckey = MC_EXPORT_STATUS % self.report.key()
     self.setProgress({'val':0, "status":REPORT.GENERATING})
     self.gcs_file = gcs.open(self.getGCSFilename(), 'w')
     self.section_gcs_files = []
     self.setup()
         
     # From: https://code.google.com/p/googleappengine/issues/detail?id=8809
     logservice.AUTOFLUSH_ENABLED = True
     logservice.AUTOFLUSH_EVERY_BYTES = None
     logservice.AUTOFLUSH_EVERY_SECONDS = 1
     logservice.AUTOFLUSH_EVERY_BYTES = 1024
     logservice.AUTOFLUSH_EVERY_LINES = 1
Example #39
    def testUserGoogleSimpleAccountLinking(self):
        import jwt
        user = User.Create(email="*****@*****.**", g_id=USER_GOOGLE_ID)
        user.put()

        creation = int(tools.unixtime(ms=False))
        payload = {
            'iss': 'https://accounts.google.com',
            'aud': secrets.GOOGLE_CLIENT_ID,
            'sub': USER_GOOGLE_ID,
            'email': "*****@*****.**",
            'locale': "en_US",
            "iat": creation,
            "exp": creation + 60*60
        }
        params = {
            'grant_type': 'urn:ietf:params:oauth:grant-type:jwt-bearer',
            'intent': 'get',
            'assertion': jwt.encode(payload, secrets.GOOGLE_CLIENT_SECRET, algorithm='HS256')
        }
        response = self.post_json("/api/auth/google/token", params)
        token_type = response.get('token_type')
        self.assertEqual(token_type, 'bearer')
Example #40
def sync(user, access_token):
    '''
    Return JSON array {title, author, isbn, image}

    Sample dict from pocket:

    {u'resolved_url': u'https://arxiv.org/abs/1701.06538', u'given_title': u'', u'is_article': u'1', u'sort_id': 16, u'word_count': u'221', u'status': u'0', u'has_image': u'0', u'given_url': u'https://arxiv.org/abs/1701.06538', u'favorite': u'0', u'has_video': u'0', u'time_added': u'1485774143', u'time_updated': u'1485774143', u'time_read': u'0', u'excerpt': u'Authors: Noam Shazeer, Azalia Mirhoseini, Krzysztof Maziarz, Andy Davis, Quoc Le, Geoffrey Hinton, Jeff Dean  Abstract: The capacity of a neural network to absorb information is limited by its number of parameters.', u'resolved_title': u'Title: Outrageously Large Neural Networks: The Sparsely-Gated Mixture-of-Experts Layer', u'authors': {u'32207876': {u'url': u'', u'author_id': u'32207876', u'item_id': u'1576987151', u'name': u'cscs.CLcs.NEstatstat.ML'}}, u'resolved_id': u'1576987151', u'item_id': u'1576987151', u'time_favorited': u'0', u'is_index': u'0'}
    {u'resolved_url': u'http://lens.blogs.nytimes.com/2012/10/09/looking-into-the-eyes-of-made-in-china/', u'given_title': u'http://lens.blogs.nytimes.com/2012/10/09/looking-into-the-eyes-of-made-in-c', u'is_article': u'1', u'sort_id': 99, u'word_count': u'800', u'status': u'1', u'has_image': u'0', u'given_url': u'http://lens.blogs.nytimes.com/2012/10/09/looking-into-the-eyes-of-made-in-china/?partner=rss&emc=rss&smid=tw-nytimes', u'favorite': u'0', u'has_video': u'0', u'time_added': u'1349951324', u'time_updated': u'1482284773', u'time_read': u'1482284772', u'excerpt': u'Your clothes, your child\u2019s toys, even the device you use to read these words may have been made in China. They are among the $100 billion of goods that the United States imports from China each year \u2014 an exchange that has become an important issue in the 2012 presidential campaign.', u'resolved_title': u'Looking Into the Eyes of &#8216;Made in China&#8217;', u'authors': {u'3024958': {u'url': u'', u'author_id': u'3024958', u'item_id': u'233921121', u'name': u'KERRI MACDONALD'}}, u'resolved_id': u'233843309', u'item_id': u'233921121', u'time_favorited': u'0', u'is_index': u'0'}
    '''
    dt = datetime.now() - timedelta(days=7)
    init_sync_since = tools.unixtime(dt, ms=False)
    TS_KEY = 'pocket_last_timestamp'  # Seconds
    since_timestamp = user.get_integration_prop(TS_KEY, init_sync_since)
    data = urllib.urlencode({
        'access_token': access_token,
        'consumer_key': POCKET_CONSUMER_KEY,
        'detailType': 'complete',
        'since': since_timestamp,
        'state': 'all'
    })
    success = False
    logging.debug("Syncing pocket for %s since %s" % (user, dt))
    res = urlfetch.fetch(
        url=GET_ENDPOINT,
        payload=data,
        method=urlfetch.POST,
        deadline=60,
        validate_certificate=True)
    logging.debug(res.status_code)
    latest_timestamp = 0
    readables = []
    if res.status_code == 200:
        data = json.loads(res.content)
        articles = data.get('list', {})
        latest_timestamp = data.get('since', 0) #?
        save = []
        USE_RESOLVED_TITLE = True
        if articles:
            for id, article in articles.items():
                source = 'pocket'
                if USE_RESOLVED_TITLE:
                    title = article.get('resolved_title')
                else:
                    title = article.get('given_title')
                url = article.get('given_url')
                status = article.get('status')
                authors = article.get('authors')
                excerpt = article.get('excerpt')
                images = article.get('images')
                time_added = int(article.get('time_added', 0)) * 1000
                time_read = int(article.get('time_read', 0)) * 1000
                dt_added = tools.dt_from_ts(time_added)
                dt_read = tools.dt_from_ts(time_read) if time_read else None
                tags = article.get('tags', {}).keys()
                word_count = int(article.get('word_count', 0))
                favorite = int(article.get('favorite', 0)) == 1
                image_url = None
                author = None
                if images:
                    first_image = images.get('1')
                    if first_image:
                        image_url = first_image.get('src')
                if authors:
                    author_keys = authors.keys()
                    if author_keys:
                        author = authors.get(author_keys[0], {}).get('name')
                archived = int(status) == 1
                read = archived and (not tags or 'unread' not in tags)
                r = Readable.CreateOrUpdate(user, source_id=id, title=title, url=url,
                                            image_url=image_url, author=author,
                                            excerpt=excerpt, favorite=favorite,
                                            dt_added=dt_added, word_count=word_count,
                                            dt_read=dt_read,
                                            tags=tags, source=source, read=read)
                if r:
                    r.Update(read=archived, favorite=favorite, dt_read=dt_read)
                    save.append(r)
                    readables.append(r)
        ndb.put_multi(save)  # Save all
        Readable.put_sd_batch(save)
        user.set_integration_prop(TS_KEY, latest_timestamp)
        success = True
    else:
        logging.debug(res.headers)
    return (success, readables, latest_timestamp)
Example #41
def sync(user, access_token):
    '''
    Return JSON array {title, author, isbn, image}

    Sample dict from pocket:

    {u'resolved_url': u'https://arxiv.org/abs/1701.06538', u'given_title': u'', u'is_article': u'1', u'sort_id': 16, u'word_count': u'221', u'status': u'0', u'has_image': u'0', u'given_url': u'https://arxiv.org/abs/1701.06538', u'favorite': u'0', u'has_video': u'0', u'time_added': u'1485774143', u'time_updated': u'1485774143', u'time_read': u'0', u'excerpt': u'Authors: Noam Shazeer, Azalia Mirhoseini, Krzysztof Maziarz, Andy Davis, Quoc Le, Geoffrey Hinton, Jeff Dean  Abstract: The capacity of a neural network to absorb information is limited by its number of parameters.', u'resolved_title': u'Title: Outrageously Large Neural Networks: The Sparsely-Gated Mixture-of-Experts Layer', u'authors': {u'32207876': {u'url': u'', u'author_id': u'32207876', u'item_id': u'1576987151', u'name': u'cscs.CLcs.NEstatstat.ML'}}, u'resolved_id': u'1576987151', u'item_id': u'1576987151', u'time_favorited': u'0', u'is_index': u'0'}
    {u'resolved_url': u'http://lens.blogs.nytimes.com/2012/10/09/looking-into-the-eyes-of-made-in-china/', u'given_title': u'http://lens.blogs.nytimes.com/2012/10/09/looking-into-the-eyes-of-made-in-c', u'is_article': u'1', u'sort_id': 99, u'word_count': u'800', u'status': u'1', u'has_image': u'0', u'given_url': u'http://lens.blogs.nytimes.com/2012/10/09/looking-into-the-eyes-of-made-in-china/?partner=rss&emc=rss&smid=tw-nytimes', u'favorite': u'0', u'has_video': u'0', u'time_added': u'1349951324', u'time_updated': u'1482284773', u'time_read': u'1482284772', u'excerpt': u'Your clothes, your child\u2019s toys, even the device you use to read these words may have been made in China. They are among the $100 billion of goods that the United States imports from China each year \u2014 an exchange that has become an important issue in the 2012 presidential campaign.', u'resolved_title': u'Looking Into the Eyes of &#8216;Made in China&#8217;', u'authors': {u'3024958': {u'url': u'', u'author_id': u'3024958', u'item_id': u'233921121', u'name': u'KERRI MACDONALD'}}, u'resolved_id': u'233843309', u'item_id': u'233921121', u'time_favorited': u'0', u'is_index': u'0'}
    '''
    dt = datetime.now() - timedelta(days=7)
    init_sync_since = tools.unixtime(dt, ms=False)
    TS_KEY = 'pocket_last_timestamp'  # Seconds
    since_timestamp = user.get_integration_prop(TS_KEY, init_sync_since)
    data = urllib.urlencode({
        'access_token': access_token,
        'consumer_key': POCKET_CONSUMER_KEY,
        'detailType': 'complete',
        'since': since_timestamp,
        'state': 'all'
    })
    success = False
    logging.debug("Syncing pocket for %s since %s" % (user, dt))
    res = urlfetch.fetch(url=GET_ENDPOINT,
                         payload=data,
                         method=urlfetch.POST,
                         deadline=60,
                         validate_certificate=True)
    logging.debug(res.status_code)
    latest_timestamp = 0
    readables = []
    if res.status_code == 200:
        data = json.loads(res.content)
        articles = data.get('list', {})
        latest_timestamp = data.get('since', 0)  # Pocket's 'since' cursor for the next incremental sync
        save = []
        USE_RESOLVED_TITLE = True
        if articles:
            for id, article in articles.items():
                source = 'pocket'
                if USE_RESOLVED_TITLE:
                    title = article.get('resolved_title')
                else:
                    title = article.get('given_title')
                url = article.get('given_url')
                status = article.get('status')
                authors = article.get('authors')
                excerpt = article.get('excerpt')
                images = article.get('images')
                time_added = int(article.get('time_added', 0)) * 1000  # Pocket gives epoch seconds; convert to ms
                time_read = int(article.get('time_read', 0)) * 1000
                dt_added = tools.dt_from_ts(time_added)
                dt_read = tools.dt_from_ts(time_read) if time_read else None
                tags = article.get('tags', {}).keys()
                word_count = int(article.get('word_count', 0))
                favorite = int(article.get('favorite', 0)) == 1
                image_url = None
                author = None
                if images:
                    first_image = images.get('1')
                    if first_image:
                        image_url = first_image.get('src')
                if authors:
                    author_keys = authors.keys()
                    if author_keys:
                        author = authors.get(author_keys[0], {}).get('name')
                archived = int(status) == 1
                read = archived and (not tags or 'unread' not in tags)
                r = Readable.CreateOrUpdate(user,
                                            source_id=id,
                                            title=title,
                                            url=url,
                                            image_url=image_url,
                                            author=author,
                                            excerpt=excerpt,
                                            favorite=favorite,
                                            dt_added=dt_added,
                                            word_count=word_count,
                                            dt_read=dt_read,
                                            tags=tags,
                                            source=source,
                                            read=read)
                if r:
                    r.Update(read=archived, favorite=favorite, dt_read=dt_read)
                    save.append(r)
                    readables.append(r)
        ndb.put_multi(save)  # Save all
        Readable.put_sd_batch(save)
        user.set_integration_prop(TS_KEY, latest_timestamp)
        success = True
    else:
        logging.debug(res.headers)
    return (success, readables, latest_timestamp)
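The sync() example above reports failure through its return value rather than raising. A minimal caller sketch (hedged: run_pocket_sync and its log messages are illustrative assumptions, not part of the source):

def run_pocket_sync(user, access_token):
    # Sketch only: call the sync() example above and act on its result tuple.
    success, readables, latest_timestamp = sync(user, access_token)
    if success:
        logging.debug("Pocket sync OK for %s: %d readables, cursor %s" %
                      (user, len(readables), latest_timestamp))
    else:
        logging.warning("Pocket sync failed for %s" % user)
    return readables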
Example #42
0
    def __evalFunction(self, toks):
        val = toks[0]
        fnName = val[0].upper()
        args = val[1:]
        args = [arg for arg in args if arg is not None]  # Filter nones
        if not args:
            return 0
        if fnName == 'SUM':
            args = self.__getArglist(args)
            if args:
                return [sum(args)]
            return [0]
        elif fnName == 'AVE':
            from tools import average
            args = self.__getArglist(args)
            if args:
                return [average(args)]
            return [0]
        elif fnName == 'MAX':
            args = self.__getArglist(args)
            if args:
                res = max(args)
                return [res]
            return [0]
        elif fnName == "MIN":
            args = self.__getArglist(args)
            if args:
                return [min(args)]
            return [0]
        elif fnName == "COUNT":
            args = self.__getArglist(args)
            return [len(args)]
        elif fnName == "ALARMS":
            from models import Alarm
            # Usage: ALARMS([rule_id])
            # Returns list of alarms in processed batch, optionally filtered by rule_id
            alarm_list = list(self.alarm_list)
            if args and type(args[0]) in [int, long, float]:
                rule_id = int(args[0])
                if rule_id:
                    alarm_list = [
                        al for al in alarm_list if tools.getKey(
                            Alarm, 'rule', al, asID=True) == rule_id
                    ]
            return [alarm_list]
        elif fnName == "DISTANCE":
            dist = 0
            last_gp = None
            args = self.__getArglist(args)
            for gp in args:
                gp = tools.safe_geopoint(gp)
                if last_gp and gp:
                    dist += tools.calcLocDistance(last_gp, gp)
                if gp:
                    last_gp = gp
            return [dist]  # m
        elif fnName == "SQRT":
            arg = args[0]
            return [math.sqrt(arg)]
        elif fnName == "SINCE":
            # Returns ms since event (argument), or 0 if none found
            event = args[0]
            since = 0
            now = self.run_ms
            try:
                if event:
                    if type(event) in [long, float]:
                        # Treat as ms timestamp
                        since = now - event
                    elif isinstance(event, basestring):
                        pass
                    elif event.kind() == 'Alarm':
                        since = now - tools.unixtime(event.dt_start)
                    elif event.kind() == 'Record':
                        since = now - tools.unixtime(event.dt_recorded)
            except Exception, e:
                logging.warning("Error in SINCE() - %s" % e)
            return [since]
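For context, the handler above is dispatched from parsed expression strings; a minimal sketch of reaching the SQRT and SINCE branches (assumptions: the ExpressionParser(expr, run_ms=...) API and a Record with [x] and [y] columns, both set up in the next example):

# Sketch only, assuming the ExpressionParser API exercised in the test below.
now_ms = tools.unixtime()
ep = ExpressionParser("SQRT([x]^2 + [y]^2)", run_ms=now_ms)
magnitude = ep.run(record)   # Dispatches to the SQRT branch of __evalFunction
ep = ExpressionParser("SINCE(1467011405000)", run_ms=now_ms)
elapsed_ms = ep.run(record)  # SINCE branch: run_ms minus the given ms timestamp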
Example #43
0
    def testSimpleExpressionParsing(self):
        from models import Record
        r = Record()
        x = 5
        y = -2
        z = 3.5
        r.setColumnValue("x", x)
        r.setColumnValue("y", y)
        r.setColumnValue("z", z)
        now_ms = tools.unixtime()
        volley = [
            ["1 + 1", (1 + 1)],
            ["1 + 1 + 5", (1 + 1 + 5)],
            ["2 * 8 + 3", (2 * 8) + 3],
            ["4 + 5 * 2", 4 + (5 * 2)],
            ["40000 / 1000", 40],
            ["2^3", (pow(2, 3))],
            ["(8/2)*3 + 9", ((8 / 2) * 3 + 9)],
            ["[x]^2", (pow(x, 2))],
            ["'a' * 3", 0],  # Non-numeric, treat as 0
            ["3.0 * 3", 9],
            ["SQRT([x]^2 + [y]^2)",
             math.sqrt(pow(x, 2) + pow(y, 2))],
            ["5 > 2", True],
            ["5 > 6", False],
            ["(3*5) < 20", True],
            ["[x] > 100", False],
            ["(3*5) < 20 AND [x] > 100", False],
            ["(3*5) < 20 AND [x] > 0 AND [x] > 1", True],
            ["1==1 OR 1==3 AND 2==0", True],
            ["(1==1 OR 1==3) AND 2==2", True],
            ["(1==2 AND 1==3) OR 2==2", True],
            ["(1==1 OR 1==1) AND 1==0", False],
            ["1==1 OR 1==1 AND 1==0", True],  # And first
            ["1==1 OR (1==1 AND 1==0)", True],
            ["1 == 2 OR [x] > 100 OR [x] > 1", True],
            ["\"ONE\" == \"ONE\"", True],
            ["\"ONE / (1)\" == \"ONE / (1)\"", True],
            ["1==2 OR 1==1 OR 1==4 OR 1==5", True],
            ["SINCE(1467011405000)", now_ms - 1467011405000],
            ["SQRT([x]^2 + [y]^2)", (math.sqrt(pow(x, 2) + pow(y, 2)))],
            [
                "SQRT([x]^2 + [y]^2 + 8^2)",
                (math.sqrt(pow(x, 2) + pow(y, 2) + pow(8, 2)))
            ],
            [
                "SQRT([x]^2 + [y]^2 + [z]^2)",
                (math.sqrt(pow(x, 2) + pow(y, 2) + pow(z, 2)))
            ]
        ]

        for v in volley:
            expr = v[0]
            target = v[1]
            tick = datetime.now()
            ep = ExpressionParser(expr, verbose=True, run_ms=now_ms)
            result = ep.run(r)
            tock = datetime.now()
            diff = tock - tick
            ms = diff.total_seconds() * 1000  # Full elapsed ms, not just the microseconds component
            logmessage = "%s took %d ms" % (expr, ms)
            if ms > 100:
                logmessage += " <<<<<<<<<<<<<<<<<<<<<<<<<<< SLOW OP!"
            print logmessage
            self.assertEqual(result, target)