Example #1
    def parse_value(self, value):
        for parser in self.parsers:
            try:
                parser().check_value(value)
                return parser().parse_value(value)
            except ValueError:
                pass
        else:  # pragma: no cover
            raise RuntimeError()
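This method, like Examples 2, 3 and 9 below, tries each parser class in turn and raises only once every parser has rejected the input; the `for`/`else` fires because the loop body never `break`s, only `return`s. A minimal self-contained sketch of the pattern, with hypothetical IntParser/FloatParser classes standing in for self.parsers:

# Minimal sketch of the fallback chain; IntParser and FloatParser are
# hypothetical stand-ins for the parser classes held in self.parsers.
class IntParser:
    def check_value(self, value):
        int(value)  # raises ValueError when value is not an integer literal

    def parse_value(self, value):
        return int(value)


class FloatParser:
    def check_value(self, value):
        float(value)

    def parse_value(self, value):
        return float(value)


def parse_value(parsers, value):
    for parser in parsers:
        try:
            parser().check_value(value)
            return parser().parse_value(value)
        except ValueError:
            pass
    raise RuntimeError("no parser accepted %r" % (value,))


print(parse_value([IntParser, FloatParser], "3"))    # 3
print(parse_value([IntParser, FloatParser], "3.5"))  # 3.5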
Example #2
    def check(self, token):
        for parser in self.parsers:
            try:
                parser().check(token)
                return
            except ParseError:
                pass
        else:
            raise ParseError(*token[0])
Example #3
    def check_value(self, value):
        for parser in self.parsers:
            try:
                parser().check_value(value)
                return
            except ValueError:
                pass
        else:
            raise ValueError()
Example #4
    def get_parser(self, value):
        if isinstance(value, type):
            check = issubclass
        else:
            check = isinstance

        for t, parser in self.parsers:
            if check(value, t):
                if isinstance(value, type):
                    return parser()
                else:
                    return parser(default=value)
        else:
            return self.default_parser_class(value)
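This variant dispatches on whether the argument is a class or an instance: classes are matched with issubclass and produce a bare parser, while instances are matched with isinstance and reused as the parser's default. A hedged sketch of the idea, with a hypothetical IntField parser class and registry:

# Hypothetical registry illustrating the type-based dispatch above:
# (type, parser_class) pairs, matched with issubclass for classes
# and isinstance for instances.
class IntField:
    def __init__(self, default=0):
        self.default = default


def get_parser(value, parsers, default_parser_class=IntField):
    check = issubclass if isinstance(value, type) else isinstance
    for t, parser in parsers:
        if check(value, t):
            return parser() if isinstance(value, type) else parser(default=value)
    return default_parser_class(value)


parsers = [(int, IntField)]
print(get_parser(int, parsers).default)  # 0 -- a class yields a bare parser
print(get_parser(7, parsers).default)    # 7 -- an instance becomes the default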
Example #5
def tokenizeText(sample):
    # get the tokens using spaCy
    tokens = parser(cleanText(sample))

    # lemmatize
    lemmas = []
    for tok in tokens:
        lemmas.append(tok.lemma_.lower().strip() if tok.lemma_ != "-PRON-" else tok.lower_)
    tokens = lemmas

    # stoplist the tokens
    tokens = [tok for tok in tokens if tok not in STOPLIST]

    # stoplist symbols
    tokens = [tok for tok in tokens if tok not in SYMBOLS]

    # remove large strings of whitespace
    while "" in tokens:
        tokens.remove("")
    while " " in tokens:
        tokens.remove(" ")
    while "\n" in tokens:
        tokens.remove("\n")
    while "\n\n" in tokens:
        tokens.remove("\n\n")

    return tokens
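tokenizeText leans on module-level globals that the snippet does not show. A minimal sketch of the assumed setup follows; the model name, the exact STOPLIST/SYMBOLS contents, and the cleanText body are all assumptions, not the original code:

# Assumed globals for tokenizeText (hypothetical reconstruction).
import string

import spacy
from spacy.lang.en.stop_words import STOP_WORDS

parser = spacy.load("en_core_web_sm")  # assumed spaCy pipeline
STOPLIST = set(STOP_WORDS)             # assumed stopword set
SYMBOLS = set(string.punctuation)      # assumed symbol set


def cleanText(text):
    return text.strip()  # placeholder; the real cleanText is not shown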
Example #6
    def parse_and_save_cal(self):
        print(self.qwestCal.toPlainText())
        self.parse_engine = parser(self.qwestCal.toPlainText())
        summ = [self.combo1.currentText(), self.combo1.currentIndex()]
        disc = self.description.text()
        if summ[1] == 0:
            summ = '$ccode'
        elif summ[1] == 1:
            summ = '$cname'
        elif summ[1] == 2:
            summ = '$ccode - $cname'
        else:
            summ = summ[0]

        dd = QtGui.QFileDialog.getSaveFileName()
        # strip a trailing '.ics'; the extension is re-appended when saving
        if dd[0].endswith('.ics'):
            dd = dd[0][:-len('.ics')]
        else:
            dd = dd[0]

        with open(dd+'.ics', 'w') as f:
            f.write('BEGIN:VCALENDAR\nPRODID:-//hacksw/handcal//NONSGML v1.0//EN\nVERSION:1.0\nCALSCALE:GREGORIAN')
            for x in self.parse_engine.get_event_list():
                f.write(x.print_ics(summ,disc))
            f.write('END:VCALENDAR')
Example #7
def _parse_value(s):
    s = s.strip('"')
    for parser in [int, float]:
        try:
            return parser(s)
        except ValueError:
            pass
    return s
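The effect is to try increasingly permissive built-in parsers and fall back to the raw string, for example:

print(_parse_value('"42"'))   # 42 (int)
print(_parse_value('3.14'))   # 3.14 (float)
print(_parse_value('abc'))    # 'abc' (left as a string)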
Example #8
    def parse_setting(self, name, default, parser):
        raw_value = self.settings.get(name, None)
        if raw_value is None:
            return default
        try:
            return parser(raw_value)
        except (TypeError, ValueError):
            return default
Example #9
    def __call__(self, token):
        self.check(token)
        for parser in self.parsers:
            try:
                return parser()(token)
            except ParseError:
                pass
        else:
            raise ParseError(*token[0])
Example #10
def parse(timestr, parserinfo=None, **kwargs):
    if isinstance(timestr, six.binary_type):
        timestr = timestr.decode()
    if isinstance(timestr, six.string_types):
        try:
            if parserinfo:
                ret = parser(parserinfo).parse(timestr, **kwargs)
            else:
                ret = parser().parse(timestr, **kwargs)
        except TypeError:
            raise ValueError("Can't parse date from string '%s'" % timestr)
    elif isinstance(timestr, int) or isinstance(timestr, float):
        ret = datetime.fromtimestamp(timestr)
    elif isinstance(timestr, datetime) or isinstance(timestr, date):
        ret = timestr
    else:
        raise TypeError("Can't convert %s to date." % type(timestr))
    return ret
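This wrapper accepts strings, numeric timestamps, and datetime/date objects alike. A few illustrative calls, assuming the datetime and dateutil imports the snippet relies on:

parse("2007-01-01")          # datetime(2007, 1, 1, 0, 0), via dateutil's parser class
parse(0)                     # datetime.fromtimestamp(0), i.e. the local epoch
parse(datetime(2020, 5, 4))  # returned unchanged
parse([])                    # raises TypeError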
Example #11
    def testCustomParserInfo(self):
        # Custom parser info wasn't working, as Michael Elsdörfer discovered.
        from dateutil.parser import parserinfo, parser

        class myparserinfo(parserinfo):
            MONTHS = parserinfo.MONTHS[:]
            MONTHS[0] = ("Foo", "Foo")
        myparser = parser(myparserinfo())
        dt = myparser.parse("01/Foo/2007")
        self.assertEqual(dt, datetime(2007, 1, 1))
Example #12
def day_iter(hd):
    '''
    Yields a dataframe of all recorded messages from a given day. Adds additional field to output df: timestamp (derived from time field).
    :param hd:
    :return: (response_df, day_date)
    '''
    import datetime

    from dateutil import parser
    dates = hd.date.apply(str_to_date)
    # parser.parse() is dateutil's string-to-datetime entry point
    hd['datestamp'] = dates.apply(lambda t: parser.parse(str(t)).timestamp())
    max_date = max(dates)
    current_date = min(dates)
    while current_date <= max_date:
        day = into_mega_df(
            hd.query("datestamp == {}".format(
                parser.parse(str(current_date)).timestamp())))
        day['timestamp'] = day.time.apply(lambda t: parser.parse(t).timestamp())
        day.sort_values("timestamp", inplace=True)
        yield (day, current_date)
        current_date = current_date + datetime.timedelta(days=1)
Example #13
def parse_aggregate_records(aggregate, headers):
    file_format = detect_aggregate_file_format(aggregate)
    if file_format == 0:
        parser = parse_aggregate_v1
    elif file_format == 1:
        parser = parse_aggregate_v2
    else:
        print("Unrecognized file format for file {}, skipping...", aggregate)
        return

    return parser(aggregate, headers)
Example #14
class dateExtractor():
    p = parser()
    info = p.info

    def timetoken(self, token):
        try:
            float(token)
            return True
        except ValueError:
            pass

        return any(
            f(token)
            for f in (self.info.jump, self.info.weekday, self.info.month,
                      self.info.hms, self.info.ampm, self.info.pertain,
                      self.info.utczone, self.info.tzoffset))

    def timesplit(self, input_string):
        batch = []
        for token in _timelex(input_string):
            if self.timetoken(token):
                if self.info.jump(token):
                    continue
                batch.append(token)
            else:
                if batch:
                    yield " ".join(batch)
                    batch = []
        if batch:
            yield " ".join(batch)

    def extract_dates_from(self, fuzzy_input):

        # default date used to fill in missing components of partial dates
        DEFAULT = datetime(2018, 1, 1)

        # prepare string for extraction
        fuzzy_input = fuzzy_input.replace(' ', '')
        fuzzy_input = fuzzy_input.replace('-', '__delimitor__')
        fuzzy_input = fuzzy_input.replace(',', '__delimitor__')
        fuzzy_input = fuzzy_input.lower().replace(
            'present',
            datetime.now().strftime('%Y-%m-%d'))

        found_dates = []
        for item in self.timesplit(fuzzy_input):
            try:
                # parse with fuzzy matching and the default date above
                found_dates.append(
                    self.p.parse(item, fuzzy=True, default=DEFAULT))
            except Exception:
                continue

        return found_dates
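A hedged usage sketch for this extractor; it assumes parser and _timelex were imported from dateutil.parser at module level (as the class body requires) and that datetime comes from the standard library:

from datetime import datetime
from dateutil.parser import parser, _timelex  # _timelex is a private dateutil helper

extractor = dateExtractor()
found = extractor.extract_dates_from("Jan 2016 - Mar 2017")
# e.g. [datetime(2016, 1, 1, 0, 0), datetime(2017, 3, 1, 0, 0)]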
Example #15
def parse(timestr, parserinfo=None, **kwargs):
    if isinstance(timestr, six.binary_type):
        timestr = timestr.decode()
    if isinstance(timestr, six.string_types):
        try:
            if parserinfo:
                ret = parser(parserinfo).parse(timestr, **kwargs)
            else:
                ret = parser().parse(timestr, **kwargs)
        except TypeError:
            raise ValueError("Can't parse date from string '%s'" % timestr)
    elif isinstance(timestr, int) or isinstance(timestr, float):
        ret = datetime.fromtimestamp(timestr)
    elif isinstance(timestr, datetime) or isinstance(timestr, date):
        ret = timestr
    elif isinstance(timestr, time):
        ret = timestr
    else:
        raise TypeError("Can't convert %s to date." % type(timestr))
    return ret
Example #16
    def __iter__(self):
        """Iterate over all of the lines in the file"""

        self.start()

        parser = self.make_fw_row_parser()

        for line in self._fstor.open(mode='r', encoding=self.spec.encoding):

            yield [e.strip() for e in parser(line)]

        self.finish()
Example #17
def parse(self, create_data: dict):
    """
    Convert raw log content into usable data.
    :param self: Task instance, supplied by Celery.
    :param create_data: Default log parameters.
    :return: Parsed data.
    """
    log_type = create_data['log_type']
    parser = getattr(handlers, log_type)
    content = create_data['content']
    match_data = parser(content, ProgressRecorder(self))
    return match_data
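The getattr call turns the log_type string into a parser function looked up on the handlers module. A minimal sketch of that dispatch; the handler name, the stand-in handlers class, and passing recorder=None are all hypothetical:

# Hypothetical stand-in for the project's handlers module.
class handlers:
    @staticmethod
    def line_count(content, recorder):
        return {"lines": content.count("\n")}


parser = getattr(handlers, "line_count")
print(parser("GET /\nGET /about\n", recorder=None))  # {'lines': 2}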
Example #18
    def main(self, args, app):
        """
        Main logic for calculating agp
        """
        # print args
        # get parameters
        params = self.get_params(args)
        # print params.get('input')
        # create calculator
        parser = AGP()
        with open(params.get('input'), 'r') as f:
            # calculate agp for all input
            return parser(f.readlines())
Example #19
    def __call__(self, lines):
        result = self.get_defaults()

        for token in tokenize(lines):
            key = get_key(token)

            if key in self:
                parser = self[key]
            else:
                parser = self.default_parser_class()

            result[key] = parser(token)

        return result
Example #20
    def testCustomParserShortDaynames(self):
        # Horacio Hoyos discovered that day names shorter than 3 characters,
        # for example two letter German day name abbreviations, don't work:
        # https://github.com/dateutil/dateutil/issues/343
        from dateutil.parser import parserinfo, parser

        class GermanParserInfo(parserinfo):
            WEEKDAYS = [("Mo", "Montag"),
                        ("Di", "Dienstag"), ("Mi", "Mittwoch"),
                        ("Do", "Donnerstag"), ("Fr", "Freitag"),
                        ("Sa", "Samstag"), ("So", "Sonntag")]

        myparser = parser(GermanParserInfo())
        dt = myparser.parse("Sa 21. Jan 2017")
        self.assertEqual(dt, datetime(2017, 1, 21))
Example #21
def parse_ambit_samples(file_obj):
    # ambit data is not valid xml. need to add a fake top level entry.
    tree = ET.parse(file_obj)
    item = {}
    for sample in tree.find('samples'):
        is_gps_sample = (sample.find('Latitude') is not None)
        for child in sample:
            parser = PARSERS.get(child.tag)
            if not callable(parser):
                continue
            item[child.tag] = parser(child.text)
        if is_gps_sample:
            # merging data. gps samples have full seconds, so it's kinda nicer...
            yield item
            item = {}
Example #22
def main():
    # Create the output dir
    os.makedirs(PROCESSED_PATH, exist_ok=True)

    # parse_map contains per-file parsers. For example, visibility is parsed
    # differently from air temperature. The keys of the map are the file names,
    # each mapped to its parsing function.
    parse_map = {
        RAW_FILE_NAME_AIR_TEMPERATURE: parseAirTemperature,
        RAW_FILE_NAME_PRECIPITATION: parsePrecipitation,
        RAW_FILE_NAME_VISIBILITY: parseVisibility,
        RAW_FILE_NAME_WIND_DIRECTION: parseWindDirection,
        RAW_FILE_NAME_WIND_SPEED: parseWindSpeed,
    }

    # RAW_PATH is a dir containing folders of location (e.g. OSL, TRD).
    # These locations are dirs containing weather info (e.g. air_temperature.csv etc)
    for location in os.listdir(RAW_PATH):
        print("\nProcessing weather data for %s" % location)

        # Data is a dict containing parsed dataframes for each weather category
        # data['air_temperature'] = pd.DataFrame
        data = {}
        for file_name in os.listdir('%s/%s' % (RAW_PATH, location)):
            df = pd.read_csv('%s/%s/%s' % (RAW_PATH, location, file_name))

            print('Parsing %s' % file_name)
            parser = parse_map[file_name]  # Get appropriate parser for file

            # Parse data (key can for example be AIR_TEMPERATURE)
            (df, key) = parser(df)

            # Do common parsing for all frames
            dropExcessColumns(df)
            df[REFERENCE_TIME] = df.apply(removeTimeZone, axis=1)

            # Attach dataframe to the data object
            data[key] = df

        if data:
            # Data should now be a dict containing data about
            # all weather categories for this location. Let's merge them
            # and save the output as <location>.csv
            print("Merging data...")
            merged = mergeDataFrames(data)
            merged.to_csv('%s/%s.csv' % (PROCESSED_PATH, location),
                          index=False)
Example #23
def regex_file(path, parser=parse_txt):
    path = os.path.abspath(path)
    file_name, file_lines = parser(path)
    for line_num, line in enumerate(file_lines):
        for match_info in regex_text(line):
            yield [path,
                   file_name,
                   match_info['REGEX_LABEL'],
                   line_num,
                   match_info['MATCH'],
                   match_info['START'],
                   match_info['END'],
                   match_info['MATCH_LEN'],
                   match_info['NORM_TEXT_LEN'],
                   match_info['CONTEXT'],
                   match_info['PARSED'],
                   ]
Example #24
def envelopes(request):
    '''
        Loads the envelopes page
    '''
    token = 'Token token=' + request.session.get('api_token')
    path = 'https://simplifiapi2.herokuapp.com/user_envelopes'
    req = requests.get(path, headers={'Authorization': token})
    data = req.json()
    data = sorted(data, key=lambda x: x['envelope_name'].upper())
    load_data = parser(data)
    data = json.dumps(load_data['data'])

    context = {'envelopes': load_data['data'], 'data': data}
    if request.GET.get('type'):
        return render(request, 'envelopes_ajax.html', context)
    else:
        return render(request, 'envelopes.html', context)
Example #25
def goals_remove_confirm(request, goal_id):
    '''
        Loads the modal to remove a goal
    '''
    if not request.session.get('api_token'):
        return HttpResponseRedirect('/')
    token = 'Token token=' + request.session.get('api_token')
    path = 'https://simplifiapi2.herokuapp.com/goals/' + goal_id
    header = {'Content-type': 'application/json', 'Authorization': token}
    req = requests.get(path, headers=header)
    if req.ok is False:
        context = {'error': req.status_code}
    else:
        data = req.json()
        load_data = parser(data)
        context = {'goal': load_data['data']}
    return render(request, 'goals_remove.html', context)
Example #26
def transactions_remove_confirm(request, transaction_id):
    '''
        Loads the modal to remove a transaction
    '''
    if not request.session.get('api_token'):
        return HttpResponseRedirect('/')
    # Get token and transaction for deletion
    token = 'Token token=' + request.session.get('api_token')
    path = 'https://simplifiapi2.herokuapp.com/account_transactions/' + transaction_id
    header = {'Content-type': 'application/json', 'Authorization': token}
    req = requests.get(path, headers=header)
    if req.ok is False:
        context = {'error': req.status_code}
    else:
        data = req.json()
        load_data = parser(data)
        context = {'transaction': load_data['data']}
    return render(request, 'transactions_remove.html', context)
Example #27
def transactions(request):
    '''
        Loads the my spending page
    '''
    if not request.session.get('api_token'):
        return HttpResponseRedirect('/')
    token = 'Token token=' + request.session.get('api_token')
    path = 'https://simplifiapi2.herokuapp.com/account_transactions'
    req = requests.get(path, headers={'Authorization': token})
    data = req.json()
    data = sorted(data, key=lambda x: x['date'].upper())
    load_data = parser(data)

    context = {'transactions': load_data['data']}
    if request.GET.get('type'):
        return render(request, 'transactions_ajax.html', context)
    else:
        return render(request, 'transactions.html', context)
Example #28
def parse_ambit_samples(file_obj):
    # ambit data is not valid xml. need to add a fake top level entry.
    tree = ET.parse(file_obj)
    item = {}
    # very old format (not sure if it still works...)
    samples = tree.find("samples")
    # either SML (having DeviceLog -> Samples) or a bit older (just Samples)
    if samples is None:
        samples = tree.find(".//Samples")
    for sample in samples:
        is_gps_sample = sample.find("Latitude") is not None
        for child in sample:
            parser = PARSERS.get(child.tag)
            if not callable(parser):
                continue
            item[child.tag] = parser(child.text)
        if is_gps_sample:
            # merging data. gps samples have full seconds, so it's kinda nicer...
            yield item
            item = {}
Example #29
def parse_ambit_samples(file_obj):
    # ambit data is not valid xml. need to add a fake top level entry.
    tree = ET.parse(file_obj)
    item = {}
    # very old format (not sure if it still works...)
    samples = tree.find('samples')
    # either SML (having DeviceLog -> Samples) or a bit older (just Samples)
    if samples is None:
        samples = tree.find('.//Samples')
    for sample in samples:
        is_gps_sample = (sample.find('Latitude') is not None)
        for child in sample:
            parser = PARSERS.get(child.tag)
            if not callable(parser):
                continue
            item[child.tag] = parser(child.text)
        if is_gps_sample:
            # merging data. gps samples have full seconds, so it's kinda nicer...
            yield item
            item = {}
Example #30
    def parse_response(self, response):
        parser = None
        # Handle login status.
        if response.status == 400:
            req_once_logged = response.meta['req_once_logged'] if 'req_once_logged' in response.meta else response.request
            self.logger.warning("%s: HTTP 400 at %s. Going to index page. Error message: %s" % (self.login['username'], response.url, self.get_text(response.xpath(".//body/text()").extract())))
            yield self.make_request(url='index', response=response, req_once_logged=req_once_logged, shared=False, dont_filter=True)
        elif self.islogged(response) is False:
            self.loggedin = False
            req_once_logged = response.meta['req_once_logged'] if 'req_once_logged' in response.meta else response.request
            if self.is_login_page(response) is False:
                self.logger.info('Not logged in. Going to login page.')
                yield self.make_request(reqtype='loginpage', response=response, req_once_logged=req_once_logged)
            elif self.is_login_page(response) is True:
                if self.login_failed(response) is True:
                    self.logger.info('Failed last login as %s. Trying again. Error: %s' % (self.login['username'], self.get_text(response.xpath('.//p[@class="error"]'))))
                if self.logintrial > self.settings['MAX_LOGIN_RETRY']:
                    self.wait_for_input("Too many failed logins", req_once_logged)
                    self.logintrial = 0
                    return
                self.logger.info("Trying to login as %s." % self.login['username'])
                self.logintrial += 1
                yield self.make_request(reqtype='dologin', response=response, req_once_logged=req_once_logged)
        else:
            self.loggedin = True
            if response.meta['reqtype'] == 'dologin':
                self.logger.info("Successfully logged in as %s! Returning to stored request %s" % (self.login['username'], response.meta['req_once_logged']))
                if response.meta['req_once_logged'] is None:
                    self.logger.warning("We are trying to yield a None. This should not happen.")
                yield response.meta['req_once_logged']
            else:
                if self.is_threadlisting(response) is True:
                    parser = self.parse_threadlisting
                elif self.is_message(response) is True:
                    parser = self.parse_message
                elif self.is_user(response) is True:
                    parser = self.parse_user
                # Yield from the appropriate parsing function.
                if parser is not None:
                    for x in parser(response):
                        yield x
Example #31
def update_schedule(view, data):
    if data is None:
        view.flash('Saved changes.')
        return

    if data == "":
        view.flash(u"Empty data file was given!.", "warning")
        return

    parsers = [
        parse_json, parse_csv, parse_locations, parse_newcsv, parse_pms_json]
    result = None
    for parser in parsers:
        try:
            result = parser(view.context, data)
        except InvalidParserError, e:
            continue
        except ScheduleImportError, e:
            for flash_params in e.messages:
                view.flash(*flash_params)
            return
Example #32
def question(request):
    file = BytesIO(request.body)
    user_question = sr.AudioFile(file)

    recognizer_instance = sr.Recognizer()
    recognizer_instance.energy_threshold = 200

    with user_question as source:
        audio = recognizer_instance.record(source)

    try:
        stringy = recognizer_instance.recognize_google(audio, language='ro-RO')
    except sr.UnknownValueError:
        return HttpResponse('{ "message": "Speech is unintelligible." }')
    except sr.RequestError:
        return HttpResponse('{ "message": "Speech recognition failed." }')

    stringy = strip_accents(stringy)

    file.close()
    return HttpResponse('{ "message": "' + parser(stringy) + '" }')
Example #33
    def get_parser(self, response_headers, response_body):
        # sanitize response headers
        response_headers = {k.lower(): v for k, v in response_headers.items()}

        # check response type, should be HTML
        # @TODO: HTML check

        if 'server' in response_headers:
            if response_headers['server'].startswith('apache'):
                response_headers['server'] = 'apache'
            elif response_headers['server'].startswith('nginx'):
                response_headers['server'] = 'nginx'
            else:
                response_headers['server'] = 'unknown'
        else:
            response_headers['server'] = 'unknown'

        if response_headers['server'] in self.parsers:
            parsers = self.parsers[response_headers['server']]

            for parser in parsers:
                try:
                    data = parser(response_body)
                    if data:
                        self.parser = parser

                        return self.parser
                except Exception:
                    pass

        # no suitable index parser found at this point, do some regex on <a> tags
        try:
            data = self._parse4(response_body)
            if data:
                self.parser = self._parse4

                return self.parser
        except Exception:
            pass
Example #34
    def buildQuery(self, conditions: List[tuple]):
        for element in conditions:
            variable = element[0]
            condition = element[1]
            value = element[2]

            if variable == 'id' and condition in [
                    '>', '<', '=', 'IN', 'NOTIN', 'BETWEEN'
            ]:
                query_builder.pickWhereCondition(variable, condition, value)
            elif variable == 'url' and condition == '=':
                query_builder.pickWhereCondition(variable, condition, value)
            elif variable == 'date' and condition in [
                    '>', '<', '=', 'BETWEEN'
            ]:
                # parse the supplied value (not the literal variable name)
                # and normalize it before building the condition
                date = parser(value)
                date_string = f'{date.year}-{date.month}-{date.day}'

                query_builder.pickWhereCondition(variable, condition, date_string)
            elif variable == 'rating' and condition in [
                    '>', '<', '=', 'BETWEEN'
            ]:
                query_builder.pickWhereCondition(variable, condition, value)
Example #35
    def readAvailableData(self):
        while True:
            try:
                r = self.reader.next()
            except StopIteration:
                break
            self.lineNo += 1

            if self.timeParser is None:
                self.timeParser = self._detectTimeType(r[0])
            t = self.timeParser(r[0])

            i = 0
            for rawValue in r[1:]:
                if len(self.columnParsers) <= i:
                    # initialize list of possible parsers for this column:
                    self.columnParsers.append([float, self._parseFloatComma])

                # try all available parser functions, and remove those that fail:
                parsers = self.columnParsers[i][:]
                assert len(parsers) > 0
                for parser in parsers:
                    try:
                        v = parser(rawValue)
                    except Exception, e:
                        if len(self.columnParsers[i]) <= 1:
                            raise Exception(
                                "failed to parse CSV value '%s' (line %d, table column %d) as '%s': %s"
                                % (rawValue, self.lineNo, i + 2, parser, e)
                            )
                        else:
                            self.columnParsers[i].remove(parser)
                    else:
                        break

                self.store.update((self.ids[i], t, v))
                i += 1
Example #36
    def handle(self, *args, **options):
        for user in TwitterUser.objects.all():
            api = Twitter(user.username, user.password)
            timeline = api.statuses.user_timeline()

            # Update Twitter user data
            userdata = api.users.show(screen_name=user.username)

            user.twitter_id = userdata['id']
            user.location = userdata['location']
            user.name = userdata['name']
            user.image_url = userdata['profile_image_url']
            user.homepage = userdata['url']
            user.description = userdata['description']
            user.save()

            # Fetch tweets
            for tweet in timeline:
                Tweet.objects.get_or_create(
                    twitter_user = user,
                    tweet = tweet['text'],
                    tweet_id = tweet['id'],
                    posted = dateutil.parser.parse(tweet['created_at']).astimezone(tzlocal()).replace(tzinfo=None)
                )
Example #37
# display_device allows our custom vendor implementation to include
# special information when displaying information about a device using
# our plugin as a vendor.
def display_device (device):
  # no special information needed to run
  return ''

# openaps calls get_uses to figure out how to use a device using
# agp as a vendor.  Return a list of classes which inherit from Use,
# or are compatible with it:
def get_uses (device, config):
  # make agp an openaps use command
  return [ agp ]

######################################################
# openaps definitions are complete
######################################################


# The remainder is for debugging and testing purposes.
# This allows running the module from commandline without openaps.
# this uses no openaps logic, and is useful for debugging/testing
# this only runs when run as:
# $ python agp.py
if __name__ == '__main__':
  parser = AGP( )
  with open("glucose.txt") as f:
    for hour, vals in parser(f.readlines()):
      print hour, vals

Example #38
def parse_line(line, parser):
    line = line.split(";")
    return parser(line)
Example #39
def parse_row(input_row, parsers):
    """given a list of parsers (some of which may be None)
    apply the appropriate one to each element of the input_row"""
    return [parser(value) if parser is not None else value
            for value, parser in zip(input_row, parsers)]
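For example, with None meaning "leave this field as-is":

row = ["2017-06-01", "ACME", "90.91"]
print(parse_row(row, [None, None, float]))
# ['2017-06-01', 'ACME', 90.91]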
Example #40
    def run(self, ips, start_time=""):
        """ str, str -> networkx multiDiGraph

        :param ips: list of IP addresses to enrich in the graph
        :param start_time: string in ISO 8601 combined date and time format (e.g. 2014-11-01T10:34Z) or datetime object.
        :return: subgraph

        Note: based on From https://gist.github.com/zakird/11196064
        """

        # Parse the start_time
        if type(start_time) is str:
            try:
                time = dateutil.parser.parse(start_time).strftime("%Y-%m-%dT%H:%M:%SZ")
            except:
                time = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")
        elif type(start_time) is datetime:
            time = start_time.strftime("%Y-%m-%dT%H:%M:%SZ")
        else:
            time = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")

        # Since sometimes I just pass in an IP, we'll fix it here.
        if type(ips) == str:
            ips = [ips]

        # Validate IP
        for ip in ips:
            _ = ipaddress.ip_address(unicode(ip))

        g = nx.MultiDiGraph()

        # Create cymru ASN enrichment node
        cymru_asn_uri = "class=attribute&key={0}&value={1}".format("enrichment", "cymru_asn_enrichment")
        attributes = {
            "class": "attribute",
            "key": "enrichment",
            "value": "cymru_asn_enrichment",
            "uri": cymru_asn_uri,
            "start_time": time,
        }
        g.add_node(cymru_asn_uri, attributes)

        #    print ips

        a = cymru_api.CymruIPtoASNService()

        for result in a.query(ips):
            try:
                t = dateutil.parser.parse(result.allocated_at).strftime("%Y-%m-%dT%H:%M:%SZ")
            except:
                t = time
            # Create ip's node
            ip_uri = "class=attribute&key={0}&value={1}".format("ip", result.ip_address)
            g.add_node(
                ip_uri,
                {"class": "attribute", "key": "ip", "value": result.ip_address, "start_time": time, "uri": ip_uri},
            )

            # link to cymru ASN enrichment
            edge_attr = {"relationship": "describedBy", "origin": "cymru_asn_enrichment", "start_time": time}
            source_hash = uuid.uuid3(uuid.NAMESPACE_URL, ip_uri)
            dest_hash = uuid.uuid3(uuid.NAMESPACE_URL, cymru_asn_uri)
            edge_uri = "source={0}&destionation={1}".format(str(source_hash), str(dest_hash))
            rel_chain = "relationship"
            while rel_chain in edge_attr:
                edge_uri = edge_uri + "&{0}={1}".format(rel_chain, edge_attr[rel_chain])
                rel_chain = edge_attr[rel_chain]
            if "origin" in edge_attr:
                edge_uri += "&{0}={1}".format("origin", edge_attr["origin"])
            edge_attr["uri"] = edge_uri
            g.add_edge(ip_uri, cymru_asn_uri, edge_uri, edge_attr)

            # Create bgp prefix node
            bgp_uri = "class=attribute&key={0}&value={1}".format("bgp", result.bgp_prefix)
            attributes = {
                "class": "attribute",
                "key": "bgp",
                "value": result.bgp_prefix,
                "uri": bgp_uri,
                "start_time": time,
            }
            g.add_node(bgp_uri, attributes)

            # Link bgp prefix node to ip
            edge_attr = {"relationship": "describedBy", "origin": "cymru_asn_enrichment", "start_time": time}
            source_hash = uuid.uuid3(uuid.NAMESPACE_URL, ip_uri)
            dest_hash = uuid.uuid3(uuid.NAMESPACE_URL, bgp_uri)
            edge_uri = "source={0}&destionation={1}".format(str(source_hash), str(dest_hash))
            rel_chain = "relationship"
            while rel_chain in edge_attr:
                edge_uri = edge_uri + "&{0}={1}".format(rel_chain, edge_attr[rel_chain])
                rel_chain = edge_attr[rel_chain]
            if "origin" in edge_attr:
                edge_uri += "&{0}={1}".format("origin", edge_attr["origin"])
            edge_attr["uri"] = edge_uri
            g.add_edge(ip_uri, bgp_uri, edge_uri, edge_attr)

            # create asn node
            asn_uri = "class=attribute&key={0}&value={1}".format("asn", result.as_number)
            attributes = {
                "class": "attribute",
                "key": "asn",
                "value": result.as_number,
                "uri": asn_uri,
                "start_time": time,
            }
            try:
                attributes["owner"] = result.as_name
            except:
                pass
            g.add_node(asn_uri, attributes)

            # link bgp prefix to asn node
            edge_attr = {"relationship": "describedBy", "origin": "cymru_asn_enrichment", "start_time": t}
            source_hash = uuid.uuid3(uuid.NAMESPACE_URL, ip_uri)
            dest_hash = uuid.uuid3(uuid.NAMESPACE_URL, asn_uri)
            edge_uri = "source={0}&destionation={1}".format(str(source_hash), str(dest_hash))
            rel_chain = "relationship"
            while rel_chain in edge_attr:
                edge_uri = edge_uri + "&{0}={1}".format(rel_chain, edge_attr[rel_chain])
                rel_chain = edge_attr[rel_chain]
            if "origin" in edge_attr:
                edge_uri += "&{0}={1}".format("origin", edge_attr["origin"])
            edge_attr["uri"] = edge_uri
            g.add_edge(ip_uri, asn_uri, edge_uri, edge_attr)

        # Return the data enriched IP as a graph
        return g
Example #41
def formatted_date(date_object, date_format):
    return to_representation(parser(date_object))
Example #42
def parse(data, url=None, mimetype=None):
    " Determine which ruleset to use "

    rulesets = parse_rules()
    parsers = [FeedXML, FeedHTML, FeedJSON]

    # 1) Look for a ruleset based on path

    if url is not None:
        for ruleset in rulesets.values():
            if 'path' in ruleset:
                for path in ruleset['path']:
                    if fnmatch(url, path):
                        parser = [
                            x for x in parsers if x.mode == ruleset['mode']
                        ][0]
                        return parser(data, ruleset)

    # 2) Look for a parser based on mimetype

    if mimetype is not None:
        parser_candidates = [x for x in parsers if mimetype in x.mimetype]

    if mimetype is None or not parser_candidates:
        parser_candidates = parsers

    # 3) Look for working ruleset for given parser
    # 3a) See if parsing works
    # 3b) See if .items matches anything

    for parser in parser_candidates:
        ruleset_candidates = [
            x for x in rulesets.values()
            if x['mode'] == parser.mode and 'path' not in x
        ]
        # rulesets with 'path' are excluded; they were handled beforehand

        try:
            feed = parser(data)

        except ValueError:
            # parsing did not work
            pass

        else:
            # parsing worked, now we try the rulesets

            for ruleset in ruleset_candidates:
                feed.rules = ruleset

                try:
                    feed.items[0]

                except (AttributeError, IndexError):
                    # parsing and or item picking did not work out
                    pass

                else:
                    # it worked!
                    return feed

    raise TypeError('no way to handle this feed')
Example #43
def _is_match_func(line):
    parsed_line = parser(line)
    return _is_match(parsed_line.by_index(field))
Example #44
      self.add_record(record)

    # calculate for out each hour of day

    for hour in range(0,24):
      agps = calc_agp(self.hour_buckets[hour])
      for minute in range(0,60,5):
        out.append((hour, minute, agps[minute/5]))
    return out

def calc_agp (bucket):
  subbuckets = [[] for x in range(0,60,5)]
  for (time, glucose) in bucket:
    subbuckets[int(math.floor(time.minute / 5))].append(glucose)
  agps = [percentile(subbucket, [10,25,50,75,90]) for subbucket in subbuckets]
  return agps



# The remainder is for debugging and testing purposes.
# This allows running the module from commandline without openaps.
# this uses no openaps logic, and is useful for debugging/testing
# this only runs when run as:
# $ python agp.py
if __name__ == '__main__':
  parser = AGP( )
  with open("glucose.txt") as f:
    for hour, minute, vals in parser(f.readlines()):
      print hour, minute, vals

Example #45
    rds = redis.StrictRedis(connection_pool=POOL)
    rds.set(key, value)
    return value

def get_test_event():
    '''loads test_event.json file and returns the dict'''
    test_json = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'test_event.json')
    return json.load(open(test_json))

def parser():
    '''
    Construct a parser to parse arguments
    @return argparse parser
    '''
    parse = argparse.ArgumentParser(description="Run EONET query with given parameters")
    parse.add_argument("--starttime", required=False, default=None, help="Start time for query range.", dest="starttime")
    parse.add_argument("--endtime", required=False, default=None, help="End time for query range.", dest="endtime")
    parse.add_argument("--lookback_days", required=False, default=None, help="Number of days to lookback in query. Use 'redis': will use redis to query for products updated since last successful query time.", dest="lookback_days")
    parse.add_argument("--status", required=False, default=None, choices=['open', 'closed'], help="Status of event. open or closed", dest="status")
    parse.add_argument("--source", required=False, default=None, help="Query over single source, sources at: https://eonet.sci.gsfc.nasa.gov/api/v2.1/sources", dest="source")
    parse.add_argument("--slack_notification", required=False, default=False, help="Key for slack notification, will notify via slack if provided.", dest="slack_notification")
    parse.add_argument("--polygon", required=False, default=None, help="Geojson polygon filter", dest="polygon")
    parse.add_argument("--test", required=False, default=False, action="store_true", help="Run a test submission. Overrides all other params", dest="test") 
    parse.add_argument("--submit", required=False, default=False, action="store_true", help="Submits the event directly. Must have datasets in working directory.", dest="submit")
    return parse

if __name__ == '__main__':
    args = parser().parse_args()
    main(starttime=args.starttime, endtime=args.endtime, lookback_days=args.lookback_days, status=args.status, source=args.source, slack_notification=args.slack_notification, polygon=args.polygon, test=args.test, submit=args.submit)

Example #46
def parse_row_old(input_row, parsers):
    return [
        parser(value) if parser is not None else value
        for value, parser in zip(input_row, parsers)
    ]
Example #47
def parse_line(line, parser):
    line = line.split(';')
    return parser(line)
Example #48
    def testParserParseStr(self):
        from dateutil.parser import parser

        self.assertEqual(parser().parse(self.str_str),
                         parser().parse(self.uni_str))
Example #49
    def _get_factory(self, prefix, path, parser):
        """Wrapper for getting objects."""
        data = self.get("/".join((prefix, path, "")))
        return parser(weblate=self, **data)
Example #50
                timestamp_nextfile = gettimestamp(d[res])
                if (comparetimestamp(to_time, timestamp_nextfile) >= 0):
                    printlog(res, from_time, to_time)


## parser: finds the first timestamp in each .log/.txt file and stores it in
## timestamp_dict, keyed by file number, in insertion order.
def parser(dir_path):
    #os.chdir(dir_path)
    regex = r'^([0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]{4}Z)'
    for x in os.listdir(dir_path):
        if (x[-4:] == '.log' or x[-4:] == '.txt'):

            file_no = re.findall(r'\d+', x)
            y = int(file_no[0])
            d[y] = x
            with open(os.path.join(dir_path, x), "r") as file:
                line = file.readline()

                # stop at EOF (readline() returns '') instead of looping forever
                while line and not re.findall(regex, line):
                    line = file.readline()

                timestamp_check = re.findall(regex, line)
                if timestamp_check:
                    timestamp_dict[y] = timestamp_check[0]


sortfiles(dir_path)
parser(dir_path)
binary_search(timestamp_dict, from_time)