def parse_value(self, value):
    for parser in self.parsers:
        try:
            parser().check_value(value)
            return parser().parse_value(value)
        except ValueError:
            pass
    else:  # pragma: no cover
        raise RuntimeError()

def check(self, token):
    for parser in self.parsers:
        try:
            parser().check(token)
            return
        except ParseError:
            pass
    else:
        raise ParseError(*token[0])

def check_value(self, value):
    for parser in self.parsers:
        try:
            parser().check_value(value)
            return
        except ValueError:
            pass
    else:
        raise ValueError()

def get_parser(self, value):
    if isinstance(value, type):
        check = issubclass
    else:
        check = isinstance
    for t, parser in self.parsers:
        if check(value, t):
            if isinstance(value, type):
                return parser()
            else:
                return parser(default=value)
    else:
        return self.default_parser_class(value)

def tokenizeText(sample):
    # get the tokens using spaCy
    tokens = parser(cleanText(sample))

    # lemmatize
    lemmas = []
    for tok in tokens:
        lemmas.append(tok.lemma_.lower().strip() if tok.lemma_ != "-PRON-" else tok.lower_)
    tokens = lemmas

    # stoplist the tokens
    tokens = [tok for tok in tokens if tok not in STOPLIST]

    # stoplist symbols
    tokens = [tok for tok in tokens if tok not in SYMBOLS]

    # remove large strings of whitespace
    while "" in tokens:
        tokens.remove("")
    while " " in tokens:
        tokens.remove(" ")
    while "\n" in tokens:
        tokens.remove("\n")
    while "\n\n" in tokens:
        tokens.remove("\n\n")

    return tokens

def parse_and_save_cal(self):
    print(self.qwestCal.toPlainText())
    self.parse_engine = parser(self.qwestCal.toPlainText())
    summ = [self.combo1.currentText(), self.combo1.currentIndex()]
    disc = self.description.text()
    if summ[1] == 0:
        summ = '$ccode'
    elif summ[1] == 1:
        summ = '$cname'
    elif summ[1] == 2:
        summ = '$ccode - $cname'
    else:
        summ = summ[0]
    dd = QtGui.QFileDialog.getSaveFileName()
    # strip an existing .ics extension so it isn't appended twice below
    if dd[0].endswith('.ics'):
        dd = dd[0][:-len('.ics')]
    else:
        dd = dd[0]
    with open(dd + '.ics', 'w') as f:
        f.write('BEGIN:VCALENDAR\nPRODID:-//hacksw/handcal//NONSGML v1.0//EN\nVERSION:1.0\nCALSCALE:GREGORIAN')
        for x in self.parse_engine.get_event_list():
            f.write(x.print_ics(summ, disc))
        f.write('END:VCALENDAR')

def _parse_value(s):
    s = s.strip('"')
    for parser in [int, float]:
        try:
            return parser(s)
        except ValueError:
            pass
    return s

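# Usage sketch for _parse_value (hypothetical inputs, not from the original
# source): quotes are stripped, ints and floats are coerced, and anything else
# falls through as the stripped string.
assert _parse_value('"42"') == 42
assert _parse_value('3.14') == 3.14
assert _parse_value('"hello"') == 'hello'
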
def parse_setting(self, name, default, parser):
    raw_value = self.settings.get(name, None)
    if raw_value is None:
        return default
    try:
        return parser(raw_value)
    except (TypeError, ValueError):
        return default

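# Usage sketch for parse_setting: the owning class is not shown above, so a
# minimal stand-in object with a `settings` dict is assumed here.
class _StubConfig(object):
    settings = {"timeout": "30", "ratio": "not-a-number"}

_stub = _StubConfig()
assert parse_setting(_stub, "timeout", 10, int) == 30      # parsed from the raw string
assert parse_setting(_stub, "ratio", 1.5, float) == 1.5    # parse failure falls back to default
assert parse_setting(_stub, "missing", "x", str) == "x"    # absent key falls back to default
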
def __call__(self, token):
    self.check(token)
    for parser in self.parsers:
        try:
            return parser()(token)
        except ParseError:
            pass
    else:
        raise ParseError(*token[0])

def parse(timestr, parserinfo=None, **kwargs):
    if isinstance(timestr, six.binary_type):
        timestr = timestr.decode()

    if isinstance(timestr, six.string_types):
        try:
            if parserinfo:
                ret = parser(parserinfo).parse(timestr, **kwargs)
            else:
                ret = parser().parse(timestr, **kwargs)
        except TypeError:
            raise ValueError("Can't parse date from string '%s'" % timestr)
    elif isinstance(timestr, int) or isinstance(timestr, float):
        ret = datetime.fromtimestamp(timestr)
    elif isinstance(timestr, datetime) or isinstance(timestr, date):
        ret = timestr
    else:
        raise TypeError("Can't convert %s to date." % type(timestr))

    return ret

def testCustomParserInfo(self):
    # Custom parser info wasn't working, as Michael Elsdörfer discovered.
    from dateutil.parser import parserinfo, parser

    class myparserinfo(parserinfo):
        MONTHS = parserinfo.MONTHS[:]
        MONTHS[0] = ("Foo", "Foo")

    myparser = parser(myparserinfo())
    dt = myparser.parse("01/Foo/2007")
    self.assertEqual(dt, datetime(2007, 1, 1))

def day_iter(hd):
    '''
    Yields a dataframe of all recorded messages from a given day.
    Adds additional field to output df: timestamp (derived from time field).
    :param hd:
    :return: (response_df, day_date)
    '''
    from dateutil import parser
    dates = hd.date.apply(str_to_date)
    # dateutil's parser.parse() expects a string, so values are stringified first
    hd['datestamp'] = dates.apply(lambda t: parser.parse(str(t)).timestamp())
    max_date = max(dates)
    current_date = min(dates)
    while current_date <= max_date:
        day = into_mega_df(
            hd.query("datestamp == {}".format(parser.parse(str(current_date)).timestamp())))
        day['timestamp'] = day.time.apply(lambda t: parser.parse(str(t)).timestamp())
        day.sort_values("timestamp", inplace=True)
        yield (day, current_date)
        current_date = current_date + datetime.timedelta(days=1)

def parse_aggregate_records(aggregate, headers):
    file_format = detect_aggregate_file_format(aggregate)
    if file_format == 0:
        parser = parse_aggregate_v1
    elif file_format == 1:
        parser = parse_aggregate_v2
    else:
        print("Unrecognized file format for file {}, skipping...".format(aggregate))
        return
    return parser(aggregate, headers)

class dateExtractor():
    p = parser()
    info = p.info

    def timetoken(self, token):
        try:
            float(token)
            return True
        except ValueError:
            pass
        return any(
            f(token) for f in (self.info.jump, self.info.weekday,
                               self.info.month, self.info.hms,
                               self.info.ampm, self.info.pertain,
                               self.info.utczone, self.info.tzoffset))

    def timesplit(self, input_string):
        batch = []
        for token in _timelex(input_string):
            if self.timetoken(token):
                if self.info.jump(token):
                    continue
                batch.append(token)
            else:
                if batch:
                    yield " ".join(batch)
                    batch = []
        if batch:
            yield " ".join(batch)

    def extract_dates_from(self, fuzzy_input):
        # set the default date to fill inconsistent valid dates
        DEFAULT = datetime(2018, 1, 1)
        # prepare string for extraction
        fuzzy_input = fuzzy_input.replace(' ', '')
        fuzzy_input = fuzzy_input.replace('-', '__delimitor__')
        fuzzy_input = fuzzy_input.replace(',', '__delimitor__')
        fuzzy_input = fuzzy_input.lower().replace(
            'present', datetime.now().strftime('%Y-%m-%d'))
        found_dates = []
        for item in self.timesplit(fuzzy_input):
            try:
                # use the parse library with convenient arguments
                found_dates.append(
                    self.p.parse(item, fuzzy=True, default=DEFAULT))
            except Exception:
                continue
        return found_dates

def parse(timestr, parserinfo=None, **kwargs):
    if isinstance(timestr, six.binary_type):
        timestr = timestr.decode()

    if isinstance(timestr, six.string_types):
        try:
            if parserinfo:
                ret = parser(parserinfo).parse(timestr, **kwargs)
            else:
                ret = parser().parse(timestr, **kwargs)
        except TypeError:
            raise ValueError("Can't parse date from string '%s'" % timestr)
    elif isinstance(timestr, int) or isinstance(timestr, float):
        ret = datetime.fromtimestamp(timestr)
    elif isinstance(timestr, datetime) or isinstance(timestr, date):
        ret = timestr
    elif isinstance(timestr, time):
        ret = timestr
    else:
        raise TypeError("Can't convert %s to date." % type(timestr))

    return ret

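# Usage sketch for the wrapper above (assumes six, datetime/date/time and the
# dateutil-style parser class are importable in the surrounding module; the
# example values are hypothetical):
assert parse("2003-09-25T10:49:41") == datetime(2003, 9, 25, 10, 49, 41)
assert parse(b"2003-09-25") == datetime(2003, 9, 25)         # bytes are decoded first
assert parse(datetime(2007, 1, 1)) == datetime(2007, 1, 1)   # datetime objects pass through
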
def __iter__(self):
    """Iterate over all of the lines in the file"""
    self.start()
    parser = self.make_fw_row_parser()
    for line in self._fstor.open(mode='r', encoding=self.spec.encoding):
        yield [e.strip() for e in parser(line)]
    self.finish()

def parse(self, create_data: dict):
    """
    Convert raw log content into usable data.

    :param self: Task instance, supplied by Celery.
    :param create_data: Default log parameters.
    :return: Parsed data.
    """
    log_type = create_data['log_type']
    parser = getattr(handlers, log_type)
    content = create_data['content']
    match_data = parser(content, ProgressRecorder(self))
    return match_data

def main(self, args, app):
    """
    Main logic for calculating agp
    """
    # print args
    # get parameters
    params = self.get_params(args)
    # print params.get('input')
    # create calculator
    parser = AGP()
    with open(params.get('input'), 'r') as f:
        # calculate agp for all input
        return parser(f.readlines())

def main(self, args, app):
    """
    Main logic for calculating agp
    """
    # print args
    # get parameters
    params = self.get_params(args)
    # print params.get('input')
    # create calculator
    parser = AGP()
    with open(params.get('input'), 'r') as f:
        # calculate agp for all input
        return parser(f.readlines())

def __call__(self, lines):
    result = self.get_defaults()
    for token in tokenize(lines):
        key = get_key(token)
        if key in self:
            parser = self[key]
        else:
            parser = self.default_parser_class()
        result[key] = parser(token)
    return result

def testCustomParserShortDaynames(self):
    # Horacio Hoyos discovered that day names shorter than 3 characters,
    # for example two letter German day name abbreviations, don't work:
    # https://github.com/dateutil/dateutil/issues/343
    from dateutil.parser import parserinfo, parser

    class GermanParserInfo(parserinfo):
        WEEKDAYS = [("Mo", "Montag"),
                    ("Di", "Dienstag"),
                    ("Mi", "Mittwoch"),
                    ("Do", "Donnerstag"),
                    ("Fr", "Freitag"),
                    ("Sa", "Samstag"),
                    ("So", "Sonntag")]

    myparser = parser(GermanParserInfo())
    dt = myparser.parse("Sa 21. Jan 2017")
    self.assertEqual(dt, datetime(2017, 1, 21))

def parse_ambit_samples(file_obj):
    # ambit data is not valid xml. need to add a fake top level entry.
    tree = ET.parse(file_obj)
    item = {}
    for sample in tree.find('samples'):
        is_gps_sample = (sample.find('Latitude') is not None)
        for child in sample:
            parser = PARSERS.get(child.tag)
            if not callable(parser):
                continue
            item[child.tag] = parser(child.text)
        if is_gps_sample:
            # merging data. gps samples have full seconds, so it's kinda nicer...
            yield item
            item = {}

def main():
    # Create the output dir
    os.makedirs(PROCESSED_PATH, exist_ok=True)

    # parse_map contains per-file parsers. For example, visibility is parsed differently
    # from air temperature. The keys of the map are the file names, mapped to their
    # respective parsing functions.
    parse_map = {
        RAW_FILE_NAME_AIR_TEMPERATURE: parseAirTemperature,
        RAW_FILE_NAME_PRECIPITATION: parsePrecipitation,
        RAW_FILE_NAME_VISIBILITY: parseVisibility,
        RAW_FILE_NAME_WIND_DIRECTION: parseWindDirection,
        RAW_FILE_NAME_WIND_SPEED: parseWindSpeed,
    }

    # RAW_PATH is a dir containing one folder per location (e.g. OSL, TRD).
    # These location dirs contain weather info (e.g. air_temperature.csv etc.)
    for location in os.listdir(RAW_PATH):
        print("\nProcessing weather data for %s" % location)

        # data is a dict containing a parsed dataframe for each weather category,
        # e.g. data['air_temperature'] = pd.DataFrame
        data = {}
        for file_name in os.listdir('%s/%s' % (RAW_PATH, location)):
            df = pd.read_csv('%s/%s/%s' % (RAW_PATH, location, file_name))
            print('Parsing %s' % file_name)

            # Get appropriate parser for file
            parser = parse_map[file_name]
            # Parse data (key can for example be AIR_TEMPERATURE)
            (df, key) = parser(df)

            # Do common parsing for all frames
            dropExcessColumns(df)
            df[REFERENCE_TIME] = df.apply(removeTimeZone, axis=1)

            # Attach dataframe to the data object
            data[key] = df

        if data:
            # data should now be a dict containing data about all weather
            # categories for this location. Let's merge them and save the
            # output as <location>.csv
            print("Merging data...")
            merged = mergeDataFrames(data)
            merged.to_csv('%s/%s.csv' % (PROCESSED_PATH, location), index=False)

def regex_file(path, parser=parse_txt):
    path = os.path.abspath(path)
    file_name, file_lines = parser(path)
    #
    for line_num, line in enumerate(file_lines):
        for match_info in regex_text(line):
            yield [path,
                   file_name,
                   match_info['REGEX_LABEL'],
                   line_num,
                   match_info['MATCH'],
                   match_info['START'],
                   match_info['END'],
                   match_info['MATCH_LEN'],
                   match_info['NORM_TEXT_LEN'],
                   match_info['CONTEXT'],
                   match_info['PARSED'],
                   ]

def envelopes(request):
    ''' Loads the envelopes page '''
    token = 'Token token=' + request.session.get('api_token')
    path = 'https://simplifiapi2.herokuapp.com/user_envelopes'
    req = requests.get(path, headers={'Authorization': token})
    data = req.json()
    data = sorted(data, key=lambda x: x['envelope_name'].upper())
    load_data = parser(data)
    data = json.dumps(load_data['data'])
    context = {'envelopes': load_data['data'], 'data': data}
    if request.GET.get('type'):
        return render(request, 'envelopes_ajax.html', context)
    else:
        return render(request, 'envelopes.html', context)

def goals_remove_confirm(request, goal_id):
    ''' Loads the modal to remove a goal '''
    if not request.session.get('api_token'):
        return HttpResponseRedirect('/')
    token = 'Token token=' + request.session.get('api_token')
    path = 'https://simplifiapi2.herokuapp.com/goals/' + goal_id
    header = {'Content-type': 'application/json', 'Authorization': token}
    req = requests.get(path, headers=header)
    if req.ok is False:
        context = {'error': req.status_code}
    else:
        data = req.json()
        load_data = parser(data)
        context = {'goal': load_data['data']}
    return render(request, 'goals_remove.html', context)

def transactions_remove_confirm(request, transaction_id):
    ''' Loads the modal to remove a transaction '''
    if not request.session.get('api_token'):
        return HttpResponseRedirect('/')
    # Get token and transaction for deletion
    token = 'Token token=' + request.session.get('api_token')
    path = 'https://simplifiapi2.herokuapp.com/account_transactions/' + transaction_id
    header = {'Content-type': 'application/json', 'Authorization': token}
    req = requests.get(path, headers=header)
    if req.ok is False:
        context = {'error': req.status_code}
    else:
        data = req.json()
        load_data = parser(data)
        context = {'transaction': load_data['data']}
    return render(request, 'transactions_remove.html', context)

def transactions(request):
    ''' Loads the my spending page '''
    if not request.session.get('api_token'):
        return HttpResponseRedirect('/')
    token = 'Token token=' + request.session.get('api_token')
    path = 'https://simplifiapi2.herokuapp.com/account_transactions'
    req = requests.get(path, headers={'Authorization': token})
    data = req.json()
    data = sorted(data, key=lambda x: x['date'].upper())
    load_data = parser(data)
    context = {'transactions': load_data['data']}
    if request.GET.get('type'):
        return render(request, 'transactions_ajax.html', context)
    else:
        return render(request, 'transactions.html', context)

def parse_ambit_samples(file_obj):
    # ambit data is not valid xml. need to add a fake top level entry.
    tree = ET.parse(file_obj)
    item = {}
    # very old format (not sure if it still works...)
    samples = tree.find("samples")
    # either SML (having DeviceLog -> Samples) or a bit older (just Samples)
    if samples is None:
        samples = tree.find(".//Samples")
    for sample in samples:
        is_gps_sample = sample.find("Latitude") is not None
        for child in sample:
            parser = PARSERS.get(child.tag)
            if not callable(parser):
                continue
            item[child.tag] = parser(child.text)
        if is_gps_sample:
            # merging data. gps samples have full seconds, so it's kinda nicer...
            yield item
            item = {}

def parse_ambit_samples(file_obj):
    # ambit data is not valid xml. need to add a fake top level entry.
    tree = ET.parse(file_obj)
    item = {}
    # very old format (not sure if it still works...)
    samples = tree.find('samples')
    # either SML (having DeviceLog -> Samples) or a bit older (just Samples)
    if samples is None:
        samples = tree.find('.//Samples')
    for sample in samples:
        is_gps_sample = (sample.find('Latitude') is not None)
        for child in sample:
            parser = PARSERS.get(child.tag)
            if not callable(parser):
                continue
            item[child.tag] = parser(child.text)
        if is_gps_sample:
            # merging data. gps samples have full seconds, so it's kinda nicer...
            yield item
            item = {}

def parse_response(self, response):
    parser = None

    # Handle login status.
    if response.status == 400:
        req_once_logged = response.meta['req_once_logged'] if 'req_once_logged' in response.meta else response.request
        self.logger.warning("%s: HTTP 400 at %s. Going to index page. Error message: %s" % (
            self.login['username'], response.url, self.get_text(response.xpath(".//body/text()").extract())))
        yield self.make_request(url='index', response=response, req_once_logged=req_once_logged, shared=False, dont_filter=True)
    elif self.islogged(response) is False:
        self.loggedin = False
        req_once_logged = response.meta['req_once_logged'] if 'req_once_logged' in response.meta else response.request
        if self.is_login_page(response) is False:
            self.logger.info('Not logged in. Going to login page.')
            yield self.make_request(reqtype='loginpage', response=response, req_once_logged=req_once_logged)
        elif self.is_login_page(response) is True:
            if self.is_login_page(response) is True and self.login_failed(response) is True:
                self.logger.info('Failed last login as %s. Trying again. Error: %s' % (
                    self.login['username'], self.get_text(response.xpath('.//p[@class="error"]'))))
            if self.logintrial > self.settings['MAX_LOGIN_RETRY']:
                self.wait_for_input("Too many failed logins", req_once_logged)
                self.logintrial = 0
                return
            self.logger.info("Trying to login as %s." % self.login['username'])
            self.logintrial += 1
            yield self.make_request(reqtype='dologin', response=response, req_once_logged=req_once_logged)
    else:
        self.loggedin = True
        if response.meta['reqtype'] == 'dologin':
            self.logger.info("Successfully logged in as %s! Returning to stored request %s" % (
                self.login['username'], response.meta['req_once_logged']))
            if response.meta['req_once_logged'] is None:
                self.logger.warning("We are trying to yield a None. This should not happen.")
            yield response.meta['req_once_logged']
        else:
            if self.is_threadlisting(response) is True:
                parser = self.parse_threadlisting
            elif self.is_message(response) is True:
                parser = self.parse_message
            elif self.is_user(response) is True:
                parser = self.parse_user

            # Yield the appropriate parsing function.
            if parser is not None:
                for x in parser(response):
                    yield x

def update_schedule(view, data):
    if data is None:
        view.flash('Saved changes.')
        return
    if data == "":
        view.flash(u"Empty data file was given!", "warning")
        return
    parsers = [
        parse_json, parse_csv, parse_locations, parse_newcsv, parse_pms_json]
    result = None
    for parser in parsers:
        try:
            result = parser(view.context, data)
        except InvalidParserError, e:
            continue
        except ScheduleImportError, e:
            for flash_params in e.messages:
                view.flash(*flash_params)
            return

def question(request):
    file = BytesIO(request.body)
    user_question = sr.AudioFile(file)
    recognizer_instance = sr.Recognizer()
    recognizer_instance.energy_threshold = 200
    with user_question as source:
        audio = recognizer_instance.record(source)
    try:
        stringy = recognizer_instance.recognize_google(audio, language='ro-RO')
    except sr.UnknownValueError:
        return HttpResponse('{ "message": "Speech is unintelligible." }')
    except sr.RequestError:
        return HttpResponse('{ "message": "Speech recognition failed." }')
    stringy = strip_accents(stringy)
    file.close()
    return HttpResponse('{ "message": "' + parser(stringy) + '" }')

def get_parser(self, response_headers, response_body):
    # sanitize response headers
    response_headers = {k.lower(): v for k, v in response_headers.items()}

    # check response type, should be HTML
    # @TODO: HTML check

    if 'server' in response_headers:
        if response_headers['server'].startswith('apache'):
            response_headers['server'] = 'apache'
        elif response_headers['server'].startswith('nginx'):
            response_headers['server'] = 'nginx'
        else:
            response_headers['server'] = 'unknown'
    else:
        response_headers['server'] = 'unknown'

    if response_headers['server'] in self.parsers:
        parsers = self.parsers[response_headers['server']]
        for parser in parsers:
            try:
                data = parser(response_body)
                if data:
                    self.parser = parser
                    return self.parser
            except Exception as ex:
                pass

    # no suitable index parser found at this point, do some regex on <a> tags
    try:
        data = self._parse4(response_body)
        if data:
            self.parser = self._parse4
            return self.parser
    except:
        pass

def buildQuery(self, conditions: List[tuple]):
    for element in conditions:
        variable = element[0]
        condition = element[1]
        value = element[2]
        if variable == 'id' and condition in ['>', '<', '=', 'IN', 'NOTIN', 'BETWEEN']:
            query_builder.pickWhereCondition(variable, condition, value)
        elif variable == 'url' and condition == '=':
            query_builder.pickWhereCondition(variable, condition, value)
        elif variable == 'date' and condition in ['>', '<', '=', 'BETWEEN']:
            date = parser(variable)
            date_string = f'{date.year}-{date.month}-{date.day}'
            query_builder.pickWhereCondition(date_string, condition, value)
        elif variable == 'rating' and condition in ['>', '<', '=', 'BETWEEN']:
            query_builder.pickWhereCondition(variable, condition, value)

def readAvailableData(self):
    while True:
        try:
            r = self.reader.next()
        except StopIteration:
            break
        self.lineNo += 1
        if self.timeParser is None:
            self.timeParser = self._detectTimeType(r[0])
        t = self.timeParser(r[0])
        i = 0
        for rawValue in r[1:]:
            if len(self.columnParsers) <= i:
                # initialize list of possible parsers for this column:
                self.columnParsers.append([float, self._parseFloatComma])
            # try all available parser functions, and remove those that fail:
            parsers = self.columnParsers[i][:]
            assert len(parsers) > 0
            for parser in parsers:
                try:
                    v = parser(rawValue)
                except Exception, e:
                    if len(self.columnParsers[i]) <= 1:
                        raise Exception(
                            "failed to parse CSV value '%s' (line %d, table column %d) as '%s': %s"
                            % (rawValue, self.lineNo, i + 2, parser, e))
                    else:
                        self.columnParsers[i].remove(parser)
                else:
                    break
            self.store.update((self.ids[i], t, v))
            i += 1

def handle(self, *args, **options):
    for user in TwitterUser.objects.all():
        api = Twitter(user.username, user.password)
        timeline = api.statuses.user_timeline()

        # Update Twitter user data
        userdata = api.users.show(screen_name=user.username)
        user.twitter_id = userdata['id']
        user.location = userdata['location']
        user.name = userdata['name']
        user.image_url = userdata['profile_image_url']
        user.homepage = userdata['url']
        user.description = userdata['description']
        user.save()

        # Fetch tweets
        for tweet in timeline:
            Tweet.objects.get_or_create(
                twitter_user=user,
                tweet=tweet['text'],
                tweet_id=tweet['id'],
                # dateutil.parser is a module; parse() does the actual parsing
                posted=dateutil.parser.parse(tweet['created_at']).astimezone(tzlocal()).replace(tzinfo=None)
            )

# display_device allows our custom vendor implementation to include
# special information when displaying information about a device using
# our plugin as a vendor.
def display_device(device):
    # no special information needed to run
    return ''


# openaps calls get_uses to figure out how to use a device using
# agp as a vendor. Return a list of classes which inherit from Use,
# or are compatible with it:
def get_uses(device, config):
    # make agp an openaps use command
    return [agp]

######################################################
# openaps definitions are complete
######################################################
# The remainder is for debugging and testing purposes.
# It allows running the module from the command line without openaps,
# uses no openaps logic, and is useful for debugging/testing.
# It only runs when invoked as:
# $ python agp.py
if __name__ == '__main__':
    parser = AGP()
    with open("glucose.txt") as f:
        for hour, vals in parser(f.readlines()):
            print hour, vals

def parse_line(line, parser):
    line = line.split(";")
    return parser(line)

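# Usage sketch for parse_line (hypothetical record and parser callable): the
# line is split on ';' and the resulting list of fields is handed to `parser`.
record = "2017-01-21;42;ok"
parsed = parse_line(record, lambda fields: {"date": fields[0], "value": int(fields[1])})
assert parsed == {"date": "2017-01-21", "value": 42}
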
def parse_row(input_row, parsers):
    """given a list of parsers (some of which may be None)
    apply the appropriate one to each element of the input_row"""
    return [parser(value) if parser is not None else value
            for value, parser in zip(input_row, parsers)]

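# Usage sketch for parse_row (hypothetical row): None entries in the parsers
# list leave the corresponding value untouched; everything else is converted.
row = ["2014-11-01", "17", "3.5", "AAPL"]
assert parse_row(row, [None, int, float, None]) == ["2014-11-01", 17, 3.5, "AAPL"]
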
def run(self, ips, start_time=""): """ str, str -> networkx multiDiGraph :param ips: list of IP addresses to enrich in the graph :param start_time: string in ISO 8601 combined date and time format (e.g. 2014-11-01T10:34Z) or datetime object. :return: subgraph Note: based on From https://gist.github.com/zakird/11196064 """ # Parse the start_time if type(start_time) is str: try: time = dateutil.parser.parse(start_time).strftime("%Y-%m-%dT%H:%M:%SZ") except: time = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ") elif type(start_time) is datetime: time = start_time.strftime("%Y-%m-%dT%H:%M:%SZ") else: time = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ") # Since sometimes I just pass in an IP, we'll fix it here. if type(ips) == str: ips = [ips] # Validate IP for ip in ips: _ = ipaddress.ip_address(unicode(ip)) g = nx.MultiDiGraph() # Create cymru ASN enrichment node cymru_asn_uri = "class=attribute&key={0}&value={1}".format("enrichment", "cymru_asn_enrichment") attributes = { "class": "attribute", "key": "enrichment", "value": "cymru_asn_enrichment", "uri": cymru_asn_uri, "start_time": time, } g.add_node(cymru_asn_uri, attributes) # print ips a = cymru_api.CymruIPtoASNService() for result in a.query(ips): try: t = dateutil.parser(result.allocated_at).strftime("%Y-%m-%dT%H:%M:%SZ") except: t = time # Create ip's node ip_uri = "class=attribute&key={0}&value={1}".format("ip", result.ip_address) g.add_node( ip_uri, {"class": "attribute", "key": "ip", "value": result.ip_address, "start_time": time, "uri": ip_uri}, ) # link to cymru ASN enrichment edge_attr = {"relationship": "describedBy", "origin": "cymru_asn_enrichment", "start_time": time} source_hash = uuid.uuid3(uuid.NAMESPACE_URL, ip_uri) dest_hash = uuid.uuid3(uuid.NAMESPACE_URL, cymru_asn_uri) edge_uri = "source={0}&destionation={1}".format(str(source_hash), str(dest_hash)) rel_chain = "relationship" while rel_chain in edge_attr: edge_uri = edge_uri + "&{0}={1}".format(rel_chain, edge_attr[rel_chain]) rel_chain = edge_attr[rel_chain] if "origin" in edge_attr: edge_uri += "&{0}={1}".format("origin", edge_attr["origin"]) edge_attr["uri"] = edge_uri g.add_edge(ip_uri, cymru_asn_uri, edge_uri, edge_attr) # Create bgp prefix node bgp_uri = "class=attribute&key={0}&value={1}".format("bgp", result.bgp_prefix) attributes = { "class": "attribute", "key": "bgp", "value": result.bgp_prefix, "uri": bgp_uri, "start_time": time, } g.add_node(bgp_uri, attributes) # Link bgp prefix node to ip edge_attr = {"relationship": "describedBy", "origin": "cymru_asn_enrichment", "start_time": time} source_hash = uuid.uuid3(uuid.NAMESPACE_URL, ip_uri) dest_hash = uuid.uuid3(uuid.NAMESPACE_URL, bgp_uri) edge_uri = "source={0}&destionation={1}".format(str(source_hash), str(dest_hash)) rel_chain = "relationship" while rel_chain in edge_attr: edge_uri = edge_uri + "&{0}={1}".format(rel_chain, edge_attr[rel_chain]) rel_chain = edge_attr[rel_chain] if "origin" in edge_attr: edge_uri += "&{0}={1}".format("origin", edge_attr["origin"]) edge_attr["uri"] = edge_uri g.add_edge(ip_uri, bgp_uri, edge_uri, edge_attr) # create asn node asn_uri = "class=attribute&key={0}&value={1}".format("asn", result.as_number) attributes = { "class": "attribute", "key": "asn", "value": result.as_number, "uri": asn_uri, "start_time": time, } try: attributes["owner"] = result.as_name except: pass g.add_node(asn_uri, attributes) # link bgp prefix to asn node edge_attr = {"relationship": "describedBy", "origin": "cymru_asn_enrichment", "start_time": t} source_hash = uuid.uuid3(uuid.NAMESPACE_URL, 
ip_uri) dest_hash = uuid.uuid3(uuid.NAMESPACE_URL, asn_uri) edge_uri = "source={0}&destionation={1}".format(str(source_hash), str(dest_hash)) rel_chain = "relationship" while rel_chain in edge_attr: edge_uri = edge_uri + "&{0}={1}".format(rel_chain, edge_attr[rel_chain]) rel_chain = edge_attr[rel_chain] if "origin" in edge_attr: edge_uri += "&{0}={1}".format("origin", edge_attr["origin"]) edge_attr["uri"] = edge_uri g.add_edge(ip_uri, asn_uri, edge_uri, edge_attr) # Return the data enriched IP as a graph return g
def formatted_date(date_object, date_format):
    return to_representation(parser(date_object))

def parse(data, url=None, mimetype=None):
    " Determine which ruleset to use "
    rulesets = parse_rules()
    parsers = [FeedXML, FeedHTML, FeedJSON]

    # 1) Look for a ruleset based on path
    if url is not None:
        for ruleset in rulesets.values():
            if 'path' in ruleset:
                for path in ruleset['path']:
                    if fnmatch(url, path):
                        parser = [x for x in parsers if x.mode == ruleset['mode']][0]
                        return parser(data, ruleset)

    # 2) Look for a parser based on mimetype
    if mimetype is not None:
        parser_candidates = [x for x in parsers if mimetype in x.mimetype]

    if mimetype is None or parser_candidates is None:
        parser_candidates = parsers

    # 3) Look for a working ruleset for the given parser
    # 3a) See if parsing works
    # 3b) See if .items matches anything
    for parser in parser_candidates:
        ruleset_candidates = [x for x in rulesets.values()
                              if x['mode'] == parser.mode and 'path' not in x]
        # rulesets with a 'path' should have been caught beforehand

        try:
            feed = parser(data)
        except ValueError:
            # parsing did not work
            pass
        else:
            # parsing worked, now we try the rulesets
            for ruleset in ruleset_candidates:
                feed.rules = ruleset
                try:
                    feed.items[0]
                except (AttributeError, IndexError):
                    # parsing and/or item picking did not work out
                    pass
                else:
                    # it worked!
                    return feed

    raise TypeError('no way to handle this feed')

def _is_match_func(line):
    parsed_line = parser(line)
    return _is_match(parsed_line.by_index(field))

            self.add_record(record)

        # calculate output for each hour of the day
        for hour in range(0, 24):
            agps = calc_agp(self.hour_buckets[hour])
            for minute in range(0, 60, 5):
                out.append((hour, minute, agps[minute / 5]))
        return out


def calc_agp(bucket):
    subbuckets = [[] for x in range(0, 60, 5)]
    for (time, glucose) in bucket:
        subbuckets[int(math.floor(time.minute / 5))].append(glucose)
    agps = [percentile(subbucket, [10, 25, 50, 75, 90]) for subbucket in subbuckets]
    return agps


# The remainder is for debugging and testing purposes.
# It allows running the module from the command line without openaps,
# uses no openaps logic, and is useful for debugging/testing.
# It only runs when invoked as:
# $ python agp.py
if __name__ == '__main__':
    parser = AGP()
    with open("glucose.txt") as f:
        for hour, minute, vals in parser(f.readlines()):
            print hour, minute, vals

    rds = redis.StrictRedis(connection_pool=POOL)
    rds.set(key, value)
    return value


def get_test_event():
    '''loads test_event.json file and returns the dict'''
    test_json = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'test_event.json')
    return json.load(open(test_json))


def parser():
    '''
    Construct a parser to parse arguments
    @return argparse parser
    '''
    parse = argparse.ArgumentParser(description="Run EONET query with given parameters")
    parse.add_argument("--starttime", required=False, default=None,
                       help="Start time for query range.", dest="starttime")
    parse.add_argument("--endtime", required=False, default=None,
                       help="End time for query range.", dest="endtime")
    parse.add_argument("--lookback_days", required=False, default=None,
                       help="Number of days to lookback in query. Use 'redis': will use redis to query for products updated since last successful query time.",
                       dest="lookback_days")
    parse.add_argument("--status", required=False, default=None, choices=['open', 'closed'],
                       help="Status of event. open or closed", dest="status")
    parse.add_argument("--source", required=False, default=None,
                       help="Query over single source, sources at: https://eonet.sci.gsfc.nasa.gov/api/v2.1/sources",
                       dest="source")
    parse.add_argument("--slack_notification", required=False, default=False,
                       help="Key for slack notification, will notify via slack if provided.",
                       dest="slack_notification")
    parse.add_argument("--polygon", required=False, default=None,
                       help="Geojson polygon filter", dest="polygon")
    parse.add_argument("--test", required=False, default=False, action="store_true",
                       help="Run a test submission. Overrides all other params", dest="test")
    parse.add_argument("--submit", required=False, default=False, action="store_true",
                       help="Submits the event directly. Must have datasets in working directory.",
                       dest="submit")
    return parse


if __name__ == '__main__':
    args = parser().parse_args()
    main(starttime=args.starttime, endtime=args.endtime, lookback_days=args.lookback_days,
         status=args.status, source=args.source, slack_notification=args.slack_notification,
         polygon=args.polygon, test=args.test, submit=args.submit)

def parse_row_old(input_row, parsers):
    return [
        parser(value) if parser is not None else value
        for value, parser in zip(input_row, parsers)
    ]

def parse_line(line, parser):
    line = line.split(';')
    return parser(line)

def testParserParseStr(self):
    from dateutil.parser import parser

    self.assertEqual(parser().parse(self.str_str),
                     parser().parse(self.uni_str))

def _get_factory(self, prefix, path, parser):
    """Wrapper for getting objects."""
    data = self.get("/".join((prefix, path, "")))
    return parser(weblate=self, **data)

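# Usage sketch for _get_factory (hypothetical stand-ins; the real client and
# result classes are not part of the snippet above). The `parser` argument is a
# class or factory called with the fetched fields plus a `weblate` back-reference.
class _Project(object):
    def __init__(self, weblate, name, slug):
        self.weblate, self.name, self.slug = weblate, name, slug

class _StubClient(object):
    def get(self, path):
        # pretend HTTP GET that returns already-decoded JSON for the given path
        return {"name": "Hello", "slug": "hello"}

_client = _StubClient()
_project = _get_factory(_client, "projects", "hello", _Project)
assert (_project.name, _project.slug) == ("Hello", "hello")
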
        timestamp_nextfile = gettimestamp(d[res])
        if (comparetimestamp(to_time, timestamp_nextfile) >= 0):
            printlog(res, from_time, to_time)


# parser: finds the first timestamp encountered in every file and stores it in a
# dictionary with file_no as key and timestamp as value, in the same order in
# which the files were inserted into the dictionary.
def parser(dir_path):
    # os.chdir(dir_path)
    regex = r'^([0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}.[0-9]{4}Z)'
    for x in os.listdir(dir_path):
        if (x[-4:] == '.log' or x[-4:] == '.txt'):
            file_no = re.findall(r'\d+', x)
            y = int(file_no[0])
            d[y] = x
            with open(os.path.join(dir_path, x), "r") as file:
                line = file.readline()
                while (not re.findall(regex, line)):
                    line = file.readline()
                timestamp_check = re.findall(regex, line)
                if timestamp_check:
                    timestamp = re.findall(regex, line)[0]
                    timestamp_dict[y] = timestamp


sortfiles(dir_path)
parser(dir_path)
binary_search(timestamp_dict, from_time)