def get_new_ephemeris(targetid, verbose=True):
    # read in the new ephemeris provided by Joel Hartman
    ephem_path = os.path.join(
        DATADIR,
        'updated_ephemerides/{}/{}.updateephem.txt'.format(
            today_YYYYMMDD(), targetid))

    with open(ephem_path, 'r') as f:
        lines = f.readlines()

    epoch = [l for l in lines if '- Epoch: ' in l][0]
    period = [l for l in lines if '- Period: ' in l][0]
    dur = [l for l in lines if '- Transit duration: ' in l][0]

    if verbose:
        print(epoch, period, dur)

    epoch = float(search('{} - Epoch: {} +/- {}', epoch)[1].strip())
    period = float(search('{} - Period: {} +/- {}', period)[1].strip())
    dur = float(search('{} - Transit duration: {} +/- {}', dur)[1].strip())

    if verbose:
        print(epoch, period, dur)

    ephem_dict = {'period': period, 'epoch': epoch, 'duration': dur}

    return ephem_dict
def resolve_git_shortcut(git_shortcut):
    result = parse.parse("{:w}/{:w}#{:w}", git_shortcut)
    if not result:
        result = parse.parse("{:w}/{:w}", git_shortcut)
    if not result:
        return False

    username = result.fixed[0]
    project = result.fixed[1]
    git_tag = None
    if len(result.fixed) > 2:
        git_tag = result.fixed[2]

    r = requests.get(
        "https://raw.githubusercontent.com/{0}/{1}/master/setup.py".format(
            username, project))
    if r.status_code == 404:
        return False

    result = parse.search("name='{}'", r.content)
    result2 = parse.search('name="{}"', r.content)
    if result:
        egg_name = result.fixed[0]
    elif result2:
        egg_name = result2.fixed[0]
    else:
        egg_name = project

    if git_tag:
        return "git+https://github.com/{0}/{1}.git@{2}#egg={3}".format(
            username, project, git_tag, egg_name)
    else:
        return "git+https://github.com/{0}/{1}.git#egg={2}".format(
            username, project, egg_name)
def publish(logline, rootname, cache):
    global face, keychain

    # Pull out and parse datetime for log entry
    # (note we should use point time for timestamp)
    try:
        if not ": (point" in logline:
            return
        logdtstr = parse.search("[{}]", logline)[0]
        point = parse.search("(point {})", logline)[0].split(" ")
    except Exception as detail:
        print("publish: Parse error for", logline, "-", detail)
        return

    try:
        logdt = datetime.strptime(logdtstr, "%Y-%m-%d %H:%M:%S.%f")
    except Exception as detail:
        print("publish: Date/time conversion error for", logline, "-", detail)
        return

    name = pointNameToName(point[0], rootname)
    data_json, data_dict = pointToJSON(point)

    if name is not None:
        print("Publishing log entry", logdt, "to", name,
              data_dict["timestamp"], "payload:", data_json)
        try:
            cache.add(createData(name, data_dict["timestamp"], data_json))
        except Exception as detail:
            print("publish: Error calling createData for", logline, "-", detail)
def parse_region(self, region):
    region = "".join(region.split())
    try:
        name = parse.search(";@{:w}:", region).fixed[0]
        reg = parse.search(":({:d},{:d})", region).fixed
        self.ig_regions[name] = reg
    except (AttributeError, IndexError):
        pass
def parse_additional(self, additional):
    additional = "".join(additional.split())
    try:
        name = parse.search(";+{:w}:", additional).fixed[0]
        add = parse.search(":{:w}", additional).fixed[0]
        self.ig_additional[name] = add
    except (AttributeError, IndexError):
        pass
def signal_params(filename):
    "Extract signal parameter values from filename"
    yuk = search("TopYuk_{:f}", filename).fixed[0]
    lam = search("SlfCoup_{:f}", filename)
    if lam is None:
        lam = -search("SlfCoup_m{:f}", filename).fixed[0]
    else:
        lam = lam.fixed[0]
    return (yuk, lam)
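# Usage sketch (illustrative, not from the original analysis code): the
# filename layout below is an assumed example that happens to fit the
# TopYuk_/SlfCoup_ patterns above, with "m" marking a negative coupling.
assert signal_params("TopYuk_2.0_SlfCoup_m1.5.root") == (2.0, -1.5)
assert signal_params("TopYuk_1.0_SlfCoup_0.5.root") == (1.0, 0.5)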
def find_person(self):
    """
    Load committee details for the given detail page URL or numeric ID
    """
    # Read either person_id or committee_url from the opposite
    user_overview_url = self.urls['PERSON_OVERVIEW_PRINT_PATTERN'] % self.config['scraper']['base_url']
    logging.info("Getting user overview from %s", user_overview_url)
    time.sleep(self.config['scraper']['wait_time'])
    response = self.get_url(user_overview_url)
    if not response:
        return
    # seek(0) is necessary to reset response pointer.
    response.seek(0)
    html = response.read()
    html = html.replace('&nbsp;', ' ')
    parser = etree.HTMLParser()
    dom = etree.parse(StringIO(html), parser)
    trs = dom.xpath(self.xpath['PERSONLIST_LINES'])
    for tr in trs:
        current_person = None
        link = tr.xpath('.//a')
        if len(link):
            parsed = parse.search(self.urls['PERSON_DETAIL_PARSE_PATTERN'],
                                  link[0].get('href'))
            if not parsed:
                parsed = parse.search(self.urls['PERSON_DETAIL_PARSE_PATTERN_ALT'],
                                      link[0].get('href'))
            if parsed:
                person_id = parsed['person_id']
                current_person = Person(originalId=person_id)
        if current_person:
            tds = tr.xpath('.//td')
            if len(tds):
                if len(tds[0]):
                    person_name = tds[0][0].text.strip()
                    if person_name:
                        current_person.name = person_name
            if len(tds) > 1:
                person_party = tds[1].text.strip()
                if person_party:
                    for party_alias in self.config['scraper']['party_alias']:
                        if party_alias[0] == person_party:
                            person_party = party_alias[1]
                            break
                    new_organization = Organization(originalId=person_party,
                                                    name=person_party,
                                                    classification='party')
                    new_membership = Membership(
                        originalId=unicode(person_id) + '-' + person_party,
                        organization=new_organization)
                    current_person.membership = [new_membership]
        if current_person:
            if hasattr(self, 'person_queue'):
                self.person_queue.add(current_person.originalId)
            self.db.save_person(current_person)
    return
def __init__(self, string):
    if string is not None:
        a = parse.search("items={page:d}-{limit:d}", string)
        b = parse.search("items={page:d}-", string)
        c = parse.search("items=-{limit:d}", string)
        s = a or b or c
        if s:
            s = s.named
            self.page = s.get('page', None)
            self.limit = s.get('limit', None)
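# Usage sketch (illustrative, not from the original source): the "items=..."
# values are invented Range-header strings showing what each pattern captures.
from parse import search

assert search("items={page:d}-{limit:d}", "items=0-24").named == {"page": 0, "limit": 24}
assert search("items={page:d}-", "items=3-").named == {"page": 3}
assert search("items=-{limit:d}", "items=-10").named == {"limit": 10}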
def __init__(self, data):
    """Initialize Mode from xrandr data."""
    self.data = data
    self.header = data[0]
    self.name = parse.search("({mode_name})", self.header)["mode_name"]
    self.res_x = parse.search("h: width{:s}{res_x:d}", data[1])["res_x"]
    self.res_y = parse.search("v: height{:s}{res_y:d}", data[2])["res_y"]
    self.refresh = parse.search("{refresh:f}Hz", data[2])["refresh"]
    self.preferred = "+preferred" in self.header
    self.current = "*current" in self.header
def parseError(self, errorDesc):
    parsed = search("MapperParsingException[{}[{field_name}]{}", errorDesc)
    if not parsed:
        parsed = search("MapperParsingException[{}[{}]{}[{field_name}]{}", errorDesc)
    if not parsed:
        parsed = search("{}MapperParsingException[{}[{field_name}]{}", errorDesc)
    if not parsed:
        parsed = search("{}MapperParsingException[{}[{}]{}[{field_name}]{}", errorDesc)

    LOG.info("Parsed ES Error: %s from description %s", parsed, errorDesc)
    if parsed and parsed.named:
        return parsed.named

    LOG.warning("Couldn't parse ES error: %s", errorDesc)
    return None
def get_end_time_from_file(self):
    """
    Get first and last file of list and set end time from file name

    :returns: list of begin and end time as string
    """
    pattern = 'TMP_TGL_2m_{}_allmembers.grib2'
    begin = search(pattern, self.file_list[0])[0]
    end = search(pattern, self.file_list[-1])[0]
    return begin, end
def find_person(self):
    """
    Load committee details for the given detail page URL or numeric ID
    """
    # Read either person_id or committee_url from the opposite
    user_overview_url = self.urls['PERSON_OVERVIEW_PRINT_PATTERN']
    logging.info("Getting user overview from %s", user_overview_url)
    time.sleep(self.config.WAIT_TIME)
    response = self.get_url(user_overview_url)
    if not response:
        return
    # seek(0) is necessary to reset response pointer.
    response.seek(0)
    html = response.read()
    html = html.replace('&nbsp;', ' ')
    parser = etree.HTMLParser()
    dom = etree.parse(StringIO(html), parser)
    trs = dom.xpath(self.xpath['PERSONLIST_LINES'])
    for tr in trs:
        current_person = None
        link = tr.xpath('.//a')
        if len(link):
            parsed = parse.search(self.urls['PERSON_DETAIL_PARSE_PATTERN'],
                                  link[0].get('href'))
            if not parsed:
                parsed = parse.search(self.urls['PERSON_DETAIL_PARSE_PATTERN_ALT'],
                                      link[0].get('href'))
            if parsed:
                person_id = parsed['person_id']
                current_person = Person(numeric_id=person_id)
        if current_person:
            tds = tr.xpath('.//td')
            if len(tds):
                if len(tds[0]):
                    person_name = tds[0][0].text.strip()
                    if person_name:
                        current_person.title = person_name
            if len(tds) > 1:
                person_party = tds[1].text.strip()
                if person_party:
                    if person_party in self.config.PARTY_ALIAS:
                        person_party = self.config.PARTY_ALIAS[person_party]
                    current_person.committee = [{
                        'committee': Committee(identifier=person_party,
                                               title=person_party,
                                               type='party')}]
        if current_person:
            if hasattr(self, 'person_queue'):
                self.person_queue.add(current_person.numeric_id)
            self.db.save_person(current_person)
    return
def parseLine(self, line):
    try:
        if not ": (point" in line:
            return
        dateTimeStr = parse.search("[{}]", line)[0]
        point = parse.search("(point {})", line)[0].split(" ")
    except Exception as detail:
        print("publish: Parse error for", line, "-", detail)
        return

    try:
        dateTime = datetime.strptime(dateTimeStr, "%Y-%m-%d %H:%M:%S.%f")
    except Exception as detail:
        print("publish: Date/time conversion error for", line, "-", detail)
        return

    self.pointNameToNDNName(point[0])
def parser(data):
    pattern = "{op} {ax} {val:d}"
    match = parse.search(pattern, data)
    if match:
        return match.named

    pattern = "{op} {ax} {val}"
    match = parse.search(pattern, data)
    if match:
        return match.named

    pattern = "{op} {ax}"
    match = parse.search(pattern, data)
    if match:
        return match.named
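# Illustrative check of the first pattern above (not from the original project;
# the command string is an invented example). Inputs without a trailing integer
# fall through to the looser patterns in parser().
import parse

assert parse.search("{op} {ax} {val:d}", "SET X 10").named == {"op": "SET", "ax": "X", "val": 10}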
def pick(token, input_msg):
    result = search(token, input_msg)
    if result is not None:
        result, = result.fixed
        return result
    else:
        return None
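# Usage sketch (illustrative, not from the original source): pick() unpacks the
# single captured field, or returns None when the token is absent.
assert pick("Temperature: {:d} C", "status Temperature: 21 C ok") == 21
assert pick("Humidity: {:d} %", "status Temperature: 21 C ok") is None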
async def server_countdown_close_connection_in_middle(ws, path):
    await WebSocketServerHelper.send_connection_ack(ws)
    result = await ws.recv()
    json_result = json.loads(result)
    assert json_result["type"] == "start"
    payload = json_result["payload"]
    query = payload["query"]
    query_id = json_result["id"]

    count_found = search("count: {:d}", query)
    count = count_found[0]
    stopping_before = count // 2
    print(
        f"Countdown started from: {count}, stopping server before {stopping_before}"
    )
    for number in range(count, stopping_before, -1):
        await ws.send(
            countdown_server_answer.format(query_id=query_id, number=number))
        await asyncio.sleep(2 * MS)

    print("Closing server while subscription is still running now")
    await ws.close()
    await ws.wait_closed()
    print("Server is now closed")
def get_submission(self, submission_url=None, submission_id=None):
    """
    Load submission (Vorlage) details for the submission given by
    detail page URL or numeric ID
    """
    # Read either submission_id or submission_url from the opposite
    if submission_id is not None:
        submission_url = self.urls['SUBMISSION_DETAIL_PRINT_PATTERN'] % submission_id
    elif submission_url is not None:
        parsed = parse.search(self.urls['SUBMISSION_DETAIL_PARSE_PATTERN'],
                              submission_url)
        submission_id = parsed['submission_id']

    logging.info("Getting submission %d from %s", submission_id, submission_url)

    submission = Submission(numeric_id=submission_id)

    time.sleep(self.config.WAIT_TIME)
    try:
        response = self.user_agent.open(submission_url)
    except urllib2.HTTPError, e:
        if e.code == 404:
            sys.stderr.write(
                "URL not found (HTTP 404) error caught: %s\n" % submission_url)
            sys.stderr.write(
                "Please check BASE_URL in your configuration.\n")
            sys.exit(1)
def find_sessions(self, start_date=None, end_date=None):
    """
    Find sessions within a given time frame and add them to the session queue.
    """
    # list of (year, month) tuples to work from
    start_month = start_date.month
    end_months = (end_date.year - start_date.year) * 12 + end_date.month + 1
    monthlist = [(yr, mn) for (yr, mn) in (
        ((m - 1) / 12 + start_date.year, (m - 1) % 12 + 1)
        for m in range(start_month, end_months)
    )]

    for (year, month) in monthlist:
        url = self.urls['CALENDAR_MONTH_PRINT_PATTERN'] % (year, month)
        logging.info("Looking for sessions in %04d-%02d at %s", year, month, url)
        time.sleep(self.config.WAIT_TIME)
        response = self.user_agent.open(url)
        html = response.read()
        html = html.replace('&nbsp;', ' ')
        parser = etree.HTMLParser()
        dom = etree.parse(StringIO(html), parser)
        found = 0
        for link in dom.xpath('//a'):
            href = link.get('href')
            if href is None:
                continue
            parsed = parse.search(self.urls['SESSION_DETAIL_PARSE_PATTERN'], href)
            if hasattr(self, 'session_queue') and parsed is not None:
                self.session_queue.add(int(parsed['session_id']))
                found += 1
        if found == 0:
            logging.info("No sessions found for month %04d-%02d", year, month)
            if self.options.verbose:
                print "No sessions found for month %04d-%02d\n" % (year, month)
def get_resource(self, path: str) -> ResourceABC:
    result = parse.search(self.config.pattern, path)
    if result:
        canonical = self.config.pattern.format(**result.named)
        if path.startswith(canonical):
            return self.create_resource(canonical, **result.named)
    return None
def _get_session_name(self, f: Path):
    if self.filename_format is not None and fnmatch(
            self.filename_format, "*{session*}*"):
        sess_name = search(self.filename_format, f.name)['session']
    else:
        sess_name = f.name
    return sess_name
def test_change_password(logged_in, mailoutbox, settings):
    payload = dict(
        old_password='******',
        new_password='******',
    )
    response = logged_in.client.patch(
        reverse('users:change_password'),
        json.dumps(payload),
        content_type='application/json',
    )
    assert response.status_code == 200
    assert len(mailoutbox) == 1
    msg = mailoutbox[0]
    assert msg.subject == 'Your {site_name} password has been changed'.format(
        site_name=settings.SITE_NAME)
    assert msg.to == [logged_in.user.email]
    match = parse.search(
        '''Your password has been changed!''',
        msg.body,
    )
    assert match is not None
def lookup_opcode(asm_line):
    for i in INSTRUCTIONS_TABLE.values():
        if parse.search(i.asm_exp, asm_line) \
                and asm_line.split()[0] == i.asm_exp.split()[0] \
                and len(asm_line.split()) == len(i.asm_exp.split()):
            return i.get_opcode(asm_line)
    return None
def skip_page(markdown: str) -> bool:
    template = "git-snippet: {action:S}"
    result = parse.search(template, markdown)
    if result:
        if result["action"] == "enable":
            return False
    return True
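# Usage sketch (illustrative, not from the original plugin): the markdown
# snippets are invented examples of pages with and without the marker.
assert skip_page("<!-- git-snippet: enable -->\n# Docs") is False
assert skip_page("# A page with no marker") is True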
def get_end_time_from_file(self):
    """
    Get first and last file of list and set begin and end time from the file names

    :returns: list of begin and end time as string
    """
    pattern = 'ps10km_{}_000.grib2'
    begin = search(pattern, self.file_list[0])[0]
    begin = datetime.strptime(begin, '%Y%m%d%H').strftime('%Y-%m-%dT%HZ')
    end = search(pattern, self.file_list[-1])[0]
    end = datetime.strptime(end, '%Y%m%d%H').strftime('%Y-%m-%dT%HZ')
    return [begin, end]
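# Usage sketch with an invented member file name (not from the original data
# set); real names only need to match the 'ps10km_{}_000.grib2' pattern above.
from datetime import datetime
from parse import search

stamp = search('ps10km_{}_000.grib2', 'ps10km_2021010100_000.grib2')[0]
assert stamp == '2021010100'
assert datetime.strptime(stamp, '%Y%m%d%H').strftime('%Y-%m-%dT%HZ') == '2021-01-01T00Z'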
def _update_next_area(self):
    """
    Update the area after the new one.

    Return the unlock link for the new area.
    """
    # Some have this phrase in its own sentence, others don't
    link = u'nlocked once you have completed %s in [[%s]].'
    if self.new_number not in self.areas_list:
        # New area is the last one
        return link % (u'some job', self.after)
    page = pywikibot.Page(pywikibot.Site(), self.areas_list[self.new_number])
    old_text = page.get()
    # Also replace the area number while we're there
    i = self.new_number + 1
    text = old_text.replace(number_map[i], number_map[i+1])
    job = parse.search(link % (u'{}', self.after), old_text).fixed[0]
    old_link = link % (job, self.after)
    text = text.replace(old_link, link % (u'some job', self.area_name))
    self._update_page(page, old_text, text)
    # Return the link so it can go in the new page
    return old_link
def search_for_element(self, response, resp_param):
    text = response.text
    # search for resp_param in the body text
    param = search(resp_param, text)
    # pick out the element we need
    element = param.fixed[0]
    return element
async def server_countdown(ws, path):
    import websockets
    from .conftest import MS, PhoenixChannelServerHelper

    try:
        await PhoenixChannelServerHelper.send_connection_ack(ws)
        result = await ws.recv()
        json_result = json.loads(result)
        assert json_result["event"] == "doc"
        payload = json_result["payload"]
        query = payload["query"]
        query_id = json_result["ref"]

        count_found = search("count: {:d}", query)
        count = count_found[0]
        print(f"Countdown started from: {count}")

        await ws.send(subscription_server_answer)

        async def counting_coro():
            for number in range(count, -1, -1):
                await ws.send(
                    countdown_server_answer.format(query_id=query_id, number=number)
                )
                await asyncio.sleep(2 * MS)

        counting_task = asyncio.ensure_future(counting_coro())

        async def stopping_coro():
            nonlocal counting_task
            while True:
                result = await ws.recv()
                json_result = json.loads(result)
                if json_result["type"] == "stop" and json_result["id"] == str(query_id):
                    print("Cancelling counting task now")
                    counting_task.cancel()

        stopping_task = asyncio.ensure_future(stopping_coro())

        try:
            await counting_task
        except asyncio.CancelledError:
            print("Now counting task is cancelled")

        stopping_task.cancel()

        try:
            await stopping_task
        except asyncio.CancelledError:
            print("Now stopping task is cancelled")

        await PhoenixChannelServerHelper.send_close(ws)
    except websockets.exceptions.ConnectionClosedOK:
        pass
    finally:
        await ws.wait_closed()
def _get_failure_info(stdout, vcd_path):
    waveform = _render_vcd(vcd_path)
    search_format = 'Assert failed in top: {}:{linenumber:d}'
    line_num = search(search_format, stdout)['linenumber']
    step_format = 'Checking assertions in step {step_num:d}..'
    steps = findall(step_format, stdout)
    step_num = list(steps)[-1]['step_num']
    return BMC_Result(Result.FAIL, stdout, waveform, step_num, line_num)
def readccbbm_using_parse(filename):
    fid = open(filename, 'rt')
    lines = fid.readlines()
    regex = r'[-+]?[0-9]*\.?[0-9]+(?:[eE][-+]?[0-9]+)?'
    coords = []

    # Skip all lines until vertex
    ctr = 0
    parser1 = parse.compile(
        "Vertex {:d} {:g} {:g} {:g} {Jfeature=({:g} {:g} {:g} {:g} {:g} {:g} {:g})}"
    )
    parser2 = parse.compile("Vertex {:d} {:g} {:g} {:g}")

    while lines[ctr][0:6] != 'Vertex':
        ctr += 1

    # First read the vertices
    line = lines[ctr]
    line_split = line.split()
    ctr += 1
    # ctr = 1;
    attributes = []
    while line_split[0] == 'Vertex':
        result = parser1.parse(line)
        if result is not None:
            (idx, vtx1, vtx2, vtx3, radial_dist,
             mTBM1, mTBM2, mTBM3, detJacobian,
             eig1Jacobian, eig2Jacobian) = result.fixed
            attributes.append(radial_dist)
        else:
            result = parser2.parse(line)
            if result is not None:
                (idx, vtx1, vtx2, vtx3) = result.fixed
            else:
                sys.stdout.write('Cannot parse the line: ' + line)
        coords.append([vtx1, vtx2, vtx3])
        line = lines[ctr]
        line_split = line.split()
        ctr += 1

    coords = np.array(coords)

    # The rest of the lines are faces
    faces = []
    ctr -= 1
    for ii in range(ctr, len(lines)):
        line = lines[ii]
        result = parse.search("Face {:d} {:d} {:d} {:d}", line)
        (idx, face1, face2, face3) = result.fixed
        faces.append([face1, face2, face3])

    faces = np.array(faces)
    if faces.min() == 1:
        faces -= 1

    isMultilevelUCF = False

    return coords, faces, attributes, isMultilevelUCF
def prepare_data_pack(info_bundle: InfoBundle) -> DataPack:
    date = info_bundle.date
    daily_cases = NumberParser.int_with_space(
        search(DataMiner.patterns["daily_cases"], info_bundle.text)["cases"])
    daily_deaths = NumberParser.int_with_space(
        search(DataMiner.patterns["daily_deaths_direct"], info_bundle.text)["deaths"]) + \
        NumberParser.int_with_space(
            search(DataMiner.patterns["daily_deaths_linked"], info_bundle.text)["deaths"])
    daily_tests = NumberParser.int_with_modifier(
        search(DataMiner.patterns["daily_tests"], info_bundle.text)[0])
    total_cases = NumberParser.int_with_space(
        search(DataMiner.patterns["totals"], info_bundle.text)["cases"])
    total_deaths = NumberParser.int_with_space(
        search(DataMiner.patterns["totals"], info_bundle.text)["deaths"])

    voivodeship_stats = {}
    for v in DataMiner.voivodeships:
        v_cases = 0
        v_match = search(" " + v + "go ({})", info_bundle.text)
        if v_match:
            v_cases = v_match[0]
            v_cases = NumberParser.int_with_space(v_cases)
        voivodeship_stats[v.capitalize()] = {
            "date": date,
            "daily infected": v_cases
        }

    return DataPack(date, daily_cases, daily_deaths, daily_tests,
                    total_cases, total_deaths, voivodeship_stats)
def parse_header(self, header):
    header = "".join(header.split())
    try:
        self.ig_name = parse.search(">{:S}@", header).fixed[0]
    except (AttributeError, IndexError):
        self.ig_name = "Unknown"
    self.ig_type = _parse_header_helper("type", header)
    self.ig_alpha = _parse_header_helper("alpha", header)
    self.ig_specie = _parse_header_helper("specie", header)
def parse_download_fname(fname):
    # Use Parse to attempt to parse filenames for metadata.
    r = parse.search('{name}-{version}.tar', fname)
    if not r:
        r = parse.search('{name}-{version}.zip', fname)
    if not r:
        r = parse.parse('{name}-{version}-{extra}.{ext}', fname)

    version = r['version']

    # Support for requirements-parser-0.1.0.tar.gz
    # TODO: Some versions might actually have dashes, will need to figure that out.
    # Will likely have to check if '-' comes at beginning or end of version.
    if '-' in version:
        version = version.split('-')[-1]

    return version
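# Hedged examples (not from the original test suite); they follow the behaviour
# the comment above describes for dashed project names.
assert parse_download_fname('requirements-parser-0.1.0.tar.gz') == '0.1.0'
assert parse_download_fname('parse-1.19.0.tar.gz') == '1.19.0'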
def available_dates(self):
    result = subprocess.run(['s3cmd', 'ls', f's3://{self.bucket}/'],
                            capture_output=True)
    dates = set()
    for line in result.stdout.decode().splitlines():
        res = parse.search(f's3://{self.bucket}/' + self.pattern, line)
        if res is not None:
            dates.add(str(res.named['date']))
    return dates
def __init__(self, logbookfd):
    self.fileContents = logbookfd.read()
    self.rawLogEntries = list(
        r.fixed[0] for r in parse.findall(
            "+++Begin log entry+++{}" + "+++End log entry+++",
            self.fileContents))
    self.logEntries = []
    for entry in self.rawLogEntries:
        timestamp = parse.search("Time:{i}\n", entry)['i']
        user = parse.search("User:{i}\n", entry)['i']
        note = parse.search("Note:{i}\n", entry)['i']
        tags = list(r.fixed[0] for r in parse.findall("\'+{}\'", entry))
        addedFiles = list(r.fixed[0]
                          for r in parse.findall(addFileStr + "{}\n", entry))
        removedFiles = list(r.fixed[0]
                            for r in parse.findall(removeFileStr + "{}\n", entry))
        self.logEntries.append(logEntry(timestamp, user, note, tags,
                                        addedFiles, removedFiles))
def importAxis(file, macroKeyword):
    axis = []
    with open(file) as f:
        for line in f:
            val = search(macroKeyword + '({:d})', line)
            if val:
                axis.append(val[0])
    assert len(axis), "No axis data found"
    return axis
def cartodb2ogr(service_endpoint, aoi, out_fields, where='', _=''):
    global FUNCTION_COUNT
    FUNCTION_COUNT += 1
    # logging.info('FUNCTION cartodb2ogr STEP {} START'.format(FUNCTION_COUNT))
    t0 = time()
    endpoint_template = 'https://{}.carto.com/tables/{}/'
    username, table = search(endpoint_template, service_endpoint + '/')
    url = 'https://{username}.carto.com/api/v2/sql'.format(username=username)

    if isinstance(aoi, str):
        aoi = json.loads(aoi)

    params = {}
    fields = ['ST_AsGeoJSON(the_geom) as geometry']
    out_fields = out_fields.split(',')
    for field in out_fields:
        if field:
            fields.append('{field} as {field}'.format(field=field))

    temp = "ST_Intersects(ST_Buffer(ST_GeomFromText('{}',4326),0),the_geom)"
    features = []
    objectids = []
    for f in aoi['features']:
        where_clause = temp.format(
            wkt.dumps({
                'type': 'Polygon',
                'coordinates': bbox(f)
            }))
        if where and not where == '1=1':
            where_clause += 'AND {}'.format(where)
        q = 'SELECT {fields} FROM {table} WHERE {where}'
        params = {
            'q': q.format(fields=','.join(fields), table=table,
                          where=where_clause)
        }
        try:
            req = requests.get(url, params=params)
            req.raise_for_status()
        except Exception as e:
            raise ValueError((e, url, bbox(f)))

        response = json.loads(req.text)['rows']
        features += [{
            'type': 'Feature',
            'geometry': json.loads(h['geometry']),
            'properties': {field: h[field] for field in out_fields if field}
        } for h in response]

    featureset = json2ogr({'type': 'FeatureCollection', 'features': features})
    # logging.info('FUNCTION cartodb2ogr STEP {} DONE - {} SECONDS'.format(FUNCTION_COUNT, time()-t0))
    return featureset
def parse_phase(self, phase_repr):
    """
    Because the SUMO object Phase does not contain accessors,
    we parse the string representation to retrieve data members.

    :param phase_repr: The Phase string representation
    :return: A new Phase instance
    """
    duration = search('duration: {:f}', phase_repr)
    min_duration = search('minDuration: {:f}', phase_repr)
    max_duration = search('maxDuration: {:f}', phase_repr)
    phase_def = search('phaseDef: {}\n', phase_repr)

    if phase_def is None:
        phase_def = ''
    else:
        phase_def = phase_def[0]

    return Phase(duration[0], min_duration[0], max_duration[0], phase_def)
def test_compaction_throughput(self):
    """
    Test setting compaction throughput.
    Set throughput, insert data and ensure
    compaction performance corresponds.
    """
    cluster = self.cluster
    cluster.populate(1).start(wait_for_binary_proto=True)
    [node1] = cluster.nodelist()

    # disableautocompaction only disables compaction for existing tables,
    # so initialize stress tables with stress first
    stress_write(node1, keycount=1)
    node1.nodetool('disableautocompaction')
    stress_write(node1, keycount=200000 * cluster.data_dir_count)

    threshold = "5"
    node1.nodetool('setcompactionthroughput -- ' + threshold)

    node1.flush()
    if node1.get_cassandra_version() < '2.2':
        log_file = 'system.log'
    else:
        log_file = 'debug.log'
    mark = node1.mark_log(filename=log_file)
    node1.compact()
    matches = node1.watch_log_for('Compacted', from_mark=mark, filename=log_file)

    stringline = matches[0]

    throughput_pattern = '{}={avgthroughput:f}{units}/s'
    m = parse.search(throughput_pattern, stringline)
    avgthroughput = m.named['avgthroughput']
    found_units = m.named['units']

    unit_conversion_dct = {
        "MB": 1,
        "MiB": 1,
        "KiB": 1. / 1024,
        "GiB": 1024
    }

    units = ['MB'] if cluster.version() < LooseVersion('3.6') else ['KiB', 'MiB', 'GiB']
    assert found_units in units

    logger.debug(avgthroughput)
    avgthroughput_mb = unit_conversion_dct[found_units] * float(avgthroughput)

    # The throughput in the log is computed independently from the throttling and on the output files while
    # throttling is on the input files, so while that throughput shouldn't be higher than the one set in
    # principle, a bit of wiggle room is expected
    assert float(threshold) + 0.5 >= avgthroughput_mb
def solve(instance):
    result = instance.copy()
    strips_list = [map_doc_strips[doc] for doc in instance['docs']]
    strips = MixedStrips(strips_list, shuffle=True)

    prefix_dir = '{}/data/stripes/'.format(DOCREASSEMBLY_PATH_DIR)
    suffix_dir = '_{}'.format(len(strips.strips))  # to match the rule of the DocReassembly software

    # create a temporary directory to hold the reconstruction instance data
    with tempfile.TemporaryDirectory(prefix=prefix_dir, suffix=suffix_dir) as tmpdirname:
        # record the current directory and move to the DocReassembly root directory
        curr_dir = os.path.abspath('.')
        os.chdir(DOCREASSEMBLY_PATH_DIR)

        # case (instance) name is the basename of the directory without the final _<n>, where n is the
        # number of strips. DocReassembly will concatenate the path data/stripes/<case_name>
        # with the parameter <n>.
        case_name = os.path.basename(tmpdirname).replace(suffix_dir, '')

        # set the command to be executed (replace open parameters in the template string)
        cmd = CMD_TEMPLATE.format(case_name, len(strips.strips))
        cmd = cmd.split()  # split command to put in the format of the subprocess system call format

        # copy strips' images into the temporary directory
        for i, strip in enumerate(strips.strips):
            cv2.imwrite('{}/{}.png'.format(tmpdirname, i),
                        strip.image[: MAX_STRIP_HEIGHT, :, :: -1])

        # write the order file (ground-truth)
        order = len(strips.strips) * ['0']  # inverted init perm (which piece should be in each position?)
        for pos, element in enumerate(strips.init_perm):
            order[element] = str(pos)
        open('{}/order.txt'.format(tmpdirname), 'w').write('\n'.join(order))
        # while(1): pass

        # run the software
        with open(os.devnull, 'w') as devnull:
            output = str(subprocess.check_output(cmd))  # , stderr=devnull))
        os.chdir(curr_dir)  # return to the original directory

    sizes = instance['sizes']
    solution = [int(s) for s in search('Composed order: {} \\n', output).fixed[0].split()]
    result['opt_time'] = float(search('Computation time: {}s', output).fixed[0])
    result['accuracy'] = neighbor_comparison(solution, strips.init_perm, sizes)
    return result
def increase_sstable_generations(self, sstables):
    """
    After finding the number of existing sstables, increase all of the
    generations by that amount.
    """
    for table_or_index, table_sstables in sstables.items():
        increment_by = len(set(
            parse.search('{}-{increment_by}-{suffix}.{file_extention}', s).named['increment_by']
            for s in table_sstables))
        sstables[table_or_index] = [self.increment_generation_by(s, increment_by)
                                    for s in table_sstables]

    debug('sstables after increment {}'.format(str(sstables)))
def __try_search(queries, string_to_search):
    result = None
    for query in queries:
        try:
            result = search(query, string_to_search)
        except ValueError:
            # This occurs if there's an apostrophe (') in the input string and
            # the matched text can't be converted to an int
            pass
        if result is not None:
            return result
    return None
def _update_previous_area(self):
    """Update the area before the new one."""
    page = pywikibot.Page(pywikibot.Site(), self.after)
    old_text = page.get()
    link = u'Completing %s job unlocks the [[%s]] area.'
    if self.new_number in self.areas_list:
        job = parse.search(link % (u'{}', self.areas_list[self.new_number]),
                           old_text).fixed[0]
        link = link % (job, u'%s')
        text = old_text.replace(link % self.areas_list[self.new_number],
                                link % self.area_name)
    else:
        final = u'It is currently the final area.'
        text = old_text.replace(final, link % (u'some', self.area_name))
    self._update_page(page, old_text, text)
def get_all_characters(script_file_handle):
    """
    Given a file handle for a script file, return a list of all the characters.
    """
    characters_dict = {}
    script_file_handle.seek(0)
    for line in script_file_handle.readlines():
        line = line.strip()
        search_result = parse.search('{}:', line)
        if search_result:
            character = search_result.fixed[0]
            if character not in characters_dict:
                characters_dict[character] = True
    return characters_dict.keys()
def prepare(context, series):
    """
    Extract all experiments of series and save their relevant data to csv.
    """
    v = ExperimentDocument.view('adaptor/experiment-series')
    l = v.all()
    flags_set = collect_flags(l)
    truthness_d = make_flags_truthness_dict(flags_set, l)
    ll = []
    for doc in l:
        r = parse.search(
            '-DNI={:d} -DNJ={:d}', doc.settings.build_settings.other_flags)
        # Get sorted list of flags
        d = truthness_d[doc._id]
        keys = sorted(d.keys())
        flags_list = [d[k] for k in keys]
        new_row = [
            doc._id, doc.datetime, doc.validation_result.measured_time,
            doc.settings.program, doc.settings.build_settings.compiler,
            doc.settings.build_settings.base_opt,
            doc.settings.build_settings.optimization_flags,
            r[0], r[1],
            doc.hardware_info.cpu.cpu_name,
            doc.hardware_info.cpu.cpu_mhz,
            doc.hardware_info.cpu.cache_size]
        new_row.extend(flags_list)
        ll.append(new_row)
    rr = map(lambda i: "\t".join(map(str, i)), ll)
    r = map(lambda i: i + '\n', rr)
    # Keys are from loop up there
    flags_headers_list = keys
    headers = 'id\tdatetime\ttime\tprogram_name\tcompiler\t'\
        'base_opt\toptimization_flags\twidth\theight\tcpu_name\t'\
        'cpu_mhz\tcpu_cache\t'
    full_headers = headers + '\t'.join(flags_headers_list) + '\n'
    f = open(os.path.join(context.paths_manager.framework_root_dir,
                          'an/{0}.csv'.format(series)), 'w')
    f.write(full_headers)
    f.writelines(r)
def _add_to_bosses_page(self):
    areas = len(self.areas_list)
    new_line = u'*[[File:%s|100px]] [[%s]] in the [[%s]] area' % (
        self._boss_image_name(), self.boss_name, self.area_name)
    page = pywikibot.Page(pywikibot.Site(), u'Bosses')
    text = old_text = page.get()
    intro = u'There are currently %s bosses in the game'
    bosses = parse.search(intro % (u'{:d}'), text).fixed[0]
    line_before = u' in the [[%s]] area' % self.after
    intro = intro % (u'%d')
    text = text.replace(intro % (bosses), intro % ((bosses + 1)))
    text = text.replace(line_before, line_before + u'\n' + new_line)
    self._update_page(page, old_text, text)
def _check_chunk_length(self, session, value):
    result = session.cluster.metadata.keyspaces['ks'].tables['test_table'].as_cql_query()
    # Now extract the param list
    params = ''

    if self.cluster.version() < '3.0':
        if 'sstable_compression' in result:
            params = result
    else:
        if 'compression' in result:
            params = result

    self.assertNotEqual(params, '',
                        "Looking for the string 'sstable_compression', but could not find it in {str}".format(str=result))

    chunk_string = "chunk_length_kb" if self.cluster.version() < '3.0' else "chunk_length_in_kb"
    chunk_length = parse.search("'" + chunk_string + "': '{chunk_length:d}'", result).named['chunk_length']

    self.assertEqual(chunk_length, value,
                     "Expected chunk_length: {}. We got: {}".format(value, chunk_length))
def __check_proper_gah(string_to_search):
    # Check for the gah (gender / age / height) when formatted as: M/28/5'7"
    re_string = "((m|f|male|female)/\d+/\d+'\d+)"
    regex = re.compile(re_string, re.IGNORECASE)
    # print regex.match(submission.tti)
    match = regex.search(string_to_search)
    if match:
        rvalue = {}
        gah_str = match.group(0)
        # print gah_str
        # self.debug_str = gah_str
        result = search("{gender}/{age:d}/{feet:d}'{in:d}", gah_str)
        # print result.named
        rvalue['gender_is_female'] = RedditAnalyzer.__gender_from_string(result.named['gender'])
        rvalue['age'] = result.named['age']
        rvalue['height_in'] = result.named['feet'] * 12 + result.named['in']
        HITS_stats['check_proper_gah'] += 1
        return rvalue
    return None
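# Illustrative check of the parse step above (not from the original analyzer):
# for a regex hit like "M/28/5'7", the search call captures the named fields.
from parse import search

r = search("{gender}/{age:d}/{feet:d}'{in:d}", "M/28/5'7")
assert r.named == {'gender': 'M', 'age': 28, 'feet': 5, 'in': 7}
assert r.named['feet'] * 12 + r.named['in'] == 67  # height in inches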
def scan_to_dict(scan):
    try:
        res = subprocess.check_output(["identify", scan.get_tiff_path()]).strip()
    except subprocess.CalledProcessError:
        return None

    image_width, image_length = search('TIFF {:d}x{:d}', res).fixed
    image_width = int(image_width)
    image_length = int(image_length)

    # if image_width != scan.image.width or image_length != scan.image.height:
    #     print "Discrepancy in image sizes for scan %s" % scan.image.name

    tile_width = 256
    tile_length = 256

    return {
        'document': scan.document_id,
        'path': scan.get_tiff_name(),
        'url': scan.image.url,
        'tiles': {"w": tile_width, "h": tile_length},
        'resolutions': res.count('] TIFF '),
        'size': {
            "w": image_width,
            "h": image_length
        }
    }
def call_token_generator(self, install_dir, randomPart, nodes):
    executable = os.path.join(install_dir, 'tools', 'bin', 'token-generator')
    if common.is_win():
        executable += ".bat"

    args = [executable]
    if randomPart is not None:
        if randomPart:
            args.append("--random")
        else:
            args.append("--murmur3")

    for n in nodes:
        args.append(str(n))

    debug('Invoking {}'.format(args))
    token_gen_output = subprocess.check_output(args)
    lines = token_gen_output.split("\n")
    dc_tokens = None
    generated_tokens = []

    for line in lines:
        if line.startswith("DC #"):
            if dc_tokens is not None:
                self.assertGreater(dc_tokens.__len__(), 0,
                                   "dc_tokens is empty from token-generator {}".format(args))
                generated_tokens.append(dc_tokens)
            dc_tokens = []
        else:
            if line:
                m = parse.search('Node #{node_num:d}:{:s}{node_token:d}', line)
                self.assertIsNotNone(m, "Line \"{}\" does not match pattern from token-generator {}".format(line, args))
                node_num = int(m.named['node_num'])
                node_token = int(m.named['node_token'])
                dc_tokens.append(node_token)
                self.assertEqual(node_num, dc_tokens.__len__(),
                                 "invalid token count from token-generator {}".format(args))

    self.assertIsNotNone(dc_tokens, "No tokens from token-generator {}".format(args))
    self.assertGreater(dc_tokens.__len__(), 0, "No tokens from token-generator {}".format(args))
    generated_tokens.append(dc_tokens)

    return generated_tokens
def get_value(self, val):
    """
    :param val: The property to get
    :type val: str
    :return: the answer
    :rtype: str
    """
    self.p.stdin.write(GET + " " + val + "\n")
    self.p.stdin.flush()
    self.log.info("Command {0} sent to radio".format(GET + " " + val))
    n_tries = 10
    while n_tries > 0:
        try:
            # Try to parse
            line = self.read_stdout()
            ret = parse.search("={}\n", line)
            if ret is not None and len(ret.fixed) != 0:
                self.log.debug("got " + ret.fixed[0] + " to get of " + val)
                return ret.fixed[0]
        except Exception as err:
            self.log.debug("Attempts {0}".format(n_tries))
            time.sleep(0.1)
            n_tries -= 1
            continue
def get_person(self, person_url=None, person_id=None):
    """
    Load committee details for the given detail page URL or numeric ID
    """
    # Read either person_id or committee_url from the opposite
    if person_id is not None:
        person_url = self.urls['COMMITTEE_DETAIL_PRINT_PATTERN_FULL'] % person_id
    elif person_url is not None:
        parsed = parse.search(self.urls['COMMITTEE_DETAIL_PARSE_PATTERN_FULL'], person_url)
        person_id = parsed['person_id']

    logging.info("Getting meeting (committee) %d from %s", person_id, person_url)

    committee = Committee(numeric_id=person_id)

    time.sleep(self.config.WAIT_TIME)
    response = self.get_url(person_url)
    if not response:
        return

    # seek(0) is necessary to reset response pointer.
    response.seek(0)
    html = response.read()
    html = html.replace('&nbsp;', ' ')
    parser = etree.HTMLParser()
    dom = etree.parse(StringIO(html), parser)

    trs = dom.xpath(self.xpath['COMMITTEE_LINES'])
    for tr in trs:
        tds = tr.xpath('.//td')
        print tds
        if tr.get('class') == 'smcrowh':
            print tds[0].text
        else:
            for td in tds:
                print td[0].text
    return
def compaction_throughput_test(self):
    """
    Test setting compaction throughput.
    Set throughput, insert data and ensure
    compaction performance corresponds.
    """
    cluster = self.cluster
    cluster.populate(1).start(wait_for_binary_proto=True)
    [node1] = cluster.nodelist()

    # disableautocompaction only disables compaction for existing tables,
    # so initialize stress tables with stress first
    stress_write(node1, keycount=1)
    node1.nodetool('disableautocompaction')
    stress_write(node1, keycount=200000 * cluster.data_dir_count)

    threshold = "5"
    node1.nodetool('setcompactionthroughput -- ' + threshold)

    matches = block_on_compaction_log(node1)
    stringline = matches[0]

    throughput_pattern = '{}={avgthroughput:f}{units}/s'
    m = parse.search(throughput_pattern, stringline)
    avgthroughput = m.named['avgthroughput']
    found_units = m.named['units']

    units = ['MB'] if LooseVersion(cluster.version()) < LooseVersion('3.6') else ['KiB', 'MiB', 'GiB']
    self.assertIn(found_units, units)

    debug(avgthroughput)

    # The throughput in the log is computed independently from the throttling and on the output files while
    # throttling is on the input files, so while that throughput shouldn't be higher than the one set in
    # principle, a bit of wiggle room is expected
    self.assertGreaterEqual(float(threshold) + 0.5, float(avgthroughput))
def get_submission(self, submission_url=None, submission_id=None):
    """
    Load submission (Vorlage) details for the submission given by
    detail page URL or numeric ID
    """
    # Read either submission_id or submission_url from the opposite
    if submission_id is not None:
        submission_url = self.urls['SUBMISSION_DETAIL_PRINT_PATTERN'] % submission_id
    elif submission_url is not None:
        parsed = parse.search(self.urls['SUBMISSION_DETAIL_PARSE_PATTERN'],
                              submission_url)
        submission_id = parsed['submission_id']

    logging.info("Getting submission %d from %s", submission_id, submission_url)

    submission = Submission(numeric_id=submission_id)

    time.sleep(self.config.WAIT_TIME)
    try:
        response = self.user_agent.open(submission_url)
    except urllib2.HTTPError, e:
        if e.code == 404:
            sys.stderr.write("URL not found (HTTP 404) error caught: %s\n" % submission_url)
            sys.stderr.write("Please check BASE_URL in your configuration.\n")
            sys.exit(1)
            except:
                continue

            if tdcontent == 'Name:':
                submission.identifier = tds[n + 1].text.strip()
            elif tdcontent == 'Art:':
                submission.type = tds[n + 1].text.strip()
            elif tdcontent == 'Datum:':
                submission.date = tds[n + 1].text.strip()
            elif tdcontent == 'Name:':
                submission.identifier = tds[n + 1].text.strip()
            elif tdcontent == 'Betreff:':
                submission.subject = '; '.join(tds[n + 1].xpath('./text()'))
            elif tdcontent == 'Referenzvorlage:':
                link = tds[n + 1].xpath('a')[0]
                href = link.get('href')
                parsed = parse.search(self.urls['SUBMISSION_DETAIL_PARSE_PATTERN'], href)
                submission.superordinate = {
                    'identifier': link.text.strip(),
                    'numeric_id': parsed['submission_id']
                }
                # add superordinate submission to queue
                if hasattr(self, 'submission_queue'):
                    self.submission_queue.add(parsed['submission_id'])
            # subordinate submissions are added to the queue
            elif tdcontent == 'Untergeordnete Vorlage(n):':
                current_category = 'subordinates'
                for link in tds[n + 1].xpath('a'):
                    href = link.get('href')
                    parsed = parse.search(self.urls['SUBMISSION_DETAIL_PARSE_PATTERN'], href)
                    if hasattr(self, 'submission_queue') and parsed is not None:
                        # add subordinate submission to queue
def test_compactionstats(self):
    """
    @jira_ticket CASSANDRA-10504
    @jira_ticket CASSANDRA-10427

    Test that jmx MBean used by nodetool compactionstats
    properly updates the progress of a compaction
    """
    cluster = self.cluster
    cluster.populate(1)
    node = cluster.nodelist()[0]
    remove_perf_disable_shared_mem(node)
    cluster.start(wait_for_binary_proto=True)

    # Run a quick stress command to create the keyspace and table
    node.stress(['write', 'n=1', 'no-warmup'])
    # Disable compaction on the table
    node.nodetool('disableautocompaction keyspace1 standard1')
    node.nodetool('setcompactionthroughput 1')
    node.stress(['write', 'n=150K', 'no-warmup'])
    node.flush()
    # Run a major compaction. This will be the compaction whose
    # progress we track.
    node.nodetool_process('compact')
    # We need to sleep here to give compaction time to start
    # Why not do something smarter? Because if the bug regresses,
    # we can't rely on jmx to tell us that compaction started.
    time.sleep(5)

    compaction_manager = make_mbean('db', type='CompactionManager')
    with JolokiaAgent(node) as jmx:
        progress_string = jmx.read_attribute(compaction_manager, 'CompactionSummary')[0]

        # Pause in between reads
        # to allow compaction to move forward
        time.sleep(2)

        updated_progress_string = jmx.read_attribute(compaction_manager, 'CompactionSummary')[0]
        var = 'Compaction@{uuid}(keyspace1, standard1, {progress}/{total})bytes'
        progress = int(parse.search(var, progress_string).named['progress'])
        updated_progress = int(parse.search(var, updated_progress_string).named['progress'])

        logger.debug(progress_string)
        logger.debug(updated_progress_string)

        # We want to make sure that the progress is increasing,
        # and that values other than zero are displayed.
        assert updated_progress > progress
        assert progress >= 0
        assert updated_progress > 0

        # Block until the major compaction is complete
        # Otherwise nodetool will throw an exception
        # Give a timeout, in case compaction is broken
        # and never ends.
        start = time.time()
        max_query_timeout = 600
        logger.debug("Waiting for compaction to finish:")
        while (len(jmx.read_attribute(compaction_manager, 'CompactionSummary')) > 0) and (
                time.time() - start < max_query_timeout):
            logger.debug(jmx.read_attribute(compaction_manager, 'CompactionSummary'))
            time.sleep(2)
def get_session(self, session_url=None, session_id=None):
    """
    Load session details for the given detail page URL or numeric ID
    """
    # Read either session_id or session_url from the opposite
    if session_id is not None:
        session_url = self.urls['SESSION_DETAIL_PRINT_PATTERN'] % session_id
    elif session_url is not None:
        parsed = parse.search(self.urls['SESSION_DETAIL_PARSE_PATTERN'], session_url)
        session_id = parsed['session_id']

    logging.info("Getting session %d from %s", session_id, session_url)

    session = Session(numeric_id=session_id)

    time.sleep(self.config.WAIT_TIME)
    response = self.user_agent.open(session_url)
    # forms for later attachment download
    mechanize_forms = mechanize.ParseResponse(response, backwards_compat=False)
    # seek(0) is necessary to reset response pointer.
    response.seek(0)
    html = response.read()
    html = html.replace('&nbsp;', ' ')
    parser = etree.HTMLParser()
    dom = etree.parse(StringIO(html), parser)

    # check for page errors
    try:
        page_title = dom.xpath('//h1')[0].text
        if 'Fehlermeldung' in page_title:
            logging.info("Page %s cannot be accessed due to server error", session_url)
            if self.options.verbose:
                print "Page %s cannot be accessed due to server error" % session_url
            return
        if 'Berechtigungsfehler' in page_title:
            logging.info("Page %s cannot be accessed due to permissions", session_url)
            if self.options.verbose:
                print "Page %s cannot be accessed due to permissions" % session_url
            return
    except:
        pass

    try:
        error_h3 = dom.xpath('//h3[@class="smc_h3"]')[0].text.strip()
        if 'Keine Daten gefunden' in error_h3:
            logging.info("Page %s does not contain any agenda items", session_url)
            if self.options.verbose:
                print "Page %s does not contain agenda items" % session_url
            return
    except:
        pass

    session.original_url = session_url

    # Session title
    try:
        session.title = dom.xpath(self.xpath['SESSION_DETAIL_TITLE'])[0].text
    except:
        logging.critical('Cannot find session title element using XPath SESSION_DETAIL_TITLE')
        raise TemplateError('Cannot find session title element using XPath SESSION_DETAIL_TITLE')

    # Committee link
    try:
        links = dom.xpath(self.xpath['SESSION_DETAIL_COMMITTEE_LINK'])
        for link in links:
            href = link.get('href')
            parsed = parse.search(self.urls['COMMITTEE_DETAIL_PARSE_PATTERN'], href)
            if parsed is not None:
                session.committee_id = parsed['committee_id']
    except:
        logging.critical('Cannot find link to committee detail page using SESSION_DETAIL_COMMITTEE_LINK_XPATH')
        raise TemplateError('Cannot find link to committee detail page using SESSION_DETAIL_COMMITTEE_LINK_XPATH')

    # Session identifier, date, address etc
    tds = dom.xpath(self.xpath['SESSION_DETAIL_IDENTIFIER_TD'])
    if len(tds) == 0:
        logging.critical('Cannot find table fields using SESSION_DETAIL_IDENTIFIER_TD_XPATH')
        raise TemplateError('Cannot find table fields using SESSION_DETAIL_IDENTIFIER_TD_XPATH')
    else:
        for n in range(0, len(tds)):
            try:
                tdcontent = tds[n].text.strip()
                nextcontent = tds[n + 1].text.strip()
            except:
                continue
            if tdcontent == 'Sitzung:':
                session.identifier = nextcontent
            elif tdcontent == 'Gremium:':
                session.committee_name = nextcontent
            elif tdcontent == 'Datum:':
                datestring = nextcontent
                if tds[n + 2].text == 'Zeit:':
                    if (n + 3) in tds and tds[n + 3].text is not None:
                        datestring + ' ' + tds[n + 3].text
                session.date_start = datestring
            elif tdcontent == 'Raum:':
                session.address = " ".join(tds[n + 1].xpath('./text()'))
            elif tdcontent == 'Bezeichnung:':
                session.description = nextcontent
        if not hasattr(session, 'identifier'):
            logging.critical('Cannot find session identifier using XPath SESSION_DETAIL_IDENTIFIER_TD')
            raise TemplateError('Cannot find session identifier using XPath SESSION_DETAIL_IDENTIFIER_TD')

    # Agendaitems
    found_attachments = []
    rows = dom.xpath(self.xpath['SESSION_DETAIL_AGENDA_ROWS'])
    if len(rows) == 0:
        logging.critical('Cannot find agenda using XPath SESSION_DETAIL_AGENDA_ROWS')
        raise TemplateError('Cannot find agenda using XPath SESSION_DETAIL_AGENDA_ROWS')
    else:
        agendaitems = {}
        agendaitem_id = None
        public = True
        for row in rows:
            row_id = row.get('id')
            row_classes = row.get('class').split(' ')
            fields = row.xpath('td')
            number = fields[0].xpath('./text()')
            if len(number) > 0:
                number = number[0]
            if number == []:
                number = None
            #print "number: %s" % number
            if row_id is not None:
                # Agendaitem main row
                agendaitem_id = row_id.rsplit('_', 1)[1]
                agendaitems[agendaitem_id] = {}
                agendaitems[agendaitem_id]['id'] = int(agendaitem_id)
                if number is not None:
                    agendaitems[agendaitem_id]['number'] = number
                agendaitems[agendaitem_id]['subject'] = "; ".join(fields[1].xpath('./text()'))
                agendaitems[agendaitem_id]['public'] = public
                # submission links
                links = row.xpath(self.xpath['SESSION_DETAIL_AGENDA_ROWS_SUBMISSION_LINK'])
                submissions = []
                for link in links:
                    href = link.get('href')
                    if href is None:
                        continue
                    parsed = parse.search(self.urls['SUBMISSION_DETAIL_PARSE_PATTERN'], href)
                    if parsed is not None:
                        submission = Submission(numeric_id=int(parsed['submission_id']),
                                                identifier=link.text)
                        submissions.append(submission)
                        # Add submission to submission queue
                        if hasattr(self, 'submission_queue'):
                            self.submission_queue.add(int(parsed['submission_id']))
                if len(submissions):
                    agendaitems[agendaitem_id]['submissions'] = submissions
                """
                Note: we don't scrape agendaitem-related attachments for now,
                based on the assumption that they are all found via submission
                detail pages. All we do here is get a list of attachment IDs
                in found_attachments
                """
                #attachments = []
                forms = row.xpath('.//form')
                for form in forms:
                    for hidden_field in form.xpath('input'):
                        if hidden_field.get('name') != 'DT':
                            continue
                        attachment_id = hidden_field.get('value')
                        #attachments.append(attachment_id)
                        found_attachments.append(attachment_id)
                #if len(attachments):
                #    agendaitems[agendaitem_id]['attachments'] = attachments
            elif 'smc_tophz' in row_classes:
                # additional (optional row for agendaitem)
                label = fields[1].text
                value = fields[2].text
                if label is not None and value is not None:
                    label = label.strip()
                    value = value.strip()
                    #print (label, value)
                    if label in ['Ergebnis:', 'Beschluss:']:
                        if value in self.config.RESULT_STRINGS:
                            agendaitems[agendaitem_id]['result'] = self.config.RESULT_STRINGS[value]
                        else:
                            logging.warn("String '%s' not found in configured RESULT_STRINGS", value)
                            if self.options.verbose:
                                print "WARNING: String '%s' not found in RESULT_STRINGS\n" % value
                            agendaitems[agendaitem_id]['result'] = value
                    elif label == 'Bemerkung:':
                        agendaitems[agendaitem_id]['result_note'] = value
                    elif label == 'Abstimmung:':
                        agendaitems[agendaitem_id]['voting'] = value
                    else:
                        logging.critical("Agendaitem info label '%s' is unknown", label)
                        raise ValueError('Agendaitem info label "%s" is unknown' % label)
            elif 'smcrowh' in row_classes:
                # Subheading (public / nonpublic part)
                if fields[0].text is not None and "Nicht öffentlich" in fields[0].text.encode('utf-8'):
                    public = False
        #print json.dumps(agendaitems, indent=2)
        session.agendaitems = agendaitems.values()

    # session-related attachments
    containers = dom.xpath(self.xpath['SESSION_DETAIL_ATTACHMENTS'])
    for container in containers:
        classes = container.get('class')
        if classes is None:
            continue
        classes = classes.split(' ')
        if self.xpath['SESSION_DETAIL_ATTACHMENTS_CONTAINER_CLASSNAME'] not in classes:
            continue
        attachments = []
        rows = container.xpath('.//tr')
        for row in rows:
            forms = row.xpath('.//form')
            for form in forms:
                #print "Form: ", form
                name = " ".join(row.xpath('./td/text()')).strip()
                for hidden_field in form.xpath('input'):
                    if hidden_field.get('name') != 'DT':
                        continue
                    attachment_id = hidden_field.get('value')
                    # make sure to add only those which aren't agendaitem-related
                    if attachment_id not in found_attachments:
                        attachment = Attachment(
                            identifier=attachment_id,
                            name=name
                        )
                        # Traversing the whole mechanize response to submit this form
                        for mform in mechanize_forms:
                            #print "Form found: '%s'" % mform
                            for control in mform.controls:
                                if control.name == 'DT' and control.value == attachment_id:
                                    #print "Found matching form: ", control.name, control.value
                                    attachment = self.get_attachment_file(attachment, mform)
                        attachments.append(attachment)
                        found_attachments.append(attachment_id)
        if len(attachments):
            session.attachments = attachments

    oid = self.db.save_session(session)
    if self.options.verbose:
        logging.info("Session %d stored with _id %s", session_id, oid)
def test_pos(self):
    # basic search() test
    r = parse.search("a {} c", " a b c ", 2)
    self.assertEqual(r, None)