def main(argv: List[str]) -> None:
    if len(argv) != 2:
        print('Wrong number of arguments.')
        return
    writer = None
    path = argv[1]
    files = []
    # Single .vm file: write <file>.asm next to it
    if os.path.isfile(path):
        files.append(path)
        name = os.path.basename(path).split('.')[0] + '.asm'
        writer = Writer(os.path.join(os.path.dirname(path), name))
    # Directory: translate every .vm file into a single <dir>.asm
    else:
        for f in os.listdir(path):
            abspath = os.path.join(path, f)
            if str(abspath).endswith(".vm"):
                files.append(abspath)
        name = os.path.join(path, os.path.basename(path) + '.asm')
        writer = Writer(name)
    writer.write_init()
    for f in files:
        handle = open(f, "r")
        name = os.path.basename(f).split('.')[0]
        writer.set_file_name(name)
        for line in handle:
            line = str(line).strip()
            # Skip blank lines and full-line comments
            if line == '' or line.startswith('//'):
                continue
            t, tokens = parse_line(line)
            # Write the VM command as a comment for debugging
            writer.write_comment(tokens)
            # Write the actual translated command
            if t == C_ARITHMETIC:
                writer.write_arithmetic(tokens[0])
            elif t == C_PUSH:
                writer.write_push(tokens[1], int(tokens[2]))
            elif t == C_POP:
                writer.write_pop(tokens[1], tokens[2])
            elif t == C_LABEL:
                writer.write_label(tokens[1])
            elif t == C_GOTO:
                writer.write_goto(tokens[1])
            elif t == C_IF:
                writer.write_if(tokens[1])
            # elif t == C_FUNCTION:
            #     writer.write_function(tokens[1], int(tokens[2]))
            # elif t == C_RETURN:
            #     writer.write_return()
            # elif t == C_CALL:
            #     writer.write_call(tokens[1], int(tokens[2]))
            else:
                print('Invalid command')
        handle.close()
    writer.write_end()
    writer.close()
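# --- Illustrative sketch (not from the original module): one possible shape for the
# parse_line() helper and command-type constants that main() above relies on. The
# original translator defines its own versions; the names mirror the calls in main()
# but the implementation below is an assumption for illustration only.
C_ARITHMETIC, C_PUSH, C_POP, C_LABEL, C_GOTO, C_IF, C_FUNCTION, C_RETURN, C_CALL = range(9)

_COMMAND_TYPES = {
    'push': C_PUSH, 'pop': C_POP, 'label': C_LABEL, 'goto': C_GOTO,
    'if-goto': C_IF, 'function': C_FUNCTION, 'return': C_RETURN, 'call': C_CALL,
}


def parse_line(line):
    # Strip any inline comment, then split the VM command into whitespace-separated tokens.
    tokens = line.split('//')[0].split()
    # Commands without a keyword prefix ('add', 'sub', 'eq', ...) are arithmetic/logical.
    return _COMMAND_TYPES.get(tokens[0], C_ARITHMETIC), tokens


if __name__ == '__main__':
    # main() expects exactly one argument: a .vm file or a directory containing .vm files.
    import sys
    main(sys.argv)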
def run(self): with open(Util.Config.astFile, "rb") as ff: ast = pickle.load(ff) if not (Util.Config.disableAllOpti): if not (Util.Config.disableRMO): print("Performing Relu-maxpool optimization...") ReluMaxpoolOpti.ReluMaxpoolOpti().visit(ast) print("Relu-maxpool optimization done.") if not (Util.Config.disableLivenessOpti): print("Performing Garbage collection...") mtdAST = MtdAST() GC = GarbageCollector.GarbageCollector(ast) GC.run([mtdAST]) print("Garbage collection done.") # Perform type inference and annotate nodes with type information InferType().visit(ast) # if Util.Config.printASTBool : if False: PrintAST().visit(ast) print("\n") sys.stdout.flush() IRUtil.init() compiler = IRBuilderCSF() res = compiler.visit(ast) res = self.fixOuputScale(res, compiler) res = self.fixNames(res, compiler) Util.write_debug_info(compiler.name_mapping) # Insert a generic start_computation and end_computation function call after all input IR statements. res = self.insertStartEndFunctionCalls(res) writer = Writer(Util.Config.outputFileName) debugVarEzPCName = (compiler.name_mapping[Util.Config.debugVar] if (Util.Config.debugVar in compiler.name_mapping) else None) if Util.forEzPC(): codegen = EzPCCodegen(writer, compiler.globalDecls, debugVarEzPCName) else: assert False codegen.printAll(*res) writer.close()
def run(self):
    with open(Util.Config.astFile, 'rb') as ff:
        ast = pickle.load(ff)

    if not (Util.Config.disableAllOpti):
        if not (Util.Config.disableRMO):
            print("Performing Relu-maxpool optimization...")
            # Perform optimizations on the AST
            ReluMaxpoolOpti.ReluMaxpoolOpti().visit(ast)

        if not (Util.Config.disableLivenessOpti):
            print("Performing Liveness Optimization...")
            # Perform liveness analysis optimization on the AST
            mtdAST = MtdAST()
            LivenessOpti.LivenessAnalysis().visit(ast)
            LivenessOpti.LivenessOpti().visit(ast, [mtdAST, 0, {}])

    if Util.Config.printASTBool:
        PrintAST().visit(ast)
        sys.stdout.flush()

    # Perform type inference
    InferType().visit(ast)

    IRUtil.init()
    compiler = IRBuilderCSF()
    res = compiler.visit(ast)
    Util.write_debug_info(compiler.name_mapping)

    # Insert a generic start_computation and end_computation function call
    # after all input IR statements.
    res = self.insertStartEndFunctionCalls(res)

    writer = Writer(Util.Config.outputFileName)
    debugVarEzPCName = (
        compiler.name_mapping[Util.Config.debugVar]
        if (Util.Config.debugVar in compiler.name_mapping)
        else None
    )

    if Util.forEzPC():
        codegen = EzPCCodegen(writer, compiler.decls, debugVarEzPCName)
    else:
        assert False

    codegen.printAll(*res)
    writer.close()
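# --- Illustrative driver sketch (assumptions throughout): both run() variants above read
# their inputs from Util.Config and emit EzPC code through Writer/EzPCCodegen. The field
# names below are exactly the ones the snippets read; how Util.Config is normally
# populated (e.g. from argparse) and the Compiler class owning run() are assumptions.
def compile_to_ezpc(ast_pickle, output_file, debug_var=None):
    Util.Config.astFile = ast_pickle            # pickled AST produced by a frontend
    Util.Config.outputFileName = output_file    # EzPC output path handed to Writer
    Util.Config.debugVar = debug_var
    Util.Config.disableAllOpti = False
    Util.Config.disableRMO = False
    Util.Config.disableLivenessOpti = False
    Util.Config.printASTBool = False
    Compiler().run()                            # hypothetical class exposing run() above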
class Parser:
    """"""

    #----------------------------------------------------------------------
    def __init__(self):
        """Constructor"""
        self.base = "http://www.icd10data.com"
        self.writer = Writer("temp.txt")
        self.direction = Set(["right", "left"])
        self.areas = [
            u'finger(s)', u'leg', u'thigh', u'femur', u'thumb', u'jaw',
            u'pelvic region and thigh', u'initial encounter for fracture', u'humerus',
            'joint', u'foot', u'mid-cervical region', u"angle's class ii", 'shoulder',
            u'ankle and toes', u'occipito-atlanto-axial region', u'bone',
            u'ulna and radius', u'ring finger', u'thoracolumbar region',
            u'tibia and fibula', u'vertebrae', u'ankle and joints of foot', u'arm',
            u'thoracic region', u'lumbar region', u'distal tibia', u'finger', u'ulna',
            u'subsequent encounter for fracture with malunion', 'head region',
            u'little finger', u"angle's class iii", u'with tophus (tophi)', u'fibula',
            u'central', u'proximal tibia', u'radius and ulna', u'radius', u'upper arm',
            u'organ involvement unspecified', u'bone plate', u'upper arms',
            u'high cervical region', u'excluding foot', u'distal femur',
            u'middle finger', u'distal humerus',
            u'subsequent encounter for fracture with nonunion', u'ankle',
            u'joints of hand', u'multiple sites in spine', u'sequela',
            u'proximal femur', u'index finger', u'distal radius', u'ear',
            u'organ or system involvement unspecified', u'sequela of fracture',
            u'without tophus (tophi)', u'with other organ involvement',
            u'with respiratory involvement', 'elbow', u'lumbosacral region', u'hip',
            u'forearm', u'thoracolumbar and lumbosacral intervertebral disc disorder',
            u'pelvis', u'toe(s)', u'proximal humerus', u'tibia', u'with myopathy',
            u'subsequent encounter for fracture with routine healing',
            u'ankle and joints of foot', u'hand', u'finger joints', u'wrist',
            u'overuse and pressure other site', u'ankle and foot', u'knee',
            u'cervicothoracic region', u"angle's class i", u'cervical region',
            'vertebra', u'upper limb', u'sacral and sacrococcygeal region',
            u'lower leg']
        # Match longer (more specific) area names first
        self.areas.sort(key=lambda x: len(x.split(" ")), reverse=True)

    #----------------------------------------------------------------------
    def getmainlist(self):
        """"""
        response = urllib2.urlopen("http://www.icd10data.com/ICD10CM/Codes/M00-M99")
        self.htmlparser = etree.HTMLParser()
        tree = etree.parse(response, self.htmlparser)
        self.hreflist = tree.xpath("/html/body/div[2]/div/div[4]/ul/li/a/@href")
        self.getsublist(self.hreflist)
        self.writer.close()

    #----------------------------------------------------------------------
    def getsublist(self, hreflist):
        """"""
        for href in hreflist:
            response = urllib2.urlopen(self.base + href)
            soup = BeautifulSoup(response.read(), "lxml")
            lists = soup.select("ul li span a")
            for l in lists:
                self.selectcode(l.attrs["href"])

    #----------------------------------------------------------------------
    def selectcode(self, link):
        """"""
        response = urllib2.urlopen(self.base + link)
        soup = BeautifulSoup(response.read().decode("gbk").encode("utf-8"), "html.parser")
        greenimgs = soup.select('img[src="/images/bullet_triangle_green.png"]')
        for greenimg in greenimgs:
            sibilings = greenimg.parent.findChildren("span")
            code = sibilings[0].a.text
            description = sibilings[1].text
            side = "NULL"
            area = "NULL"
            area, side = self.setarea_side(description)
            description = self.setdescription(sibilings[1], sibilings[0].a)
            self.writer.insert(code, description, 10, side, area, 0)

    #----------------------------------------------------------------------
    def setdescription(self, description_obj, link_obj):
        """"""
        # A truncated description ends with "……"; fetch the full text from the code's detail page
        if (description_obj.text.find(u"\u2026\u2026") != -1):
            response = urllib2.urlopen(self.base + link_obj.attrs["href"])
            soup = BeautifulSoup(response.read().decode("gbk").encode("utf-8"), "html.parser")
            description = soup.select("div div div h2")[0].text
            return description
        return description_obj.text

    #----------------------------------------------------------------------
    def setarea_side(self, description):
        """"""
        area = "NULL"
        side = "NULL"
        desc = description
        for direction in self.direction:
            if (desc.find(direction) != -1):
                side = direction
                desc = desc.replace(direction + " ", "")
                break
        for pos in self.areas:
            if (desc.find(pos) != -1):
                area = pos
                break
        if (area == "joint" and side == "NULL"):
            area = "NULL"
        return area, side
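# --- Illustrative usage (assumption): the Parser above is Python 2 code (urllib2,
# sets.Set, lxml.etree, BeautifulSoup). Under that assumption, a run crawls the
# hard-coded ICD-10 M00-M99 chapter index, inserts one row per code through
# Writer("temp.txt"), and closes the writer when done.
if __name__ == "__main__":
    parser = Parser()
    parser.getmainlist()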
class Books:
    def __init__(self, path=None, arabic=True):
        self.arabic = arabic
        # Browsing and writing managers
        self.br = Browser()
        self.wr = Writer(path) if path else Writer()
        # An array for scraped books
        self._books_ids = []

    # Append an external books ids array to local array
    def append_books(self, books_ids):
        # Loop through sent books ids
        for book_id in books_ids:
            # Only append id if it's not stored already
            if book_id not in self._books_ids:
                self._books_ids.append(book_id)

    # Scrape books and write them to a file (browse is: list, lists, author or shelf)
    def output_books(self, keyword=None, browse="list", file_name="books"):
        self.wr.open(file_name, "w+")
        # Get books if keyword is provided, otherwise output stored books
        books_ids = self.get_books(keyword, browse) if keyword else self._books_ids
        # Loop through book ids and write them
        for book_id in books_ids:
            self.wr.write(book_id)
        self.wr.close()

    def output_books_editions(self, books_ids=None, file_name="editions"):
        skip = len(read_books(file_name))
        self.wr.open(file_name, "a+")
        # Loop through book ids and write their editions id
        for book_id in books_ids[skip:] or self._books_ids[skip:]:
            editions_id = self.get_book_editions_id(book_id)
            # Editions id is None when page refuses to load
            if editions_id is None:
                return self.wr.close()
            # Write editions id to file if it loads correctly
            self.wr.write(editions_id or "-" * 7)
            # Display book id and editions id
            print(f"Book ID:\t{book_id:<15}Book Editions ID:\t{editions_id or ''}")
        self.wr.close()
        return True

    def output_books_edition_by_language(self, editions_ids, lang="Arabic", file_name="ara_books"):
        skip = len(read_books(file_name))
        self.wr.open(file_name, "a+")
        # Loop through editions ids and write their books ids
        for editions_id in editions_ids[skip:]:
            books_ids = self.get_book_edition_by_language(editions_id, lang) if editions_id.isdigit() else ''
            # Books ids is None when page refuses to load
            if books_ids is None:
                return self.wr.close()
            # Write books ids to file if the page loads correctly
            self.wr.write(books_ids or "-" * 7)
            # Display editions id and books ids
            print(f"Book Editions ID:\t{editions_id:<15}Books IDs:\t{books_ids or ''}")
        self.wr.close()
        # Open a new file to move done list to it
        self.wr.open(file_name + "_list")
        # Loop through previously scraped editions ids
        for line in read_books(file_name):
            # If line isn't empty
            if line != "-" * 7:
                # Write each book edition id in a separate line
                [self.wr.write(id_) for id_ in line.split(',')]
        self.wr.close()
        return True

    # Main function to scrape books ids
    def get_books(self, keyword, browse="list"):
        # Get lists in search list if searching
        if browse == "lists":
            keywords = self._get_lists(keyword.replace(' ', '+'))
            browse = "list"
        # Otherwise, it's a single "list" or "shelf"
        else:
            keywords = [
                str(key) for key in (
                    keyword if isinstance(keyword, list) else [keyword]
                )]
        try:
            # Loop through all lists
            for keyword in keywords:
                # Open each list url
                self.br.open_page(keyword, browse)
                # Scrape pages until there's no next page
                while True:
                    self._scrape_list("book", self._books_ids)
                    if not self.br.goto_next_page():
                        break
        except Exception as e:
            print("Couldn't go to next page:", e)
        finally:
            return self._books_ids

    def get_book_editions_id(self, book_id):
        self.br.open("/book/show/", book_id)
        return self.br.editions_id()

    def get_book_edition_by_language(self, editions_id, lang):
        self.br.open_book_editions(editions_id)
        soup = BeautifulSoup(self.br.page_source, "lxml").find(class_="workEditions")
        if not soup:
            return None
        editions = []
        for details in soup.find_all(class_="editionData"):
            language, rating = [row.find(class_="dataValue") for row in details.find_all(class_="dataRow")[-3:-1]]
            if language.text.strip() == lang:
                reviewers = get_digits(rating.find("span").text)
                if reviewers > 50:
                    editions.append(id_from_url.match(details.find(class_="bookTitle")["href"]).group(1))
        return ','.join(editions)

    # Main function to scrape lists ids
    def _get_lists(self, keyword):
        lists = []
        # Open GoodReads' lists search url
        self.br.open_list_search(keyword)
        # Scrape all result pages
        while True:
            self._scrape_list("list", lists)
            # Go to next page if there's one, otherwise break
            if not self.br.goto_next_page():
                break
        return lists

    # Scrape a single search results page
    def _scrape_list(self, title, array):
        soup = BeautifulSoup(self.br.page_source, "lxml").find(class_="tableList")
        if not soup:
            return None
        for book in soup.find_all("tr"):
            if self.arabic or get_digits(book.find(class_="minirating").text.split("—")[1]) > 1000:
                try:
                    # Get id from url
                    id_ = id_from_url.match(book.find(class_=title + "Title")["href"]).group(1)
                except Exception:
                    print("Couldn't extract Book Id from URL")
                    continue
                # Extract and store unique id from link
                if id_ not in array:
                    array.append(id_)
                    print(f"{title.capitalize()} {id_:<10}count:\t{len(array)}")
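# --- Illustrative usage sketch: the list keyword and file names below are placeholders;
# Browser, Writer, read_books, get_digits and id_from_url come from the surrounding
# project and are assumed to behave as used in the class above.
books = Books(path="output", arabic=True)
# Scrape book ids from a Goodreads list and write them to "books"
books.output_books("1.Best_Books_Ever", browse="list", file_name="books")
# Re-use the scraped ids to collect their editions ids
books.append_books(read_books("books"))
books.output_books_editions(read_books("books"), file_name="editions")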
def process(ROOT_PATH, CORE_FILE, fid, rev):
    WXRX_LOG_FILE = _get_log_file_(ROOT_PATH, fid)

    # set BASE_TIME from the 2nd line (logging start) in the WXRX_LOG_FILE
    BASE_TIME = get_base_time(WXRX_LOG_FILE)

    WXRX_NETCDF_FILENAME = 'weather-radar_faam_%s_r%s_%s.nc' % (
        datetime.datetime.strftime(BASE_TIME, '%Y%m%d'), str(rev), str.lower(fid))

    if os.path.exists(os.path.join(ROOT_PATH, WXRX_NETCDF_FILENAME)):
        sys.stdout.write('weather radar netCDF\n')
        sys.stdout.write(' ... %s\n' % os.path.join(ROOT_PATH, WXRX_NETCDF_FILENAME))
        sys.stdout.write('already exists! Exiting ...\n')
        sys.exit(2)

    # get unique valid wxrx-tmp-filelist from log file
    wxrx_file_list = get_wxrx_tmp_filelist(WXRX_LOG_FILE)

    # Size of the largest tmp-wxrx-data file bounds the number of ARINC 708 records
    MAX_SIZE = np.max([
        os.stat(os.path.join(ROOT_PATH, wxrx_file)).st_size
        for wxrx_file in wxrx_file_list
    ])
    MAXIMUM_NUMBER_OF_RECORDS = (MAX_SIZE * 8 // 1744) + 1

    wxrx_data_list = []
    _RECS = np.zeros(MAXIMUM_NUMBER_OF_RECORDS,
                     dtype=[('label', np.str_, 4),
                            ('control_accept', np.byte),
                            ('slave', np.byte),
                            ('mode_annunciation', np.byte),
                            ('faults', np.byte),
                            ('stabilization', np.byte),
                            ('operating_mode', np.byte),
                            ('tilt', float),
                            ('gain', float),
                            ('range', np.int16),
                            ('data_accept', np.byte),
                            ('scan_angle', float),
                            ('reflectivity', np.byte, (512, ))])

    A708 = Arinc708()
    for wxrx_file in wxrx_file_list:
        sys.stdout.write('Reading ... %s\n' % (wxrx_file))
        # TODO: adding progressbar to see where we are including ETA
        wxrx_data = Reader(os.path.join(ROOT_PATH, wxrx_file))
        wxrx_data.parse()
        sys.stdout.write(str(wxrx_data))
        ix = []
        for i in range(len(wxrx_data.Buswords)):
            try:
                _RECS[i] = A708.parse(wxrx_data.Buswords[i])
                ix.append(i)
            except Exception:
                # skip buswords that can not be parsed as ARINC 708 records
                pass
        wxrx_data.sIndexList = list(np.array(wxrx_data.sIndexList)[ix])
        add_timestamp(wxrx_data, WXRX_LOG_FILE)
        wxrx_data.Records = _RECS[ix]
        wxrx_data_list.append(wxrx_data)
        # Delete to save memory
        del wxrx_data

    # TODO
    _s = Setup(os.path.join(ROOT_PATH, WXRX_NETCDF_FILENAME))
    sys.stdout.write('Creating empty netCDF ...\n')
    sys.stdout.write('Writing data to ... %s\n' % (os.path.join(ROOT_PATH, WXRX_NETCDF_FILENAME)))
    wxrx_nc_writer = Writer(os.path.join(ROOT_PATH, WXRX_NETCDF_FILENAME), wxrx_data_list)
    wxrx_nc_writer.write()
    sys.stdout.write('Merging faam_core data ... %s\n' % (CORE_FILE))
    # TODO
    wxrx_nc_writer.merge_core_file(CORE_FILE)
    wxrx_nc_writer.close()

    # create overview figure
    Overview(
        os.path.join(ROOT_PATH, WXRX_NETCDF_FILENAME),
        os.path.join(
            ROOT_PATH, '%s_%s_wxrx_overview.png' %
            (fid, datetime.datetime.strftime(BASE_TIME, '%Y%m%d'))))
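# --- Illustrative call (placeholder paths, flight id and core file name): process()
# locates the wxrx log for the flight id, converts the raw ARINC 708 dumps to a netCDF
# via Writer, merges the FAAM core file and renders an overview figure next to the netCDF.
process(ROOT_PATH='/data/flights/c013',
        CORE_FILE='/data/flights/c013/core_faam_20170103_r0_c013.nc',
        fid='c013',
        rev=0)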
class Reviews:
    def __init__(self, path=None, lang="ar", edition_reviews=False):
        # Language of reviews to be scraped
        self._lang = lang
        # Instantiate browsing and writing managers
        self.wr = Writer(path) if path else Writer()
        self.br = Browser(edition_reviews)
        # Initialize an empty threads list
        self._threads = []
        # Counter for reviews from different languages
        self._invalid = None

    def start(self):
        self.br.start()

    # Scrape and write books' reviews to separate files
    def output_books_reviews(self, books_ids, consider_previous=True):
        if consider_previous:
            # Don't loop through already scraped books
            self.wr.consider_written_files(books_ids)
        # Show how many books are going to be scraped
        print(f"Scraping {len(books_ids)} Books")
        # Loop through book ids in array and scrape books
        for book_id in books_ids:
            self.output_book_reviews(book_id)

    # Scrape and write one book's reviews to a file
    def output_book_reviews(self, book_id):
        self._threads.clear()
        # Open book file and page by its Id
        self.br.open_book_page(book_id)
        self.wr.open_book_file(book_id)
        # Reset invalid reviews counter and page counter
        self._invalid = 0
        # Scrape book meta data in first line
        self.run(self._scrape_book_meta, [book_id])
        # Scrape first page of the book anyway
        self.run(self._scrape_book_reviews)
        no_next_page = False
        try:
            # Scrape the remaining pages
            while self._invalid < 60:
                # Go to next page if there's one
                in_next_page = self.br.goto_next_page()
                if no_next_page or not in_next_page:
                    no_next_page = False
                    # Switch to a different reviews mode
                    if not self.br.switch_reviews_mode(book_id, in_next_page is None):
                        # Break after switching to all modes
                        break
                # Wait until requested book reviews are loaded
                if self.br.are_reviews_loaded():
                    # Scrape loaded book reviews
                    self.run(self._scrape_book_reviews)
                else:
                    no_next_page = True
        finally:
            # Wait until all threads are done
            [thread.join() for thread in self._threads]
            # Finalize file name and close it
            self.wr.close_book_file()

    # Scrape and write book and author data
    def _scrape_book_meta(self, html, book_id):
        # Create soup object and store book meta section of the page in soup
        soup = BeautifulSoup(html, "lxml").find(id="metacol")
        # If book is not found
        if not soup:
            print(f"*Book ID:\t{book_id:<15}Not Found!")
            # Close file and raise an error
            self.wr.close_book_file()
            raise FileNotFoundError
        # Get book title and remove spaces from it
        title = soup.find(id="bookTitle").get_text(". ", strip=True)
        # Get average rating of the book out of five
        rating = soup.find(class_="average").get_text()
        # Store author data section
        author = soup.find(class_="authorName")
        # Get author id from url
        id_ = author.get("href")[38:].split(".")[0]
        # Get author name
        name = author.find().get_text()
        # Write scraped meta data to file's first line
        self.wr.write_book_meta(book_id, title, rating, id_, name)
        # Display book id and title
        print(f"*Book ID:\t{book_id:<15}Title:\t{title}")

    # Scrape a single page's reviews
    def _scrape_book_reviews(self, html):
        # Store reviews section of the page in soup
        soup = BeautifulSoup(html, "lxml").find(id="bookReviews")
        # Loop through reviews individually
        for review in soup.find_all(class_="review"):
            try:
                # Get user / reviewer id
                user_id = review.find(class_="user").get("href")[11:].split("-")[0]
                # Get rating out of five stars
                stars = len(review.find(class_="staticStars").find_all(class_="p10"))
                # Get full review text even the hidden parts, and remove spaces and newlines
                comment = review.find(class_="readable").find_all("span")[-1].get_text(". ", strip=True)
                # Detect which language the review is in
                if detect(comment) != self._lang:
                    # Count it as a different language review
                    self._invalid += 1
                    continue
                # Get review date
                date = review.find(class_="reviewDate").get_text()
            # Skip the rest if one of the above is missing
            except Exception:
                # Count it as an invalid review
                self._invalid += 2
                continue
            # If it's not a strike, reset the counter
            self._invalid = 0
            # Get review ID
            review_id = review.get("id")[7:]
            # Write the scraped review to the file
            self.wr.write_review(review_id, user_id, date, stars, comment)
            # Add review id to ids
            print(f"Added ID:\t{review_id}")
        return True

    # Starts a scraping process on a new thread
    def run(self, method, args=[]):
        # Create a thread and add it to threads list then start it
        self._threads.append(SafeThread(target=method, args=[self.br.page_source] + args))
        self._threads[-1].start()

    def reset(self):
        self.stop()
        self.start()
        print("Restarted Reviews")

    def stop(self):
        self.br.close()
        self.wr.delete_file()

    def close(self):
        self.br.quit()
        self.wr.close()
        self._threads.clear()
        print("Closed Reviews")