class Deliverables:
    """
    Driver that finds a project's deliverables page, extracts the
    deliverable records from it and serializes them as RRS XML.

    Options may be given either as a mapping with the same keys as the
    class-level C{opt} defaults, or as an object exposing those names as
    attributes (NOTE(review): presumably the parsed command-line options
    -- confirm against the caller).
    """

    # static options (defaults used for every option the caller leaves out)
    opt = {
        'debug': False,
        'verbose': False,
        'regexp': None,
        'quiet': False,
        'page': False,
        'file': None,
        'storefile': False,
    }

    def __init__(self, options=None, url=None):
        """
        Store the options and create the helper objects.

        @type options: dict or object with option attributes
        @param options: Extraction options; the class-level defaults are
                        used when omitted.
        @type url: string
        @param url: Start URL. When None the C{url} option is used.
        @raise ValueError: when no URL is available from either source.
        """
        # get options; copy the defaults so instances never share one dict
        if options is None:
            self.opt = dict(self.__class__.opt)
        else:
            self.opt = options

        if url is not None:
            self.opt_url = url
        else:
            self.opt_url = self._option('url')
        if self.opt_url is None:
            raise ValueError("No URL given: pass 'url' or set the 'url' option.")

        # initialize main html handler and parser
        self.htmlhandler = GetHTMLAndParse()

        # searching deliverable page
        self.pagesearch = GetDelivPage(self.opt_url,
                                       verbose=self._option('verbose'),
                                       debug=self._option('debug'),
                                       addkeyw=self._option('regexp'))

        # extracting informations from page
        self.recordhandler = GetDelivRecords(debug=self._option('debug'))

    def _option(self, name):
        """
        Read one option from C{self.opt}, whether it is a mapping or an
        object with attributes.

        @type name: string
        @param name: Option name.
        @return: The option value, or the class-level default (None for
                 names without a default) when the option is not set.
        """
        fallback = self.__class__.opt.get(name)
        if isinstance(self.opt, dict):
            return self.opt.get(name, fallback)
        return getattr(self.opt, name, fallback)

    def __debug(self, msg):
        """ Print msg on STDOUT when the debug option is enabled """
        if self._option('debug'):
            print("Debug message: " + str(msg))

    def main(self):
        """
        Main method handling all objects

        @return: the list obtained from the page search when it is empty
                 or starts with -1; otherwise the RRSProject instance when
                 the extracted records are a list, or whatever the record
                 handler returned when they are not.
        """
        # Searching deliverable page
        if self._option('page'):
            self.links = [self.opt_url]
        else:
            self.links = self.pagesearch.get_deliverable_page()

        # -1 in the first slot marks a failed search; an empty result is
        # handed back the same way instead of failing on the index
        if not self.links or self.links[0] == -1:
            return self.links

        if self._option('verbose'):
            print("*" * 80)
            # two spaces on purpose: same output as the former
            # comma-separated print statement
            print("Deliverable page:  " + " ".join(self.links))
            print("*" * 80)

        pr = RRSProject()

        # Project - Url relationship
        if not self._option('page'):
            pr_url = RRSUrl(link=self.opt_url)
            pr_url_rel = RRSRelationshipProjectUrl()
            pr_url_rel.set_entity(pr_url)
            pr['url'] = pr_url_rel

        self.recordhandler.process_pages(self.links)
        records = self.recordhandler.get_deliverables()

        if not isinstance(records, list):
            return records

        # create relationship Project Publication
        for r in records:
            rel = RRSRelationshipPublicationProject()
            rel.set_entity(r)
            pr['publication'] = rel

        # create XML from RRSProject
        output = StringIO.StringIO()
        try:
            converter = Model2XMLConverter(stream=output)
            converter.convert(pr)
            out = output.getvalue()
        finally:
            output.close()

        # Either store the XML into a file or print it on STDOUT
        if self._option('storefile'):
            r = self._storeToFile(self.opt_url, out)
            # test if store ok
            if r[0] != 1:
                print(r[1])
        else:
            print(out.encode('UTF-8'))

        return pr

    def _storeToFile(self, url, res):
        """
        From url generates filename, creates file and save res into it

        @type url: string
        @param url: URL the file name is derived from (':' becomes '.',
                    '/', '?' and '#' are dropped, '.xml' is appended).
        @type res: string
        @param res: Text to store; it is written UTF-8 encoded.
        @return: (1, 'OK') on success, (-1, message) when the file cannot
                 be created, (-2, message) when the data cannot be written.
        """
        name = url.replace(':', '.').replace("/", "").replace("?", "").replace("#", "")
        file_name = name + ".xml"
        filepath = os.path.join(os.getcwd(), file_name)

        try:
            # binary mode: the payload is already encoded bytes
            fw = open(filepath, "wb")
        except (IOError, OSError):
            return (-1, 'Cannot make output file.')

        try:
            fw.write(res.encode('UTF-8'))
            fw.flush()
        except (IOError, OSError, UnicodeError):
            return (-2, 'Cannot write data to output file.')
        finally:
            # close on every path so a failed write does not leak the handle
            fw.close()

        return (1, 'OK')
class Deliverables:
    """
    Driver that finds a project's deliverables page, extracts the
    deliverable records from it and serializes them as RRS XML.

    Options may be given either as a mapping with the same keys as the
    class-level C{opt} defaults, or as an object exposing those names as
    attributes (NOTE(review): presumably the parsed command-line options
    -- confirm against the caller).
    """

    # static options (defaults used for every option the caller leaves out)
    opt = {
        'debug': False,
        'verbose': False,
        'regexp': None,
        'quiet': False,
        'page': False,
        'file': None,
        'storefile': False,
    }

    def __init__(self, options=None, url=None):
        """
        Store the options and create the helper objects.

        @type options: dict or object with option attributes
        @param options: Extraction options; the class-level defaults are
                        used when omitted.
        @type url: string
        @param url: Start URL. When None the C{url} option is used.
        @raise ValueError: when no URL is available from either source.
        """
        # get options; copy the defaults so instances never share one dict
        if options is None:
            self.opt = dict(self.__class__.opt)
        else:
            self.opt = options

        if url is not None:
            self.opt_url = url
        else:
            self.opt_url = self._option('url')
        if self.opt_url is None:
            raise ValueError("No URL given: pass 'url' or set the 'url' option.")

        # initialize main html handler and parser
        self.htmlhandler = GetHTMLAndParse()

        # searching deliverable page
        self.pagesearch = GetDelivPage(self.opt_url,
                                       verbose=self._option('verbose'),
                                       debug=self._option('debug'),
                                       addkeyw=self._option('regexp'))

        # extracting informations from page
        self.recordhandler = GetDelivRecords(debug=self._option('debug'))

    def _option(self, name):
        """
        Read one option from C{self.opt}, whether it is a mapping or an
        object with attributes.

        @type name: string
        @param name: Option name.
        @return: The option value, or the class-level default (None for
                 names without a default) when the option is not set.
        """
        fallback = self.__class__.opt.get(name)
        if isinstance(self.opt, dict):
            return self.opt.get(name, fallback)
        return getattr(self.opt, name, fallback)

    def __debug(self, msg):
        """ Print msg on STDOUT when the debug option is enabled """
        if self._option('debug'):
            print("Debug message: " + str(msg))

    def main(self):
        """
        Main method handling all objects

        @return: the list obtained from the page search when it is empty
                 or starts with -1; otherwise the RRSProject instance when
                 the extracted records are a list, or whatever the record
                 handler returned when they are not.
        """
        # Searching deliverable page
        if self._option('page'):
            self.links = [self.opt_url]
        else:
            self.links = self.pagesearch.get_deliverable_page()

        # -1 in the first slot marks a failed search; an empty result is
        # handed back the same way instead of failing on the index
        if not self.links or self.links[0] == -1:
            return self.links

        if self._option('verbose'):
            print("*" * 80)
            # two spaces on purpose: same output as the former
            # comma-separated print statement
            print("Deliverable page:  " + " ".join(self.links))
            print("*" * 80)

        pr = RRSProject()

        # Project - Url relationship
        if not self._option('page'):
            pr_url = RRSUrl(link=self.opt_url)
            pr_url_rel = RRSRelationshipProjectUrl()
            pr_url_rel.set_entity(pr_url)
            pr['url'] = pr_url_rel

        self.recordhandler.process_pages(self.links)
        records = self.recordhandler.get_deliverables()

        if not isinstance(records, list):
            return records

        # create relationship Project Publication
        for r in records:
            rel = RRSRelationshipPublicationProject()
            rel.set_entity(r)
            pr['publication'] = rel

        # create XML from RRSProject
        output = StringIO.StringIO()
        try:
            converter = Model2XMLConverter(stream=output)
            converter.convert(pr)
            out = output.getvalue()
        finally:
            output.close()

        # Either store the XML into a file or print it on STDOUT
        if self._option('storefile'):
            r = self._storeToFile(self.opt_url, out)
            # test if store ok
            if r[0] != 1:
                print(r[1])
        else:
            print(out.encode('UTF-8'))

        return pr

    def _storeToFile(self, url, res):
        """
        From url generates filename, creates file and save res into it

        @type url: string
        @param url: URL the file name is derived from (':' becomes '.',
                    '/', '?' and '#' are dropped, '.xml' is appended).
        @type res: string
        @param res: Text to store; it is written UTF-8 encoded.
        @return: (1, 'OK') on success, (-1, message) when the file cannot
                 be created, (-2, message) when the data cannot be written.
        """
        name = url.replace(':', '.').replace("/", "").replace("?", "").replace("#", "")
        file_name = name + ".xml"
        filepath = os.path.join(os.getcwd(), file_name)

        try:
            # binary mode: the payload is already encoded bytes
            fw = open(filepath, "wb")
        except (IOError, OSError):
            return (-1, 'Cannot make output file.')

        try:
            fw.write(res.encode('UTF-8'))
            fw.flush()
        except (IOError, OSError, UnicodeError):
            return (-2, 'Cannot write data to output file.')
        finally:
            # close on every path so a failed write does not leak the handle
            fw.close()

        return (1, 'OK')
class Deliverables:
    """
    Class implementing interface for purpose of using this module in other
    projects
    """

    # RRSProject created by the last run of main(); None before any run
    pr = None
    # RRS XML string (UTF-8 encoded) stored by _storeToFile()
    deliverables_rrs_xml = ""
    # Class-level default patterns; each instance works on its own copy
    regexps = []
    # Deliverables page links; None or empty makes main() search for the page
    links = None

    def __init__(self, debug=False, verbose=False, quiet=True):
        """
        Constructor of the class. Initialize deliverables extractor interface

        @type  debug: boolean
        @param debug: Prints debugging additional information
        @type  quiet: boolean
        @param quiet: No function will output anything on STDOUT when True.
        @type  verbose: boolean
        @param verbose: Prints additional information about parsing on
                        STDOUT when True.
        """
        self.opt = {
            'debug': debug,
            'verbose': verbose,
            'regexp': None,
            'quiet': quiet,
            # We actually do not permit selecting single page without search
            # in this version of interface
            'page': False,
            'file': None,
            # Mechanism of storing file has been overloaded
            # No file is stored. Output RRS-XML is stored in atribute instead
            'storefile': True,
        }
        # Own copy, so add_regexp()/remove_regexp() on one instance do not
        # change the patterns of every other instance
        self.regexps = list(self.__class__.regexps)
        # Records of the last successful run; empty until then
        self.records = []

    def parse(self, url):
        """
        Finds deliverables page and parse data

        @type  url: string
        @param url: String defining initial url for deliverables search.
        """
        # URL of the project
        self.opt_url = url

        # initialize main html handler and parser
        self.htmlhandler = GetHTMLAndParse()

        # searching deliverable page
        self.pagesearch = GetDelivPage(self.opt_url,
                                       verbose=self.opt['verbose'],
                                       debug=self.opt['debug'])

        # extracting informations from page
        self.recordhandler = GetDelivRecords(debug=self.opt['debug'])

        # Proceed with extraction; no links yet, so main() runs the search
        self.links = None
        self.main()

    def parse_page(self, deliverables_url):
        """
        Parses data from the given deliverables page without searching for it

        @type  deliverables_url: string
        @param deliverables_url: String defining url for deliverables
                                 extraction.
        """
        # initialize main html handler and parser
        self.htmlhandler = GetHTMLAndParse()

        # extracting informations from page
        self.recordhandler = GetDelivRecords(debug=self.opt['debug'])

        # URL of the project
        self.opt_url = deliverables_url

        # Proceed with extraction; links are preset, so main() skips the search
        self.links = [deliverables_url]
        self.main()

    def main(self):
        """
        Method implementing actions choosen by parameters in constructor.

        @return: the list obtained from the page search when it is empty
                 or starts with -1; otherwise the RRSProject instance when
                 the extracted records are a list, or whatever the record
                 handler returned when they are not.
        """
        # Forget results of a previous run so the access methods never
        # report stale data after a failed run
        self.pr = None
        self.records = []
        self.deliverables_rrs_xml = ""

        # Searching deliverable page
        if not self.links:
            self.pagesearch._sigwords.extend(self.regexps)
            self.links = self.pagesearch.get_deliverable_page()

        # -1 in the first slot marks a failed search; an empty result is
        # handed back the same way instead of failing on the index
        if not self.links or self.links[0] == -1:
            return self.links

        if self.opt['verbose']:
            print("*" * 80)
            # two spaces on purpose: same output as the former
            # comma-separated print statement
            print("Deliverable page:  " + " ".join(self.links))
            print("*" * 80)

        self.pr = RRSProject()

        # Project - Url relationship
        if not self.opt['page']:
            pr_url = RRSUrl(link=self.opt_url)
            pr_url_rel = RRSRelationshipProjectUrl()
            pr_url_rel.set_entity(pr_url)
            self.pr['url'] = pr_url_rel

        self.recordhandler.process_pages(self.links)
        records = self.recordhandler.get_deliverables()

        if not isinstance(records, list):
            return records

        # create relationship Project Publication
        self.records = records
        for r in records:
            rel = RRSRelationshipPublicationProject()
            rel.set_entity(r)
            self.pr['publication'] = rel

        # create XML from RRSProject
        output = StringIO.StringIO()
        try:
            converter = Model2XMLConverter(stream=output)
            converter.convert(self.pr)
            out = output.getvalue()
        finally:
            output.close()

        # Either keep the XML (via _storeToFile) or print it on STDOUT
        if self.opt['storefile']:
            r = self._storeToFile(self.opt_url, out)
            # test if store ok
            if r[0] != 1:
                print(r[1])
        else:
            print(out.encode('UTF-8'))

        return self.pr

    def _storeToFile(self, url, res):
        """
        Overrides method from original Deliverables class.
        This method just saves the RRS XML string to object atribute.

        @type  res: string
        @param res: Output RRS XML string for writing into object atribute.
        @type  url: string
        @param url: For compatibility with Deliverables class method only.
                    It is not used.
        @return: (1, 'OK')
        """
        self.deliverables_rrs_xml = res.encode('UTF-8')
        return (1, 'OK')

    def get_deliverables(self):
        """
        Access method to object of project with references to all parsed
        deliverables. It does not run any parsing itself.

        @return: RRSProject instance created by the last run, or None when
                 no run got as far as creating it
        """
        return self.pr

    def get_rrs_xml(self):
        """
        Access method to the RRS XML produced by the last run.
        It does not run any parsing itself.

        @return: String with RRS XML (empty when nothing has been stored)
        """
        return self.deliverables_rrs_xml

    def get_json(self):
        """
        Access method to data in form of JSON string.

        @return: String in JSON
        """
        return xml2json(self.get_rrs_xml())

    def get_list(self):
        """
        Access method to the records extracted by the last successful run.

        @return: List of extracted records (presumably RRSPublication
                 instances); empty when nothing has been extracted
        """
        return self.records

    def __debug(self, msg):
        """
        Prints debug message.

        @type  msg: string
        @param msg: String for printing on STDOUT
        """
        if self.opt['debug']:
            print("Debug message: " + str(msg))

    def add_regexp(self, regexp):
        """
        Adds a pattern to the deliverables page ranking regexp list.

        @type  regexp: string
        @param regexp: Regular expression pattern for adding to deliverables
                       page ranking regexp list
        """
        self.regexps.append(regexp)

    def remove_regexp(self, regexp):
        """
        Removes a pattern from the deliverables page ranking regexp list.

        @type  regexp: string
        @param regexp: Regular expression pattern for remove from
                       deliverables page ranking regexp list
        @raise ValueError: when the pattern is not in the list
        """
        self.regexps.remove(regexp)