def _get_revisions(pageid=None, base_revision=None, chunk_size=CHUNK_SIZE, continuous=False):
    """
    Fetch revisions of a page through ``WikiFetch.fetch_revisions_for_page``.

    A new ``WikiFetch`` instance is created on every call. The number of
    revisions returned is printed to stdout before the result is handed back.

    :param pageid: Page identifier, forwarded as ``pageid``.
    :param base_revision: Forwarded to the fetcher as ``start_rev``.
    :param chunk_size: Forwarded as ``chunk_size``; defaults to the
        module-level ``CHUNK_SIZE``.
    :param continuous: Forwarded unchanged as ``continuous``
        (exact semantics are defined by ``WikiFetch`` -- not visible here).
    :return: Whatever ``fetch_revisions_for_page`` returns; it must support
        ``len()`` because its length is printed.
    """
    fetch_kwargs = dict(
        pageid=pageid,
        start_rev=base_revision,
        chunk_size=chunk_size,
        continuous=continuous
    )
    fetched = WikiFetch().fetch_revisions_for_page(**fetch_kwargs)

    # Single parenthesised argument: prints the same text as the bare
    # Python 2 print statement.
    print("Length of revisions at this level {}".format(len(fetched)))
    return fetched
def generate():
    """
    Poll Wikipedia for recently changed pages and register analysis work.

    For every page reported by ``WikiFetch.get_recent_changes()``:

    - the page row in ``db.wikipages`` is written via ``update_or_insert``
      (keyed on ``pageid``) with the latest known revision and title;
    - the available analysis types are read with
      ``_get_analysis_type_from_db()``;
    - ``create_analysis`` is called once per analysis type, and the
      ``'value'`` of every response whose ``'status'`` is ``"SUCCESS"`` is
      collected in ``generated_entries``.

    The original notes describe this as being run continuously by a job, and
    state that an analysis entry keeps its existing last-annotated revision,
    or starts at 0 when new, and is marked active. That behaviour would live
    in ``create_analysis`` and is not visible here -- verify there.

    NOTE: the function returns ``locals()``, so every local variable name
    below is part of the returned dict. Do not rename or remove locals
    (including the otherwise unused ``page_id``) without checking consumers.

    :return: ``locals()`` -- a dict of all local variables, including
        ``generated_entries`` and ``pages_with_changes``.
    """
    # Analysis entries successfully created during this call.
    generated_entries = set()

    # Dict of pages with recent changes, keyed by page id.
    pages_with_changes = WikiFetch.get_recent_changes()

    for k, v in pages_with_changes.iteritems():
        # Page id is the key; title and latest revision come from the value.
        # Language is not available from this call.
        pageid, title, last_known_rev = k, v['title'], v['last_known_rev']

        # Create or refresh this page's row in the page table.
        query = db.wikipages.pageid == pageid
        page_id = db.wikipages.update_or_insert(
            query,
            pageid=pageid,
            last_known_rev=last_known_rev,
            title=title
        )

        # Analysis types are looked up again for every page.
        analysis_types = _get_analysis_type_from_db()
        print(analysis_types)

        # Request one analysis entry per available analysis type.
        for analysis in analysis_types.keys():
            analysis_entry = create_analysis(pageid=pageid, analysis_type=analysis)
            print(analysis_entry)

            if analysis_entry['status'] != "SUCCESS":
                # TODO: Based on response, log the error or success of entries.
                continue

            # Entry was created: remember it for this run.
            generated_entries.add(analysis_entry['value'])

    return locals()
def _get_revisions(pageid=None, base_revision=None, chunk_size=CHUNK_SIZE, continuous=False):
    """
    Return the revisions of a page as fetched by ``WikiFetch``.

    Instantiates ``WikiFetch``, calls ``fetch_revisions_for_page`` with the
    given arguments, prints how many revisions came back, and returns them.

    :param pageid: Page identifier passed through as ``pageid``.
    :param base_revision: Passed to the fetcher as ``start_rev``.
    :param chunk_size: Passed through as ``chunk_size``; defaults to the
        module-level ``CHUNK_SIZE``.
    :param continuous: Passed through as ``continuous``
        (meaning is defined by ``WikiFetch`` -- not visible here).
    :return: The object returned by ``fetch_revisions_for_page``; its
        ``len()`` is printed, so it must be sized.
    """
    fetcher = WikiFetch()
    result = fetcher.fetch_revisions_for_page(
        pageid=pageid,
        start_rev=base_revision,
        chunk_size=chunk_size,
        continuous=continuous
    )

    # One parenthesised argument keeps the output identical to the
    # Python 2 print statement.
    summary = "Length of revisions at this level {}".format(len(result))
    print(summary)
    return result
def generate():
    """
    Poll Wikipedia for recently changed pages and register analysis work.

    For each page returned by ``WikiFetch.get_recent_changes()`` this:

    1. writes the page's row in ``db.wikipages`` through
       ``update_or_insert`` (matched on ``pageid``), storing the title and
       the last known revision;
    2. reads the available analysis types with
       ``_get_analysis_type_from_db()``;
    3. calls ``create_analysis`` for every analysis type and records the
       ``"value"`` of each response whose ``"status"`` equals ``"SUCCESS"``.

    The original notes describe this as being run continuously by a job, and
    state that an existing analysis entry keeps its last-annotated revision
    while a new one starts at 0 and is marked active. That logic would be in
    ``create_analysis`` and cannot be confirmed from this function.

    NOTE: ``locals()`` is the return value, so the names of all local
    variables are part of the output. Keep them stable (including the
    otherwise unused ``page_id``) unless every consumer is checked.

    :return: ``locals()`` -- dict of every local variable, among them
        ``generated_entries`` and ``pages_with_changes``.
    """
    # Set of analysis entries generated in this single call.
    generated_entries = set()

    # Pages with recent changes, as a dict keyed by page id.
    pages_with_changes = WikiFetch.get_recent_changes()

    for k, v in pages_with_changes.iteritems():
        # Key is the page id; the value carries title and latest revision.
        # Language is not available from this call.
        pageid = k
        title, last_known_rev = v["title"], v["last_known_rev"]

        # Update this page's entry in the page table, or insert it.
        query = db.wikipages.pageid == pageid
        page_id = db.wikipages.update_or_insert(
            query, pageid=pageid, last_known_rev=last_known_rev, title=title
        )

        # Available analysis types are fetched again for each page.
        analysis_types = _get_analysis_type_from_db()
        print(analysis_types)

        # One analysis requirement per available analysis type.
        for analysis in analysis_types.keys():
            analysis_entry = create_analysis(pageid=pageid, analysis_type=analysis)
            print(analysis_entry)

            created = analysis_entry["status"] == "SUCCESS"
            if not created:
                # TODO: Based on response, log the error or success of entries.
                continue

            generated_entries.add(analysis_entry["value"])

    return locals()