def get_oredict_from_wiki(name, log_node):
    """Create an OreDict called *name* from the item pages scraped for it.

    Every page title yielded by ``wp.scrape_oredict(name)`` is split with
    ``wp.parse_pagetitle`` and resolved through ``hitDB_or_wiki_for_item``.
    Pages that raise ``wp.BadItemPageException`` are recorded on *log_node*
    and skipped.

    Args:
        name: Name of the ore dictionary; passed to the scraper and stored
            on the created ``OreDict``.
        log_node: Log node that receives error entries and the entries for
            the newly created database rows (via ``add_node``).

    Returns:
        The newly created ``OreDict`` with every resolved item attached.

    Raises:
        wp.NoOreDictException: If no page could be resolved to an item.
    """
    members = set()
    for page in wp.scrape_oredict(name):
        try:
            display_name, mod = wp.parse_pagetitle(page)
            members.add(hitDB_or_wiki_for_item(display_name, mod, page, log_node))
        except wp.BadItemPageException as bad_page:
            # A bad page only drops that one member; keep scanning the rest.
            log_node.add_node(bad_page)

    # Nothing is written to the database unless at least one item resolved.
    if not members:
        raise wp.NoOreDictException()

    ore_dict = OreDict.objects.create(name=name)
    ore_dict_node = log_node.add_node(new_DB_entry(ore_dict))
    for member in members:
        ore_dict.item_set.add(member)
        # Member entries are nested under the ore dictionary's own log entry.
        ore_dict_node.add_node(new_DB_entry(member))
    return ore_dict
def get_oredict_from_wiki(name, log):
    """Create an OreDict called *name* from the item pages scraped for it.

    Every page title yielded by ``wp.scrape_oredict(name)`` is split with
    ``wp.parse_pagetitle`` and resolved through ``hitDB_or_wiki_for_item``.
    Pages that raise ``wp.BadItemPageException`` are appended to *log* and
    skipped.

    Args:
        name: Name of the ore dictionary; passed to the scraper and stored
            on the created ``OreDict``.
        log: Object with an ``append`` method that collects errors, the
            ``result_dbEntry`` records of created rows and a closing message.

    Returns:
        The newly created ``OreDict`` with every resolved item attached.

    Raises:
        wp.NoOreDictException: If no page could be resolved to an item.
    """
    members = set()
    for page in wp.scrape_oredict(name):
        try:
            display_name, mod = wp.parse_pagetitle(page)
            members.add(hitDB_or_wiki_for_item(display_name, mod, page))
        except wp.BadItemPageException as bad_page:
            # A bad page only drops that one member; keep scanning the rest.
            log.append(bad_page)

    # Nothing is written to the database unless at least one item resolved.
    if not members:
        raise wp.NoOreDictException()

    ore_dict = OreDict.objects.create(name=name)
    log.append(result_dbEntry(ore_dict))
    for member in members:
        ore_dict.item_set.add(member)
        log.append(result_dbEntry(member))

    # The message text is emitted verbatim into the log, markup included.
    log.append('Adding recipes using Oredict subsitutions</ul>')
    return ore_dict
def _record_user_skip(err, page_title, item_node, skip_items, skip_mods):
    """Log a UserSkippedException and remember what the user chose to skip."""
    print(item_node.add_node(err))
    if err.skip_item:
        skip_items.add(page_title)
    if err.skip_mod:
        skipped_mod = err.incomplete_page_error.scraped_data['mod']
        if skipped_mod:
            skip_mods.add(skipped_mod)


def old_collect_mod(mod_name, root_lognode, modpack = 'all'):
    """Collect every item in a wiki mod category together with its recipes.

    The category members of ``Category:<mod_name>`` are fetched from the
    ftbwiki API. Each member page is resolved to an item through
    ``rf.hitDB_or_wiki_for_item``; for resolved items every recipe returned
    by ``wp.PageParser(page_title).scrape_recipes()`` is instantiated and
    parsed. Progress and errors are added to the log tree and printed.

    Args:
        mod_name: Category name substituted into the API query.
        root_lognode: Log node under which a 'Collecting Mod' node is added.
        modpack: ``'all'`` to accept recipes from any mod, otherwise the name
            of a ``ModPack`` whose ``mods`` restrict which recipes are parsed.

    Returns:
        None. Results are written through the ``rf`` helpers and the log.
    """
    skip_items = set()
    skip_mods = set()
    mod_items_endpoint = 'https://ftbwiki.org/api.php?action=query&list=categorymembers&cmtitle=Category:{}&cmlimit=10000&format=json'
    # NOTE(review): the second positional argument of requests.get is
    # `params`; confirm that passing True here is what was intended.
    r = requests.get(mod_items_endpoint.format(mod_name), True)
    mod_node = root_lognode.add_node('Collecting Mod: {}'.format(mod_name), 'max')
    print(mod_node)
    try:
        item_pages = json.loads(r.text)['query']['categorymembers']
    except KeyError:
        print(mod_node.add_node('Mod: {} contains no items'.format(mod_name)))
        # Without this return the loop below would hit an unbound
        # `item_pages` and crash instead of finishing cleanly.
        return

    for item_page_data in item_pages:
        page_title = item_page_data['title']
        item_node = mod_node.add_node('Collecting Item: {}'.format(page_title))
        print(item_node)
        display_name, mod = wp.parse_pagetitle(page_title)
        item = None
        try:
            try:
                # Keep retrying until the item resolves. Each failure hands
                # the page to open_webpage first (presumably so the user can
                # supply the missing data or skip -- confirm against
                # open_webpage).
                while item is None:
                    try:
                        item = rf.hitDB_or_wiki_for_item(display_name, mod, page_title, item_node)
                    except wp.IncompletePageException as page_err:
                        print(item_node.add_node(str(page_err)))
                        open_webpage(page_err)
                    except wp.NoWikiTextException as page_err:
                        print(item_node.add_node(str(page_err)))
                        open_webpage(wp.IncompletePageException(page_title, {'display_name':display_name, 'mod':mod}))
            except wp.BadItemPageException as page_err:
                print(item_node.add_node(page_err))

            if item:
                q = wp.PageParser(page_title)
                for recipe in q.scrape_recipes():
                    if not any(banned_item_page in recipe['recipe_terms'] for banned_item_page in skip_items):
                        # Retry loop for one recipe: leave it on success, on
                        # a recipe filtered out by modpack/skip_mods, or when
                        # the user skips.
                        while True:
                            try:
                                prelim_recipe_data = rf.instantiate_recipe(page_title, item, **recipe)
                                if (modpack == 'all' or prelim_recipe_data['new_recipe'].from_mod in ModPack.objects.get(name = modpack).mods.all()) and not prelim_recipe_data['new_recipe'].from_mod.id in skip_mods:
                                    try:
                                        sub_log = rf.parse_recipe(**prelim_recipe_data)
                                    except rf.ConstructRecipeException as recipe_err:
                                        print(item_node.add_node(str(prelim_recipe_data['parent_node'].value) + '-> ERROR: ' + str(recipe_err.sub_error)))
                                        # Re-raise the wrapped error so it can
                                        # be dispatched by type; any other
                                        # type propagates to the handlers
                                        # below.
                                        try:
                                            raise recipe_err.sub_error
                                        except wp.IncompletePageException as sub_err:
                                            # TODO: fix this here from opening prompts for mods that are skipped
                                            if not sub_err.scraped_data['mod'] in skip_mods:
                                                open_webpage(sub_err)
                                        except wp.BadItemPageException as sub_err:
                                            open_webpage(wp.IncompletePageException(page_title, {'display_name':sub_err.display_name, 'mod':sub_err.mod}))
                                    else:
                                        item_node.connect_nodes(sub_log)
                                        print(sub_log)
                                        break
                                else:
                                    break
                            except rf.ConstructRecipeException as recipe_err:
                                print(item_node.add_node(recipe_err))
                            except UserSkippedException as skip_err:
                                _record_user_skip(skip_err, page_title, item_node, skip_items, skip_mods)
                                break
                    else:
                        print(item_node.add_node('Template {} contains skipped item.'.format(recipe['header'])))
        except UserSkippedException as skip_err:
            _record_user_skip(skip_err, page_title, item_node, skip_items, skip_mods)
def scrapeData(request, page_title=None):
    """Scrape one wiki page and show its recipes for selection.

    The page title comes from the *page_title* argument or, when that is
    falsy, from ``request.POST['page_selection']``. If neither is available
    the disambiguation page is rendered again.

    On success the item id, the scraped recipes and the page title are stored
    in the session under ``'output_item'``, ``'scraped_data'`` and
    ``'page_title'``, and ``craftDB/chooseRecipeForm.html`` is rendered.
    ``wp.NoWikiTextException``, ``wp.BadItemPageException`` and
    ``wp.NoRecipesException`` re-render ``craftDB/disambiguation.html`` with
    the error text.

    NOTE(review): ``wp.IncompletePageException`` raised below is only handled
    here if it derives from ``wp.BadItemPageException`` -- confirm.

    Args:
        request: The incoming request; its ``method``, ``POST`` and
            ``session`` attributes are used.
        page_title: Optional wiki page title that bypasses the POST lookup.

    Returns:
        The result of ``render`` for one of the two templates.
    """

    def disambiguation(error=None):
        # Shared response for "pick a page" and for every scrape failure.
        context = {'pages': request.session['pages']}
        if error is not None:
            context['error_message'] = str(error)
        return render(request, 'craftDB/disambiguation.html', context)

    # validate pagename
    posted_selection = request.method == 'POST' and 'page_selection' in request.POST
    if not (posted_selection or page_title):
        return disambiguation()

    if not page_title:
        page_title = request.POST['page_selection']
    display_name, mod = wp.parse_pagetitle(page_title)

    try:
        wikidata = wp.PageParser(page_title)
    except wp.NoWikiTextException as err:
        return disambiguation(err)

    try:
        item = Item.find_item(display_name, mod)
    except Item.DoesNotExist:
        # Unknown item: build it from the page's infobox.
        try:
            infobox_data = wikidata.scrape_infobox()
            try:
                # The form below receives the Mod's id in place of its name.
                infobox_data['mod'] = Mod.objects.get(
                    name=infobox_data['mod']).id
            except KeyError:
                raise wp.IncompletePageException(page_title, infobox_data)
            except Mod.DoesNotExist:
                raise wp.BadItemPageException(
                    'Item: {} is not in your modpack'.format(page_title))

            item_form = ItemForm(infobox_data)
            if not item_form.is_valid():
                raise wp.IncompletePageException(page_title, infobox_data)
            item = item_form.save()
        except wp.BadItemPageException as err:
            return disambiguation(err)

    request.session['output_item'] = item.id

    try:
        recipes_on_page = wikidata.scrape_recipes()
    except wp.NoRecipesException as err:
        return disambiguation(err)

    request.session['scraped_data'] = recipes_on_page
    request.session['page_title'] = page_title
    return render(request, 'craftDB/chooseRecipeForm.html', {
        'output': page_title,
        'recipes': recipes_on_page,
    })