def main():
    """Push the sample PowerPoint file as a single document with permissions.

    The source id, organization id and API key are read from the
    PUSH_SOURCE_ID, PUSH_ORG_ID and PUSH_API_KEY environment variables; the
    placeholder text is used for any of them that is unset or empty.
    """
    env = os.environ
    source_id = env.get('PUSH_SOURCE_ID') or '--Enter your source id--'
    org_id = env.get('PUSH_ORG_ID') or '--Enter your org id--'
    api_key = env.get('PUSH_API_KEY') or '--Enter your API key--'

    # Push client used for the single-document call at the end
    client = CoveoPush.Push(source_id, org_id, api_key)

    # The document URI is derived from the relative file path
    sample_path = os.path.join('testfiles', 'Example.pptx')
    doc = Document('file:///' + sample_path)
    doc.GetFileAndCompress(sample_path)

    # rssauthors should be set as a multi-value field in your Coveo Cloud organization
    doc.AddMetadata("connectortype", "PPTX")
    doc.AddMetadata("rssauthors", ["Coveo", "R&D"])
    doc.Title = "THIS IS A TEST"

    # One allowed user identity, nobody denied, anonymous access allowed
    allowed_user = CoveoPermissions.PermissionIdentity(
        CoveoConstants.Constants.PermissionIdentityType.User,
        "",
        "*****@*****.**",
    )
    doc.SetAllowedAndDeniedPermissions([allowed_user], [], True)

    client.AddSingleDocument(doc)
def main():
    """Push the sample PowerPoint file as a single document with permissions.

    The credentials are placeholders that must be replaced before running.
    The sample file path is built with os.path.join so the example also finds
    the file on platforms whose path separator is not a backslash.
    """
    # Local import: keeps this example runnable even if the module does not import os.
    import os

    sourceId = '--Enter your source id--'
    orgId = '--Enter your org id--'
    apiKey = '--Enter your API key--'

    # Setup the push client
    push = CoveoPush.Push(sourceId, orgId, apiKey)

    # Portable equivalent of 'testfiles\\Example.pptx' (identical on Windows)
    myfile = os.path.join('testfiles', 'Example.pptx')

    # Create a document
    mydoc = Document('file:///' + myfile)
    # Get the file contents and add it to the document
    mydoc.GetFileAndCompress(myfile)

    # Set the metadata
    mydoc.AddMetadata("connectortype", "CSV")
    # rssauthors should be set as a multi-value field in your Coveo Cloud organization
    mydoc.AddMetadata("rssauthors", ["Coveo", "R&D"])

    # Set the title
    mydoc.Title = "THIS IS A TEST"

    # Set permissions: one allowed user identity, nobody denied
    user_email = "*****@*****.**"
    myperm = CoveoPermissions.PermissionIdentity(
        CoveoConstants.Constants.PermissionIdentityType.User, "", user_email)
    allowAnonymous = True
    mydoc.SetAllowedAndDeniedPermissions([myperm], [], allowAnonymous)

    # Push the document
    push.AddSingleDocument(mydoc)
def _build_quickview_html(post):
    """Return the quickview/preview HTML page for one person record."""
    title = post['FirstName'] + " " + post['LastName'] + " (" + post['JobFunction'] + ")"
    head = (
        "<meta charset='UTF-16'>"
        "<meta http-equiv='Content-Type' content='text/html; charset=UTF-16'>"
        "<html><head><title>" + title + "</title><style>"
    )
    stylesheet = (
        ".datagrid table { border-collapse: collapse; text-align: left; } "
        ".datagrid {display:table !important;font: normal 12px/150% Arial, Helvetica, sans-serif; "
        "background: #fff; overflow: hidden; border: 1px solid #006699; "
        "-webkit-border-radius: 3px; -moz-border-radius: 3px; border-radius: 3px; }"
        ".datagrid table td, .datagrid table th { padding: 3px 10px; }"
        ".datagrid table thead th {background:-webkit-gradient( linear, left top, left bottom, color-stop(0.05, #006699), color-stop(1, #00557F) );"
        "background:-moz-linear-gradient( center top, #006699 5%, #00557F 100% );"
        "filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#006699', endColorstr='#00557F');"
        "background-color:#006699; color:#FFFFFF; font-size: 15px; font-weight: bold; border-left: 1px solid #0070A8; } "
        ".datagrid table thead th:first-child { border: none; }"
        ".datagrid table tbody td { color: #00496B; border-left: 1px solid #E1EEF4;font-size: 12px;font-weight: normal; }"
        ".datagrid table tbody tr:nth-child(even) td { background: #E1EEF4; color: #00496B; }"
        ".datagrid table tbody td:first-child { border-left: none; }"
        ".datagrid table tbody tr:last-child td { border-bottom: none; }"
    )
    # Each table row shows the field name next to its value. JobFamily is now
    # interpolated like the other fields instead of being emitted as the
    # literal text "post['JobFamily']".
    fields = ('FirstName', 'MiddleName', 'LastName',
              'PositionDescription', 'JobFunction', 'JobFamily')
    rows = "".join(
        "<tr><td>" + field + "</td><td>" + post[field] + "</td></tr>"
        for field in fields
    )
    return (
        head + stylesheet
        + "</style></head><body style='Font-family:Arial'>"
        + "<div class='datagrid'><table><tbody>"
        + rows
        + "</tbody></table></div></body></html>"
    )


def add_document(post):
    """Build the push document for one person record.

    Args:
        post: mapping with the string values 'UserName', 'FirstName',
            'MiddleName', 'LastName', 'PositionDescription', 'JobFunction'
            and 'JobFamily'.

    Returns:
        The populated Document (content, metadata, dates, title, permissions).

    Raises:
        KeyError: if one of the keys listed above is missing from post.
    """
    # Create new push document
    mydoc = Document('https://myreference&id=' + post['UserName'])

    # Build up the quickview/preview (HTML)
    mydoc.SetContentAndZLibCompress(_build_quickview_html(post))
    # Set the fileextension
    mydoc.FileExtension = ".html"

    # Set metadata
    mydoc.AddMetadata("connectortype", "CSV")
    # rssauthors should be set as a multi-value field in your Coveo Cloud organization
    mydoc.AddMetadata("rssauthors", ["Coveo", "R&D"])

    # Set the dates from a single timestamp so both carry the same value
    now = datetime.datetime.now()
    mydoc.SetDate(now)
    mydoc.SetModifiedDate(now)

    mydoc.Title = post['FirstName'] + ' ' + post['LastName'] + ' (' + post['JobFunction'] + ')'

    # Set permissions: one allowed user identity, nobody denied, anonymous allowed
    user_email = "*****@*****.**"
    myperm = CoveoPermissions.PermissionIdentity(
        CoveoConstants.Constants.PermissionIdentityType.User, "", user_email)
    mydoc.SetAllowedAndDeniedPermissions([myperm], [], True)

    print('\nUser %s for title "%s"' % (user_email, post['FirstName']))
    return mydoc
def main():
    """Push a plain-text test document, wait, then remove it again.

    The credentials are placeholders that must be replaced before running.
    """
    document_uri = "https://myreference&id=TESTME"

    source_id = '--Enter your source id--'
    org_id = '--Enter your org id--'
    api_key = '--Enter your API key--'
    client = CoveoPush.Push(source_id, org_id, api_key)

    # Build the document: plain text body, extension, metadata, title
    doc = Document(document_uri)
    doc.SetData("ALL OF THESE WORDS ARE SEARCHABLE")
    doc.FileExtension = ".html"
    doc.AddMetadata("connectortype", "CSV")
    doc.Title = "THIS IS A TEST"

    # One allowed user identity, nobody denied, anonymous access allowed
    allowed_user = CoveoPermissions.PermissionIdentity(
        CoveoConstants.Constants.PermissionIdentityType.User,
        "",
        "*****@*****.**",
    )
    doc.SetAllowedAndDeniedPermissions([allowed_user], [], True)

    client.AddSingleDocument(doc)

    # Pause between the add and the remove call
    time.sleep(100)

    client.RemoveSingleDocument(document_uri)
def add_document(adres, pc, wpl, gem, prov, opp, bouwjaar, lon, lat, kamers, prijs, counter):
    """Build the push document for one address record.

    A metadata field is added only when checkEmpty(value) is truthy for it.
    The document URI is derived from counter and the title from adres and wpl.

    Returns:
        The populated Document.
    """
    doc = Document('https://myreference&id=' + str(counter))
    doc.FileExtension = ".html"

    # (metadata name, value) pairs, in the order they are added
    candidates = (
        ("lat", lat),
        ("lon", lon),
        ("adres", adres),
        ("pc", pc),
        ("wpl", wpl),
        ("gem", gem),
        ("prov", prov),
        ("opp", opp),
        ("bouwjaar", bouwjaar),
        ("kamers", kamers),
        ("prijs", prijs),
    )
    for field_name, value in candidates:
        if checkEmpty(value):
            doc.AddMetadata(field_name, value)

    doc.Title = adres + " in " + wpl
    return doc
def addNeighboorhood(neighboorhood):
    """Build the push document for one neighbourhood record.

    Args:
        neighboorhood: mapping with the keys 'id', 'name', 'mydescr',
            'mycountry', 'mycity', 'mylat' and 'mylon'.

    Returns:
        The populated Document, tagged with objecttype 'Neighboorhood'.
    """
    doc = Document('https://neighboorhood?id=' + neighboorhood['id'])
    doc.FileExtension = ".html"
    doc.Title = neighboorhood['name']

    # These record keys are copied to metadata fields of the same name
    for key in ('mydescr', 'mycountry', 'mycity', 'mylat', 'mylon'):
        doc.AddMetadata(key, neighboorhood[key])
    doc.AddMetadata('objecttype', 'Neighboorhood')

    return doc
def create_document(pokemon: Pokemon) -> Document:
    """Build the push document for one Pokemon.

    The document URI is the Pokemon's link, the title is its name, and each
    attribute listed below is copied to a metadata field of the same name.
    """
    doc = Document(pokemon.link)
    doc.Title = pokemon.name

    copied_attributes = (
        'name',
        'link',
        'image_link',
        'poke_type',
        'number',
        'generation',
        'height',
        'weight',
        'species',
        'description',
    )
    for attribute in copied_attributes:
        doc.AddMetadata(attribute, getattr(pokemon, attribute))

    return doc
def main():
    """Push a small plain-text document ("doc2") to the source.

    The source id, organization id and API key are read from the
    PUSH_SOURCE_ID, PUSH_ORG_ID and PUSH_API_KEY environment variables; the
    placeholder text is used for any of them that is unset or empty.
    """
    env = os.environ
    source_id = env.get('PUSH_SOURCE_ID') or '--Enter your source id--'
    org_id = env.get('PUSH_ORG_ID') or '--Enter your org id--'
    api_key = env.get('PUSH_API_KEY') or '--Enter your API key--'
    client = CoveoPush.Push(source_id, org_id, api_key)

    # Body, extension, one metadata field and the title
    doc = Document("https://myreference/doc2")
    doc.SetData("This is document Two")
    doc.FileExtension = ".html"
    doc.AddMetadata("authors", "*****@*****.**")
    doc.Title = "What's up Doc 2?"

    client.AddSingleDocument(doc)
def createDoc(myfile, version):
    """Build a push document for a file, with version as part of its URI.

    Args:
        myfile: path of the file whose contents are loaded into the document.
        version: string placed between 'file:///' and the file path in the URI.

    Returns:
        The populated Document (content, metadata, title, permissions).
    """
    doc = Document('file:///' + version + "/" + myfile)
    doc.GetFileAndCompress(myfile)

    # rssauthors should be set as a multi-value field in your Coveo Cloud organization
    doc.AddMetadata("connectortype", "CSV")
    doc.AddMetadata("rssauthors", ["Coveo", "R&D"])
    doc.Title = "THIS IS A TEST"

    # One allowed user identity, nobody denied, anonymous access allowed
    allowed_user = CoveoPermissions.PermissionIdentity(
        CoveoConstants.Constants.PermissionIdentityType.User,
        "",
        "*****@*****.**",
    )
    doc.SetAllowedAndDeniedPermissions([allowed_user], [], True)
    return doc
def main():
    """Push a plain-text test document, then delete older documents.

    An ordering id is created before the push and handed to DeleteOlderThan
    afterwards. The credentials are placeholders that must be replaced before
    running.
    """
    source_id = '--Enter your source id--'
    org_id = '--Enter your org id--'
    api_key = '--Enter your API key--'
    client = CoveoPush.Push(source_id, org_id, api_key)

    # Taken before the push; used as the cut-off for the final delete call
    cutoff_ordering_id = client.CreateOrderingId()

    doc = Document("https://myreference&id=TESTME")
    doc.SetData("ALL OF THESE WORDS ARE SEARCHABLE")
    doc.FileExtension = ".html"

    # rssauthors should be set as a multi-value field in your Coveo Cloud organization
    doc.AddMetadata("connectortype", "CSV")
    doc.AddMetadata("rssauthors", ["Coveo", "R&D"])
    doc.Title = "THIS IS A TEST"

    # One allowed user identity, nobody denied, anonymous access allowed
    allowed_user = CoveoPermissions.PermissionIdentity(
        CoveoConstants.Constants.PermissionIdentityType.User,
        "",
        "*****@*****.**",
    )
    doc.SetAllowedAndDeniedPermissions([allowed_user], [], True)

    client.AddSingleDocument(doc)
    client.DeleteOlderThan(cutoff_ordering_id)
def main():
    """Push a test document with HTML quickview content, then delete older documents.

    An ordering id is created before the push and handed to DeleteOlderThan
    afterwards. The credentials are placeholders that must be replaced before
    running.
    """
    source_id = '--Enter your source id--'
    org_id = '--Enter your org id--'
    api_key = '--Enter your API key--'
    client = CoveoPush.Push(source_id, org_id, api_key)

    # Taken before the push; used as the cut-off for the final delete call
    cutoff_ordering_id = client.CreateOrderingId()

    doc = Document('https://myreference&id=TESTME')

    # The content is assembled from a fixed head, stylesheet and table rows.
    # It will also be available as the quickview.
    page_head = (
        "<meta charset='UTF-16'>"
        "<meta http-equiv='Content-Type' content='text/html; charset=UTF-16'>"
        "<html><head><title>My First Title</title><style>"
    )
    stylesheet = (
        ".datagrid table { border-collapse: collapse; text-align: left; } "
        ".datagrid {display:table !important;font: normal 12px/150% Arial, Helvetica, sans-serif; "
        "background: #fff; overflow: hidden; border: 1px solid #006699; "
        "-webkit-border-radius: 3px; -moz-border-radius: 3px; border-radius: 3px; }"
        ".datagrid table td, .datagrid table th { padding: 3px 10px; }"
        ".datagrid table thead th {background:-webkit-gradient( linear, left top, left bottom, color-stop(0.05, #006699), color-stop(1, #00557F) );"
        "background:-moz-linear-gradient( center top, #006699 5%, #00557F 100% );"
        "filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#006699', endColorstr='#00557F');"
        "background-color:#006699; color:#FFFFFF; font-size: 15px; font-weight: bold; border-left: 1px solid #0070A8; } "
        ".datagrid table thead th:first-child { border: none; }"
        ".datagrid table tbody td { color: #00496B; border-left: 1px solid #E1EEF4;font-size: 12px;font-weight: normal; }"
        ".datagrid table tbody tr:nth-child(even) td { background: #E1EEF4; color: #00496B; }"
        ".datagrid table tbody td:first-child { border-left: none; }"
        ".datagrid table tbody tr:last-child td { border-bottom: none; }"
    )
    sample_record = (
        ("FirstName", "Willem"),
        ("MiddleName", "Van"),
        ("LastName", "Post"),
        ("PositionDescription", "VP Engineering"),
        ("JobFunction", "CTO"),
        ("JobFamily", "Management"),
    )
    table_rows = "".join(
        "<tr><td>" + label + "</td><td>" + value + "</td></tr>"
        for label, value in sample_record
    )
    content = (
        page_head + stylesheet
        + "</style></head><body style='Font-family:Arial'>"
        + "<div class='datagrid'><table><tbody>"
        + table_rows
        + "</tbody></table></div></body></html>"
    )
    doc.SetContentAndZLibCompress(content)

    # rssauthors should be set as a multi-value field in your Coveo Cloud organization
    doc.AddMetadata("connectortype", "CSV")
    doc.AddMetadata("rssauthors", ["Coveo", "R&D"])
    doc.Title = "THIS IS A TEST"

    # One allowed user identity, nobody denied, anonymous access allowed
    allowed_user = CoveoPermissions.PermissionIdentity(
        CoveoConstants.Constants.PermissionIdentityType.User,
        "",
        "*****@*****.**",
    )
    doc.SetAllowedAndDeniedPermissions([allowed_user], [], True)

    client.AddSingleDocument(doc)
    client.DeleteOlderThan(cutoff_ordering_id)
def main():
    """Walk through a security provider, a secured document and identity expansions.

    Steps, in order:
      1. Create the "MySecurityProviderTest" provider cascading to an email provider
         and delete permissions older than a fresh ordering id.
      2. Push one document carrying two permission levels (users allowed/denied
         on the first level, groups allowed on the second).
      3. Upload a batch expansion: HR/RD group members, user-to-email mappings
         and deleted users.
      4. Add the SALES group and its users with single calls.
      5. Remove the SALES group identity.

    The script blocks on input() between steps. The credentials are
    placeholders that must be replaced before running.
    """
    sourceId = '--Enter your source id--'
    orgId = '--Enter your org id--'
    apiKey = '--Enter your API key--'

    # Setup the push client
    push = CoveoPush.Push(sourceId, orgId, apiKey)

    # First set the securityprovidername
    mysecprovidername = "MySecurityProviderTest"

    # Define cascading security provider information
    cascading = {
        "Email Security Provider": {
            "name": "Email Security Provider",
            "type": "EMAIL"
        }
    }

    # Create it
    push.AddSecurityProvider(mysecprovidername, "EXPANDED", cascading)
    startOrderingId = push.CreateOrderingId()
    # Delete all old entries
    push.DeletePermissionsOlderThan(mysecprovidername, startOrderingId)
    print ("Old ids removed. Updating security cache")
    input ("Press any key to continue...")

    # Create a document
    mydoc = Document('https://myreference&id=TESTMESECURITY')
    # Set the content. This will also be available as quickview for that document.
    content = "<meta charset='UTF-16'><meta http-equiv='Content-Type' content='text/html; charset=UTF-16'><html><head><title>My First Title</title><style>.datagrid table { border-collapse: collapse; text-align: left; } .datagrid {display:table !important;font: normal 12px/150% Arial, Helvetica, sans-serif; background: #fff; overflow: hidden; border: 1px solid #006699; -webkit-border-radius: 3px; -moz-border-radius: 3px; border-radius: 3px; }.datagrid table td, .datagrid table th { padding: 3px 10px; }.datagrid table thead th {background:-webkit-gradient( linear, left top, left bottom, color-stop(0.05, #006699), color-stop(1, #00557F) );background:-moz-linear-gradient( center top, #006699 5%, #00557F 100% );filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#006699', endColorstr='#00557F');background-color:#006699; color:#FFFFFF; font-size: 15px; font-weight: bold; border-left: 1px solid #0070A8; } .datagrid table thead th:first-child { border: none; }.datagrid table tbody td { color: #00496B; border-left: 1px solid #E1EEF4;font-size: 12px;font-weight: normal; }.datagrid table tbody tr:nth-child(even) td { background: #E1EEF4; color: #00496B; }.datagrid table tbody td:first-child { border-left: none; }.datagrid table tbody tr:last-child td { border-bottom: none; }</style></head><body style='Font-family:Arial'><div class='datagrid'><table><tbody><tr><td>FirstName</td><td>Willem</td></tr><tr><td>MiddleName</td><td>Van</td></tr><tr><td>LastName</td><td>Post</td></tr><tr><td>PositionDescription</td><td>VP Engineering</td></tr><tr><td>JobFunction</td><td>CTO</td></tr><tr><td>JobFamily</td><td>Management</td></tr></tbody></table></div></body></html>"
    mydoc.SetContentAndZLibCompress(content)

    # Set the metadata
    mydoc.AddMetadata("connectortype","CSV")
    authors = []
    authors.append( "Coveo" )
    authors.append( "R&D" )
    # rssauthors should be set as a multi-value field in your Coveo Cloud organization
    mydoc.AddMetadata("rssauthors", authors)
    # Set the title
    mydoc.Title = "THIS IS A TEST"

    # Define a list of users that should have access to the document.
    users = []
    users.append("wim")
    users.append("peter")

    # Define a list of users that should not have access to the document.
    deniedusers = []
    deniedusers.append("alex")
    deniedusers.append("anne")

    # Define a list of groups that should have access to the document.
    groups = []
    groups.append("HR")
    groups.append("RD")
    groups.append("SALES")

    # Create the permission Levels. Each level can include multiple sets.
    permLevel1 = CoveoPermissions.DocumentPermissionLevel('First')
    permLevel1Set1 = CoveoPermissions.DocumentPermissionSet('1Set1')
    permLevel1Set2 = CoveoPermissions.DocumentPermissionSet('1Set2')
    permLevel1Set1.AllowAnonymous = False
    permLevel1Set2.AllowAnonymous = False
    permLevel2 = CoveoPermissions.DocumentPermissionLevel('Second')
    permLevel2Set = CoveoPermissions.DocumentPermissionSet('2Set1')
    permLevel2Set.AllowAnonymous = False

    # Set the allowed permissions for the first set of the first level
    for user in users:
        # Create the permission identity
        permLevel1Set1.AddAllowedPermission(CoveoPermissions.PermissionIdentity(CoveoConstants.Constants.PermissionIdentityType.User, mysecprovidername, user))

    # Set the denied permissions for the second set of the first level
    for user in deniedusers:
        # Create the permission identity
        permLevel1Set2.AddDeniedPermission(CoveoPermissions.PermissionIdentity(CoveoConstants.Constants.PermissionIdentityType.User, mysecprovidername, user))

    # Set the allowed permissions for the first set of the second level
    for group in groups:
        # Create the permission identity
        permLevel2Set.AddAllowedPermission(CoveoPermissions.PermissionIdentity(CoveoConstants.Constants.PermissionIdentityType.Group, mysecprovidername, group))

    # Set the permission sets to the appropriate level
    permLevel1.AddPermissionSet(permLevel1Set1)
    permLevel1.AddPermissionSet(permLevel1Set2)
    permLevel2.AddPermissionSet(permLevel2Set)

    # Set the permissions on the document
    mydoc.Permissions.append(permLevel1)
    mydoc.Permissions.append(permLevel2)

    # Push the document
    push.AddSingleDocument(mydoc)

    # First do a single call to update an identity
    # We now also need to add the expansion/memberships/mappings to the security cache
    # The previously defined identities were: alex, anne, wim, peter
    usersingroup = []
    usersingroup.append("wimingroup")
    usersingroup.append("peteringroup")

    # Remove the last group ("SALES"), so we can add it later with a single call
    groups.pop()

    push.StartExpansion( mysecprovidername )

    # group memberships for: HR, RD
    for group in groups:
        # for each group set the users
        members = []
        for user in usersingroup:
            # Create a permission Identity
            members.append(CoveoPermissions.PermissionIdentityExpansion( CoveoConstants.Constants.PermissionIdentityType.User, mysecprovidername, user ))
        push.AddExpansionMember(CoveoPermissions.PermissionIdentityExpansion( CoveoConstants.Constants.PermissionIdentityType.Group, mysecprovidername, group ), members, [],[] )

    # mappings for all users, from userid to email address
    # (users now also holds the denied users and the group members)
    users.extend(deniedusers)
    users.extend(usersingroup)
    for user in users:
        # Create a permission Identity
        mappings=[]
        mappings.append(CoveoPermissions.PermissionIdentityExpansion( CoveoConstants.Constants.PermissionIdentityType.User, "Email Security Provider", user+"@coveo.com" ))
        wellknowns=[]
        wellknowns.append(CoveoPermissions.PermissionIdentityExpansion( CoveoConstants.Constants.PermissionIdentityType.Group, mysecprovidername, "Everyone"))
        push.AddExpansionMapping(CoveoPermissions.PermissionIdentityExpansion( CoveoConstants.Constants.PermissionIdentityType.User, mysecprovidername, user ), [], mappings, wellknowns )

    # Remove deleted users
    # Deleted Users
    delusers = []
    delusers.append("wimn")
    delusers.append("petern")
    for user in delusers:
        # Add each identity to delete to the Deleted
        push.AddExpansionDeleted(CoveoPermissions.PermissionIdentityExpansion( CoveoConstants.Constants.PermissionIdentityType.User, mysecprovidername, user ),[],[],[])

    # End the expansion and write the last batch
    push.EndExpansion( mysecprovidername )

    print ("Now updating security cache.")
    print ("Check:")
    print (" HR/RD groups: members wimingroup, peteringroup")
    print (" SALES: should not have any members")
    print (" each user: wim, peter, anne, wimingroup should have also mappings to Email security providers")
    input ("Press any key to continue...")

    # Add a single call, add the Sales group
    usersingroup = []
    usersingroup.append("wiminsalesgroup")
    usersingroup.append("peterinsalesgroup")

    members = []
    for user in usersingroup:
        # Create a permission identity
        mappings = []
        mappings.append(CoveoPermissions.PermissionIdentityExpansion(CoveoConstants.Constants.PermissionIdentityType.User, "Email Security Provider", user + "@coveo.com"))
        wellknowns = []
        wellknowns.append(CoveoPermissions.PermissionIdentityExpansion(CoveoConstants.Constants.PermissionIdentityType.Group, mysecprovidername, "Everyone"))
        members.append(CoveoPermissions.PermissionIdentityExpansion(CoveoConstants.Constants.PermissionIdentityType.User, mysecprovidername, user))
        push.AddPermissionExpansion(mysecprovidername, CoveoPermissions.PermissionIdentityExpansion(CoveoConstants.Constants.PermissionIdentityType.User, mysecprovidername, user), [], mappings, wellknowns)

    # Both the "Everyone" and the "SALES" group receive the sales users as members
    push.AddPermissionExpansion(mysecprovidername, CoveoPermissions.PermissionIdentityExpansion( CoveoConstants.Constants.PermissionIdentityType.Group, mysecprovidername, "Everyone"), members, [],[])
    push.AddPermissionExpansion(mysecprovidername, CoveoPermissions.PermissionIdentityExpansion( CoveoConstants.Constants.PermissionIdentityType.Group, mysecprovidername, "SALES"), members, [],[])

    print ("Now updating security cache.")
    print ("Check:")
    print (" HR/RD groups: members wimingroup, peteringroup")
    print (" SALES: should have members wiminsalesgroup, peterinsalesgroup")
    print (" each user: wim, peter, anne, wimingroup should also have mappings to Email security providers")
    input ("Press any key to continue...")

    # Remove an identity
    # Group SALES should be removed
    push.RemovePermissionIdentity(mysecprovidername, CoveoPermissions.PermissionIdentityExpansion( CoveoConstants.Constants.PermissionIdentityType.Group, mysecprovidername, "SALES"))

    print ("Now updating security cache.")
    print ("Check:")
    print (" HR/RD groups: members wimingroup,peteringroup")
    print (" NO wiminsalesgroup,peterinsalesgroup")
    print (" each user: wim, peter, anne, wimingroup should have also mappings to Email security providers")
def scrap():
    """Scrape the national Pokedex on pokemondb.net and push one document per entry.

    The Coveo credentials are read from the COVEO_SOURCE_ID, COVEO_API_KEY and
    COVEO_ORG_ID environment variables, and the allowed user from USER_EMAIL.
    For every info card on the list page, the Pokemon's own page is fetched,
    its vitals, base stats and type-defense values are turned into metadata,
    and the document is added to the batch started with push.Start and
    finished with push.End.

    Raises:
        AttributeError, IndexError, KeyError, ValueError: if a page does not
            have the structure this scraper expects (missing tag, table, row,
            attribute or unit text).
    """
    pokemon_list_page = requests.get('https://pokemondb.net/pokedex/national')
    soup_pokemon_list_page = BeautifulSoup(pokemon_list_page.content, 'html.parser')
    info_cards = soup_pokemon_list_page.find(id='main').find_all('div', class_='infocard')

    coveo_source_id = os.environ.get("COVEO_SOURCE_ID")
    coveo_api_key = os.environ.get("COVEO_API_KEY")
    coveo_org_id = os.environ.get("COVEO_ORG_ID")
    push = CoveoPush.Push(coveo_source_id, coveo_org_id, coveo_api_key)
    push.Start(True, True)
    push.SetSizeMaxRequest(150 * 1024 * 1024)

    user_email = os.environ.get("USER_EMAIL")
    my_permissions = CoveoPermissions.PermissionIdentity(
        CoveoConstants.Constants.PermissionIdentityType.User, "", user_email)

    for info_card in info_cards:
        # Name and link come from the same anchor; look it up once
        name_link = info_card.find('a', class_='ent-name')
        pokemon_name = name_link.text
        pokemon_page_url = 'https://pokemondb.net' + name_link['href']
        document = Document(pokemon_page_url)

        # The sprite span uses one of two class strings; try the plain one first
        sprite = info_card.find('span', class_='img-fixed img-sprite')
        if sprite is None:
            sprite = info_card.find(
                'span', class_='img-fixed img-sprite img-sprite-v18')
        pokemon_picture_url = sprite['data-src']

        # The first <small> holds the number; [1:] drops its leading character
        pokemon_number = info_card.find('small').text[1:]
        pokemon_gen = find_gen(int(pokemon_number))
        pokemon_types_tags = info_card.find_all('small')[1].find_all('a')
        print('scrapping pokemon: ' + pokemon_name + ' | index : ' +
              pokemon_number)
        pokemon_types = [type_tag.text for type_tag in pokemon_types_tags]

        pokemon_page = requests.get(pokemon_page_url)
        soup_pokemon_page = BeautifulSoup(pokemon_page.content, 'html.parser')
        page_main = soup_pokemon_page.find(id='main')
        tables = page_main.find_all('table', class_='vitals-table')

        # Rows 2, 3 and 4 of the first vitals table: species, height, weight
        vitals_rows = tables[0].find_all('tr')
        pokemon_species = vitals_rows[2].find('td').text
        pokemon_height = vitals_rows[3].find('td').text
        pokemon_weight = vitals_rows[4].find('td').text

        # Base stats: header cell text -> first data cell text, from the fourth table
        base_stats = {
            stat_row.find('th').text: stat_row.find('td').text
            for stat_row in tables[3].find_all('tr')
        }

        # Type defenses: header row names the type, second row holds the value.
        # Rows and cells are fetched once per table instead of once per column.
        defenses_tables = page_main.find_all(
            'table', class_='type-table type-table-pokedex')
        for defense_table in defenses_tables:
            defense_rows = defense_table.find_all('tr')
            type_headers = defense_rows[0].find_all('th')
            if not type_headers:
                continue
            value_cells = defense_rows[1].find_all('td')
            for x, type_header in enumerate(type_headers):
                document.AddMetadata(type_header.find('a').text,
                                     value_cells[x].text)

        document.Title = pokemon_name
        document.SetData(pokemon_page.text)
        document.FileExtension = ".html"
        document.AddMetadata('name', pokemon_name)
        document.AddMetadata('url', pokemon_page_url)
        document.AddMetadata('number', pokemon_number)
        document.AddMetadata('generation', pokemon_gen)
        document.AddMetadata('types', pokemon_types)
        document.AddMetadata('specie', pokemon_species)
        document.AddMetadata('weight', pokemon_weight)
        # Text before the unit, minus the one character preceding it
        document.AddMetadata('weight_int',
                             pokemon_weight[0:pokemon_weight.index('kg') - 1])
        document.AddMetadata('height', pokemon_height)
        document.AddMetadata('height_int',
                             pokemon_height[0:pokemon_height.index('m') - 1])
        document.AddMetadata('hp', base_stats.get('HP'))
        document.AddMetadata('hp_int', base_stats.get('HP'))
        document.AddMetadata('attack', base_stats.get('Attack'))
        document.AddMetadata('attack_int', base_stats.get('Attack'))
        document.AddMetadata('defense', base_stats.get('Defense'))
        document.AddMetadata('defense_int', base_stats.get('Defense'))
        # NOTE(review): these keys must match the header text exactly; if the
        # page writes them with a space ("Sp. Atk"), get() returns None - verify.
        document.AddMetadata('sp_atk', base_stats.get('Sp.Atk'))
        document.AddMetadata('sp_def', base_stats.get('Sp.Def'))
        document.AddMetadata('speed', base_stats.get('Speed'))
        document.AddMetadata('speed_int', base_stats.get('Speed'))
        document.AddMetadata('picture_url', pokemon_picture_url)
        document.SetAllowedAndDeniedPermissions([my_permissions], [], True)

        print('Send: ' + pokemon_name + ' | index : ' + pokemon_number +
              ' to the PUSH API')
        push.Add(document)
        print('Sent: ' + pokemon_name + ' | index : ' + pokemon_number +
              ' to the PUSH API')

    push.End(True, True)
poke_type = poke_type[:-1] # This make sure that there are no special characthers try: print(name) except: name = name[:-1] # First add the document mydoc = Document(link) # Set plain text mydoc.SetData(name + ' ' + poke_type.replace(";", " ") + ' ' + generation) # Set FileExtension mydoc.FileExtension = ".html" # Add Metadata mydoc.AddMetadata("connectortype", "HTML") mydoc.AddMetadata("pokemon_name", name) mydoc.AddMetadata("pokemon_picture", pic_url) mydoc.AddMetadata("pokemon_type", poke_type) mydoc.AddMetadata("pokemon_generation", generation[len(generation) - 1:]) mydoc.AddMetadata("pokemon_number", number) # Set the title mydoc.Title = name # Push the document push.AddSingleDocument(mydoc)