def main():
    """Build the detailed record list and save it as jardinesInfoDetallada.json.

    Reads the "jardines" list from jardines.json, passes every entry through
    getAdditionalInfo, and writes the collected results in input order.
    """
    source = files.readJsonFile("jardines.json")
    detailed = []
    for entry in source["jardines"]:
        detailed.append(getAdditionalInfo(entry))
        # Fixed pause after every entry; presumably throttles whatever
        # getAdditionalInfo fetches -- confirm against its implementation.
        time.sleep(5)
    files.save_as_json_2('jardinesInfoDetallada.json', detailed)
def main():
    """Collect the entries from the listing page and its paginator into jardines.json.

    The output has two keys: "jardines" (entries parsed from the first page
    followed by the entries of every paginator item, in order) and "paginas"
    (the paginator items exactly as returned by parsePaginator).
    """
    first_page = request.get_content_parsed(
        "https://guia-capital-federal.escuelasyjardines.com.ar/guia-jardines-de-infantes-en-capital-federal-belgrano.htm")
    page_refs = parsePaginator(first_page)
    listings = parseJardines(first_page)
    for page_ref in page_refs:
        # Concatenate rather than extend so the object returned by
        # parseJardines is never mutated in place.
        listings = listings + parseJardinesPerPage(page_ref)
    files.save_as_json_2(
        'jardines.json', {"jardines": listings, "paginas": page_refs})
def initOutputFile(candidates):
    """Create sources/resultsGoGoDuck.json unless it already exists.

    When the file is present, only a notice is printed and nothing is
    written. Otherwise one record per candidate is saved, carrying the
    candidate's "nombre", "apellido" and "distrito" plus "processed" set to
    False.
    """
    if files.exists('sources/resultsGoGoDuck.json'):
        print("File already exists")
        return

    seeded = [
        {
            "nombre": person["nombre"],
            "apellido": person["apellido"],
            "distrito": person["distrito"],
            "processed": False,
        }
        for person in candidates
    ]
    files.save_as_json_2('sources/resultsGoGoDuck.json', seeded)
def main():
    """Look up resources for pending candidates and save the updated list.

    Seeds the results file via initOutputFile, then walks every record in it.
    Records already flagged "processed" are skipped. For the others, the
    normalized nombre/apellido/distrito are passed to findCandidateResources
    and the returned "resources" and "url" are stored on the record. A record
    is flagged "processed" only when at least one resource came back.
    """
    results_path = 'sources/resultsGoGoDuck.json'

    initOutputFile(repository.findCandidatesWithoutResources())
    candidates = files.readJsonFile(results_path)

    handled = []
    for candidate in candidates:
        # NOTE(review): the cap is "more than 50 handled", so a 51st lookup
        # is still performed in a single run -- confirm whether 50 was meant.
        if candidate["processed"] or len(handled) > 50:
            continue

        lookup = findCandidateResources(
            matcher.normalize(candidate["nombre"]),
            matcher.normalize(candidate["apellido"]),
            matcher.normalize(candidate["distrito"]))
        candidate["resources"] = lookup["resources"]
        candidate["url"] = lookup["url"]
        if len(lookup["resources"]) > 0:
            candidate["processed"] = True

        handled.append(candidate)
        print("candidates processed {}: {} ".format(
            lookup["url"], len(handled)))
        time.sleep(10)  # seconds

    files.save_as_json_2(results_path, candidates)
def main():
    """Transform sources/paso2017.json with `map` and save sources/boletas.json."""
    loaded = files.readJsonFile('sources/paso2017.json')
    # NOTE(review): `map` receives a single argument here, so it is presumably
    # a function defined in this file that shadows the builtin -- confirm.
    boletas = map(loaded)
    files.save_as_json_2('sources/boletas.json', boletas)
def main():
    """Apply cleanResult to every stored result and write the file back.

    The same path, sources/cleanedResultsGoGoDuck.json, is used for both the
    read and the write, so the file is overwritten with its cleaned contents.
    """
    path = 'sources/cleanedResultsGoGoDuck.json'
    files.save_as_json_2(path, map(cleanResult, files.readJsonFile(path)))
def main():
    """Fetch the hard-coded JSON endpoint, run `process` on it and save the result.

    The processed output is written to sources/paso2017.json.
    """
    endpoint = "http://olcreativa.lanacion.com.ar/dev/get_url/?key2=1mIchf9frOP7w9IJfqIC7QJ28VB0SwwfAB2bJqP4AaBM&gid=0&output=json"
    raw = request.get_content(endpoint)
    files.save_as_json_2('sources/paso2017.json', process(raw))