def execute(self, data, _sender=None, value="0", receiver=NFT_CONTRACT, gasLimit=60000000):
    # Build, sign and send a transaction, then decode the smart-contract results.
    if _sender is None:
        _sender = self._sender
    _sender.sync_nonce(self._proxy)
    t = Transaction()
    t.nonce = _sender.nonce
    t.version = get_tx_version()
    t.data = data
    t.receiver = receiver
    t.chainID = self._proxy.get_chain_id()
    t.gasLimit = gasLimit
    t.value = value
    t.sender = _sender.address.bech32()  # use the resolved sender, not always self._sender
    t.gasPrice = DEFAULT_GAS_PRICE
    t.sign(_sender)
    log("Execution d'une transaction sur " + BC_EXPLORER + "/address/" + t.sender)
    rc = t.send_wait_result(self._proxy, 60000)
    for r in rc["smartContractResults"]:
        if "data" in r:
            # Results come back as "@"-separated hex fields.
            r["result"] = list()
            for p in r["data"].split("@"):
                if len(p) > 0:
                    r["result"].append(hex_to_str(int(p, 16)))
    return rc["smartContractResults"]
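# A standalone sketch of the "@"-separated result decoding performed in execute()
# above, assuming (as hex_to_str suggests) that every segment is the hex encoding
# of a UTF-8 string. decode_sc_data is a hypothetical helper, not project code.
def decode_sc_data(data: str) -> list:
    out = []
    for p in data.split("@"):
        if p:  # skip the empty segment produced by a leading "@"
            h = p if len(p) % 2 == 0 else "0" + p  # pad odd-length hex
            out.append(bytes.fromhex(h).decode("utf-8", errors="replace"))
    return out

assert decode_sc_data("@6f6b@46454d49532d6162") == ["ok", "FEMIS-ab"]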
def send_to(request):
    body = request.data
    text = body["text"].replace("’", "")  # strip curly apostrophes (encoding workaround)
    social_link = ""
    if "social" in body and "value" in body["social"] and len(body["social"]["value"]) > 0:
        social_link = ("<br>Vous pouvez répondre directement via <a href='" +
                       body["social"]["value"] + "'>" + body["social"]["label"] + "</a>")
    log("Envoi du mail " + text)
    _from = User.objects.get(id=body["_from"])
    _profil = Profil.objects.get(id=body['_to'])
    # TODO: check the blacklist
    cc = ""
    if "send_copy" in body and body["send_copy"]:
        cc = _from.email  # _from is a User instance, not a dict
    fullname = _from.first_name + " " + _from.last_name
    sendmail(
        subject="[" + APPNAME + "] Message de " + fullname,
        template="contact.html",
        field={"text": text, "social_link": social_link, "fullname": fullname},
        _to=[_profil.email, cc] if cc else [_profil.email]  # avoid an empty recipient
    )
    return Response("Message envoyé", status=200)
def extract_movie_from_bdfci(pow: PieceOfWork, refresh_delay=31):
    title = pow.title.replace(" ", "+")
    search_url = ("https://www.bdfci.info/?q=" + title +
                  "&pa=f&d=f&page=search&src=bdfci&startFrom=1&offset=1")
    page = load_page(search_url, refresh_delay=refresh_delay)
    articles = page.find_all("article")
    url_ref = None
    if len(articles) == 0:
        entete = page.find("h1")
        if entete is not None:
            text_entete = entete.text.split("<")[0].lower()
            if text_entete == pow.title.lower():
                # The search resolved directly to the film page: keep its URL
                # (the original stored the page object itself, which would break
                # add_link and the string concatenation below).
                url_ref = search_url
    else:
        url = articles[0].find("a")
        if url is not None and url.attrs["title"].lower() == str(pow.title).lower():
            url_ref = "https://www.bdfci.info" + url.attrs["href"]
    if url_ref is not None:
        pow.add_link(url_ref, "BDFI")
        log("Ajout du lien BDFCI:" + url_ref + " pour " + pow.title)
    pow.dtLastSearch = datetime.now()
    pow.save()
    return title
def extract_profil_from_imdb(lastname: str, firstname: str):
    peoples = ia.search_person(firstname + " " + lastname)
    infos = dict()
    for p in peoples:
        name = p.data["name"].upper()
        if firstname.upper() in name and lastname.upper() in name:
            if "headshot" in p.data and "nopicture" not in p.data["headshot"]:
                infos["photo"] = p.data["headshot"]
            if "url" not in infos:
                infos["url"] = "https://imdb.com/name/nm" + p.personID + "/"
    if "url" not in infos:
        return infos  # no matching person found
    log("Ouverture de " + infos["url"])
    page = load_page(infos["url"])
    film_zone = page.find("div", {"id": "filmography"})
    if film_zone is None:
        film_zone = page
    links = film_zone.findAll('a', attrs={'href': wikipedia.re.compile("^/title/tt")})
    infos["links"] = []
    for l in links:
        ancestor = l.parent.parent.parent.parent
        if len(l.getText()) > 3 and ancestor and ancestor.get("id") == "filmography":
            texts = l.parent.parent.text.split("(")
            nature = "long"
            job: str = l.parent.parent.get("id").split("-")[0]
            if job == "miscellaneous" or len(job) == 0:
                temp = l.parent.parent.text.split("(")
                job = temp[len(temp) - 1].split(")")[0]
            url = "https://www.imdb.com" + l.get("href")
            url = url.split("?")[0]
            if len(texts) > 1:
                nature = ""
                for nat in MOVIE_NATURE:
                    if nat.lower() in texts[1].lower():
                        nature = nat
                        break
                if nature == "":
                    log("Nature inconnue depuis " + texts[1] + " pour " + url)
            if len(texts) > 2 and len(job) == 0:
                job = texts[2].split(")")[0]
            infos["links"].append({"url": url, "text": l.getText(), "job": job, "nature": nature})
    return infos
def init_token(self):
    # Issue the non-fungible FEMIS token, then grant the creation role to the admin.
    rc = self.execute(
        "issueNonFungible@" + toHex("FEMISToken", False) + "@" + toHex("FEMIS", False),
        self._sender, NFT_CREATE_COST)
    if len(rc) > 0 and "result" in rc[0] and len(rc[0]["result"]) > 1:
        token_id = rc[0]["result"][1]
        log("Création de " + token_id)
        rc = self.execute("setSpecialRole@" + toHex(token_id, False) + "@" +
                          self._sender.address.hex() + "@" + toHex("ESDTNFTCreate", False))
    return rc
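# Minimal sketch of how the issueNonFungible payload above is assembled: the
# MultiversX/Elrond convention is hex-encoded arguments joined by "@". This
# build_issue_payload helper is illustrative only; the code above relies on toHex.
def build_issue_payload(name: str, ticker: str) -> str:
    return "issueNonFungible@" + name.encode().hex() + "@" + ticker.encode().hex()

assert build_issue_payload("FEMISToken", "FEMIS") == "issueNonFungible@46454d4953546f6b656e@46454d4953"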
def update_dictionnary(request):
    for w in Work.objects.all():
        job = translate(w.job)
        if job != w.job:
            log("Traitement de " + str(w.job))
            w.job = job
            w.save()
    for p in PieceOfWork.objects.all():
        category = translate(p.category)
        if category != p.category:
            p.category = category
            p.save()
    return Response({"message": "ok"})
def extract_film_from_senscritique(title: str, refresh_delay=31):
    # parse.quote is used here: urllib's urlencode expects a mapping, not a bare string.
    url = "https://www.senscritique.com/search?q=" + parse.quote(title.lower())
    log("Recherche sur sens-critique : " + url)
    pages = load_page(url, save=False)
    pages = pages.find_all("div", {"data-qa": "hits"})
    if len(pages) > 0:
        links = pages[0].find_all("a")
        for l in links:
            if "href" in l.attrs and l.attrs["href"].startswith("https://www.senscritique.com/film/"):
                if l.getText().lower() == title.lower():
                    url = l["href"]
                    log("Extraction de " + url)
                    page = load_page(url, refresh_delay)  # warms the cache for later use
                    return url
    return None
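# Why parse.quote rather than urlencode for a bare string: urllib's urlencode
# expects a mapping, while quote percent-encodes a single value.
from urllib import parse
assert parse.quote("l'été meurtrier") == "l%27%C3%A9t%C3%A9%20meurtrier"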
def analyse_pows(pows: list, search_with="link", bot=None, cat="unifrance,imdb,lefilmfrancais"):
    infos = list()
    for pow in pows:
        pow.dtLastSearch = datetime.now()
        pow.save()
        if search_with == "link":
            for l in pow.links:
                info = None  # guard: neither source may match this link
                if "auto:IMDB" in l["text"]:
                    info = extract_film_from_imdb(l["url"], pow.title)
                if "auto:unifrance" in l["text"]:
                    info = extract_film_from_unifrance(l["url"], pow.title)
                if info is not None:
                    infos.append(info)
        if search_with == "title":
            title = pow.title
            year = pow.year
            if title and year:
                for source in cat.split(","):
                    log("Analyse de " + source)
                    film = None
                    if source == "unifrance":
                        film = extract_film_from_unifrance(title)
                    if source == "imdb":
                        film = extract_film_from_imdb(title, title=title)
                    if source == "lefilmfrancais":
                        if bot is None:
                            bot = connect_to_lefilmfrancais("*****@*****.**", "UALHa")
                        film = extract_film_from_leFilmFrancais(title, bot=bot)
                    if film:
                        pow_2 = dict_to_pow(film)
                        if pow_2.year == year and equal_str(pow_2.title, title):
                            pow, hasChanged = fusion(pow, pow_2)
                            if hasChanged:
                                pow.save()
    if bot is not None:  # guard: bot is only created for lefilmfrancais
        bot.quit()
        bot = None
    return infos
def extract_profil_from_imdb(lastname: str, firstname: str, refresh_delay=31):
    peoples = ia.search_person(remove_accents(firstname) + " " + remove_accents(lastname))
    infos = dict()
    for p in peoples:
        name = remove_accents(remove_ponctuation(p.data["name"].upper()))
        if firstname.upper() in name and lastname.upper() in name:
            if "headshot" in p.data and "nopicture" not in p.data["headshot"]:
                infos["photo"] = p.data["headshot"]
            if "url" not in infos:
                infos["url"] = "https://imdb.com/name/nm" + p.personID + "/"
    if "url" not in infos:
        return infos  # no matching person found
    log("Ouverture de " + infos["url"])
    page = load_page(infos["url"], refresh_delay=refresh_delay)
    film_zone = page.find("div", {"id": "filmography"})
    if film_zone is None:
        film_zone = page
    # All the links that point to a work
    infos["links"] = []
    links = film_zone.findAll('a', attrs={'href': wikipedia.re.compile("^/title/tt")})
    for l in links:
        ancestor = l.parent.parent.parent.parent
        if len(l.getText()) > 3 and ancestor and ancestor.get("id") == "filmography":
            job: str = l.parent.parent.get("id").split("-")[0]
            if job == "miscellaneous" or len(job) == 0:
                temp = l.parent.parent.text.split("(")
                job = temp[len(temp) - 1].split(")")[0]
            else:
                if not in_dict(job, "jobs"):
                    job = ""
            url = "https://www.imdb.com" + l.get("href")
            url = url.split("?")[0]
            # job/nature are left empty here; they are resolved later from each film page
            infos["links"].append({"url": url, "text": l.getText(), "job": "", "nature": ""})
    return infos
def raz(request):
    tables = request.GET.get("tables", "all")  # renamed from `filter` to avoid shadowing the builtin
    log("Effacement de " + tables)
    if "profils" in tables or tables == "all":
        log("Effacement des profils")
        Profil.objects.all().delete()
    if "users" in tables or tables == "all":
        log("Effacement des utilisateurs")
        User.objects.all().delete()
    if "pows" in tables or tables == "all":
        log("Effacement des oeuvres")
        PieceOfWork.objects.all().delete()
    log("Effacement de la base terminé")
    return Response({"message": "Compte effacé"})
def create_user_profile(sender, instance, created, **kwargs):
    """
    Post-save signal: create the ExtraUser attached to a freshly created user.
    :param sender:
    :param instance:
    :param created:
    :param kwargs:
    :return:
    """
    if created:
        log("Creation de l'extrauser associé")
        with open(STATIC_ROOT + "/profils.yaml", "r", encoding="utf-8") as f:
            perms = yaml.safe_load(f.read())
        perm = ""
        for p in perms["profils"]:
            if p["id"] == DEFAULT_PERMS_PROFIL:
                perm = p["perm"]
                break
        log("Permission par défaut pour les connectés : " + perm)
        ExtraUser.objects.create(user=instance, perm=perm)
def analyse(self, profils):
    n_profils = 0
    for profil in profils:
        bSave = False
        if len(profil.town) == 0 or profil.town == "0":
            if len(profil.cp) > 0:
                profil.town = self.find_city(profil.cp)
                bSave = True
            else:
                self.add_bad_profil(profil, "Impossible de retrouver la ville")
        else:
            if profil.town != profil.town.upper():
                profil.town = profil.town.upper()
                bSave = True
        if bSave:
            log("Enregistrement de " + str(profil))
            profil.save()
            n_profils = n_profils + 1
    return n_profils, self.log
def create(self, data):
    """
    Create a user profile and initialize its password.
    :param data:
    :return:
    """
    log("Création du password, du user et du token")
    # Default names are set before they are used in the welcome mail below.
    if "first_name" not in data:
        data["first_name"] = data["email"].split(".")[0]
    if "last_name" not in data:
        data["last_name"] = ""
    if data["username"].startswith("___"):
        password = data["username"].replace("___", "")
        data["username"] = data["email"]
        sendmail(
            "Voici votre code de connexion via mail", [data["email"]], "welcome_google",
            {
                "email": data["email"],
                "url_appli": DOMAIN_APPLI + "/?email=" + data["email"],
                "firstname": data["first_name"],
                "code": password,
                "appname": APPNAME
            })
    else:
        password = reset_password(data["email"], data["username"])
    user = User.objects.create_user(
        username=data["username"],
        password=password,
        email=data["email"],
        first_name=data["first_name"],
        last_name=data["last_name"],
    )
    Token.objects.create(user=user)
    return user
def fusion(self, p_old, p_new):
    try:
        log("Destruction de " + str(p_old))
        p_old.delete()
        return True
    except Exception:
        log("Destruction de " + str(p_new))
        try:
            p_new.delete()
            return True
        except Exception:
            log("Destruction impossible entre " + str(p_old) + " et " + str(p_new))
    return False
def find_double(self, with_fusion=True):
    log("Recherche des doublons sur les films")
    rc = 0
    for p1 in self.pows:
        for p2 in self.pows:
            d = jellyfish.jaro_similarity(p1.title.lower(), p2.title.lower())
            if d > 0.97 and p1.year == p2.year and p1.id != p2.id:
                log("Suspicion de doublon entre " + str(p1) + " et " + str(p2))
                if with_fusion:
                    if p1.quality_score() > p2.quality_score():
                        b = self.fusion(p2, p1)
                    else:
                        b = self.fusion(p1, p2)
                    if b:
                        log("Fusion réalisée")
                        rc = rc + 1
    return rc
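# Possible refinement of the N x N scan above (sketch, not in the original code):
# iterating unordered pairs once halves the comparisons and removes the need for
# the p1.id != p2.id test.
from itertools import combinations
import jellyfish

def iter_suspect_doubles(pows, threshold=0.97):
    for p1, p2 in combinations(pows, 2):
        if p1.year == p2.year and jellyfish.jaro_similarity(p1.title.lower(), p2.title.lower()) > threshold:
            yield p1, p2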
def extract_film_from_unifrance(url: str, job_for=None):
    rc = dict()
    if not url.startswith("http"):
        log("On passe par la page de recherche pour retrouver le titre")
        page = load_page("https://unifrance.org/recherche?q=" + parse.quote(url))
        _link = page.find("a", attrs={'href': wikipedia.re.compile("^https://www.unifrance.org/film/[0-9][0-9]")})
        if _link is None:
            return rc
        url = _link.get("href")

    # r=wikipedia.requests.get(url, headers={'User-Agent': 'Mozilla/5.0',"accept-encoding": "gzip, deflate"})
    # page = wikipedia.BeautifulSoup(str(r.content,encoding="utf-8"),"html5lib")
    page = load_page(url)
    _title = page.find('h1', attrs={'itemprop': "name"})
    if _title is not None:
        rc["title"] = _title.text
        log("Analyse du film " + rc["title"])
    for title in page.findAll('h1'):
        if "Affiches" in title.text:
            section = title.parent
            _img = section.find("img", attrs={'itemprop': "image"})
            if _img is not None:
                src: str = _img.get("src")
                if not src.startswith("/ressource"):
                    rc["visual"] = src
                    log("Enregistrement de l'affiche " + src)
    _real = page.find("div", attrs={"itemprop": "director"})
    if _real is not None:
        rc["real"] = _real.find("a", attrs={"itemprop": "name"}).get("href")
    idx_div = 0
    for div in page.findAll("div", attrs={'class': "details_bloc"}):
        if idx_div == 0 and ":" not in div.text:
            rc["nature"] = div.text
        if "Année de production : " in div.text:
            rc["year"] = div.text.replace("Année de production : ", "")
        if "Genre(s) : " in div.text:
            rc["category"] = translate(div.text.replace("Genre(s) : ", ""))
        idx_div = idx_div + 1
    if "category" in rc and len(rc["category"]) == 0:
        rc["category"] = "inconnue"
    if job_for is not None:
        if rc.get("real") == job_for:  # .get(): "real" may be missing
            rc["job"] = "Réalisation"
        else:
            section = page.find("section", {"id": "casting"})
            if section is not None:
                jobs = section.findAll("h2")
                paras = section.findAll("p")
                # if not "personne" in links[0].href:links.remove(0)
                for idx in range(len(paras)):
                    links = paras[idx].findAll("a")
                    for l in links:
                        if "/personne" in l.get("href") and l.get("href") == job_for:
                            rc["job"] = jobs[idx].text.replace(" : ", "")
                            break
    _synopsis = page.find("div", attrs={"itemprop": "description"})
    if _synopsis is not None:
        rc["synopsis"] = _synopsis.getText(strip=True)
    return rc
def extract_film_from_imdb(url: str, title: str, name="", job=""):
    """
    Extract a film description from its IMDB page.
    :return:
    """
    page = load_page(url)
    rc = dict({"title": title, "nature": translate("film")})
    zone_info = page.find("div", {"class": "title_block"})
    if title.startswith("Episode") or "Episode" in zone_info.getText():
        section_title = page.find("div", {"class": "titleParent"})
        if section_title is not None:
            title = section_title.find("a").text + " " + title
        # Look up the episode number
        rc["nature"] = MOVIE_NATURE[0]
        zone_info_comp = page.find("div", {"class": "button_panel navigation_panel"})
        if zone_info_comp is not None and "Season" in zone_info_comp.getText():
            extract_text = "S" + zone_info_comp.getText().split("Season")[1] \
                .replace("Episode ", "E").replace(" | ", "").replace(" ", "")
            rc["title"] = title + " " + extract_text.split("\n")[0]
    for cat in MOVIE_CATEGORIES:
        if cat.lower() in zone_info.getText().lower():
            rc["category"] = cat
    if "category" not in rc:
        rc["category"] = "Inconnue"
        log("Pas de categorie pour " + url)
    affiche = page.find("div", attrs={"class": "poster"})
    if affiche is not None and affiche.find("img") is not None:
        rc["visual"] = affiche.find("img").get("src")
    try:
        rc["year"] = re.search('[1-2][0-9][0-9][0-9]', page.title.text).group(0)
    except Exception:
        try:
            rc["year"] = re.search('[1-2][0-9][0-9][0-9]', zone_info.getText()).group(0)
        except Exception:
            pass
    summary_section = page.find("div", attrs={"class": "summary_text"})
    if summary_section is not None and "Add a Plot" not in summary_section.text:
        rc["synopsis"] = summary_section.text.replace("\n", "").strip()
    log("Recherche du role sur le film")
    credits = load_page(url + "fullcredits")
    if credits is not None:
        credits = credits.find("div", {"id": "main"})
    if credits is not None:
        links = credits.find_all("a")
        for l in links:
            if name.upper() in l.text.upper():
                parent = l.parent.parent.find("td", {"class": "credit"})
                if parent is not None:
                    rc["job"] = str(parent.getText().replace("\n", "")).strip()
                    rc["job"] = rc["job"].split("(")[0]
                    while "  " in rc["job"]:  # collapse double spaces
                        rc["job"] = rc["job"].replace("  ", " ")
                    break
    if "job" not in rc:
        rc["job"] = job
    return rc
def create(self, data):
    """
    Create a user profile and initialize its password.
    :param data:
    :return:
    """
    log("Création du password, du user et du token")
    # Default names are set before they are used in the welcome mail below.
    if "first_name" not in data:
        data["first_name"] = data["email"].split(".")[0]
    if "last_name" not in data:
        data["last_name"] = ""
    if data["username"].startswith("___"):
        password = data["username"].replace("___", "")
        data["username"] = data["email"]
        sendmail(
            "Voici votre code de connexion via mail", [data["email"]], "welcome_google",
            {
                "email": data["email"],
                "url_appli": settings.DOMAIN_APPLI + "/?email=" + data["email"],
                "firstname": data["first_name"],
                "code": password,
                "appname": APPNAME
            })
    else:
        password = reset_password(data["email"], data["username"])
    user = User.objects.create_user(
        username=data["username"],
        password=password,
        email=data["email"],
        first_name=data["first_name"],
        last_name=data["last_name"],
    )
    Token.objects.create(user=user)
    log("Récupération des profils")
    lp = list(Profil.objects.filter(email=data["email"]))
    with open(settings.STATIC_ROOT + "/profils.yaml", "r") as f:
        profils = yaml.safe_load(f.read())
    perm = profils["profils"][1]["perm"]
    log("Création de l'extraUser")
    if len(lp) > 0:
        eu = ExtraUser.objects.create(user=user, perm=perm, profil=lp[0],
                                      black_list="", level=profils["profils"][1]["level"])
    else:
        eu = ExtraUser.objects.create(user=user, perm=perm,
                                      black_list="", level=profils["profils"][1]["level"])
    eu.save()
    user.save()
    log("Procédure de création terminée")
    return user
def __init__(self, proxy=BC_PROXY, pem_file=ADMIN_PEMFILE):
    self._proxy = ElrondProxy(proxy)
    self.chain_id = self._proxy.get_chain_id()
    self.environment = TestnetEnvironment(proxy)
    log("Initialisation de l'admin avec " + pem_file)
    self._sender = Account(pem_file=pem_file)
def extract_film_from_unifrance(url: str, job_for=None, all_casting=False, refresh_delay=30):
    rc = dict({"casting": [], "source": "auto:unifrance", "url": url})
    if not url.startswith("http"):
        log("On passe par la page de recherche pour retrouver le titre")
        page = load_page("https://unifrance.org/recherche?q=" + parse.quote(url),
                         refresh_delay=refresh_delay)
        _link = page.find("a", attrs={'href': wikipedia.re.compile("^https://www.unifrance.org/film/[0-9][0-9]")})
        if _link is None:
            return None
        url = _link.get("href")
        rc["url"] = url

    # r=wikipedia.requests.get(url, headers={'User-Agent': 'Mozilla/5.0',"accept-encoding": "gzip, deflate"})
    # page = wikipedia.BeautifulSoup(str(r.content,encoding="utf-8"),"html5lib")
    page = load_page(url, refresh_delay)
    _title = page.find('h1', attrs={'itemprop': "name"})
    if _title is not None:
        rc["title"] = _title.text
        log("Analyse du film " + rc["title"])
    for title in page.findAll('h1'):
        if title.text.startswith("Affiches"):
            section = title.parent
            _img = section.find("img", attrs={'itemprop': "image"})
            if _img is not None:
                src: str = _img.get("src")
                if not src.startswith("/ressource"):
                    rc["visual"] = src
                    log("Enregistrement de l'affiche " + src)
    _real = page.find("div", attrs={"itemprop": "director"})
    if _real is not None and _real.find("a", attrs={"itemprop": "name"}) is not None:
        rc["real"] = _real.find("a", attrs={"itemprop": "name"}).get("href")
    idx_div = 0
    for div in page.findAll("div", attrs={'class': "details_bloc"}):
        if idx_div == 0 and ":" not in div.text:
            rc["nature"] = div.text
        if "Numéro de visa" in div.text:
            rc["visa"] = div.text.split(" : ")[1].replace(".", "")
        if "Langues de tournage" in div.text:
            rc["langue"] = div.text.split(" : ")[1]
        if "Année de production : " in div.text:
            rc["year"] = div.text.replace("Année de production : ", "")
        if "Genre(s) : " in div.text:
            rc["category"] = translate(div.text.replace("Genre(s) : ", ""))
        idx_div = idx_div + 1
    if "category" in rc and len(rc["category"]) == 0:
        rc["category"] = "inconnue"

    rc["prix"] = []
    for section_prix in page.find_all("div", attrs={"class": "distinction palmares"}):
        if len(section_prix.find_all("div")) > 0:
            content = section_prix.find_all("div")[1].text
            if content is not None:
                content = content.replace("PlusMoins", "")
                _prix = {"description": content.split(")Prix")[1].split(" : ")[0]}
                for l in section_prix.find_all("div")[1].find_all("a"):
                    if "festivals" in l.attrs["href"]:
                        _prix["title"] = l.text.split("(")[0]
                        _prix["year"] = re.findall(r"[1-2][0-9]{3}", l.text)[0]
                    if "person" in l.attrs["href"] and "profil" not in _prix:
                        _prix["profil"] = index_string(l.text)
                if "profil" not in _prix and job_for is not None:  # guard: job_for may be None
                    log("Attribution du prix à " + job_for)
                    _prix["profil"] = index_string(job_for)
                if "year" in _prix and "title" in _prix:
                    rc["prix"].append(_prix)
                    log("Ajout du prix " + str(_prix))
                else:
                    log("!Prix non conforme sur " + url)

    if job_for is not None:
        # Guard added: the description block or its paragraph may be absent.
        desc = page.find("div", {"id": "description"})
        real_links = desc.find("p").find_all("a") if desc and desc.find("p") else []
        if len(real_links) > 0 and equal_str(real_links[0].text, job_for):
            rc["job"] = translate("Réalisation")
        else:
            # Look for a directing credit
            section = page.find("div", {"itemprop": "director"})
            if section and (job_for.lower() in section.text.lower()):
                rc["job"] = translate("Réalisation")
            # Look in the detailed credits
            section = page.find("section", {"id": "casting"})
            if section is not None:
                jobs = section.findAll("h2")
                paras = section.findAll("p")
                # if not "personne" in links[0].href:links.remove(0)
                for idx in range(len(paras)):
                    links = paras[idx].findAll("a")
                    for l in links:
                        job = jobs[idx].text.replace(":", "").strip()
                        if "/personne" in l.get("href"):
                            if (job_for.startswith("http") and l.get("href") == job_for) or equal_str(job_for, l.text):
                                rc["job"] = job
                                break
                            else:
                                if all_casting:
                                    # Add the whole casting to the system
                                    names = str(l.getText()).split(" ")
                                    lastname = names[len(names) - 1]
                                    rc["casting"].append({
                                        "lastname": lastname,
                                        "url": l.attrs["href"],
                                        "source": "unifrance",
                                        "firstname": l.getText().replace(lastname, "").strip(),
                                        "job": job
                                    })
            # Look in the actors
            for actor in page.find_all("div", {"itemprop": "actors"}):
                if "data-title" in actor.attrs:
                    if actor.attrs["data-title"].lower() == job_for.lower():
                        rc["job"] = "actor"
    _synopsis = page.find("div", attrs={"itemprop": "description"})
    if _synopsis is not None:
        rc["synopsis"] = _synopsis.getText(strip=True)
    return rc
def add_pows_to_profil(profil, links, job_for, refresh_delay_page, templates=[], bot=None, content=None):
    """
    Attach the works found in `links` to the profile.
    :param profil:
    :param links:
    :return:
    """
    n_films = 0
    n_works = 0
    articles = list()
    job_for = remove_accents(remove_ponctuation(job_for))
    for l in links:
        source = "auto"
        film = None
        pow = None
        job = l["job"] if "job" in l else ""
        # for p in PieceOfWork.objects.filter(title__iexact=l["text"]):
        #     # if the source has already been analysed, do nothing
        #     for link in p.links:
        #         if l["url"] == link["url"]:
        #             pow=p
        #             break
        if "unifrance" in l["url"]:
            film = extract_film_from_unifrance(l["url"], job_for=job_for,
                                               refresh_delay=refresh_delay_page)
        if "source" in l and "LeFilmFrancais" in l["source"]:
            film = extract_film_from_leFilmFrancais(l["url"], job_for=job_for,
                                                    refresh_delay=refresh_delay_page, bot=bot)
        if "imdb" in l["url"]:
            film = extract_film_from_imdb(l["url"], l["text"],
                                          name=profil.firstname + " " + profil.lastname,
                                          job=job, refresh_delay=refresh_delay_page)
        # .get(): not every extractor fills category/nature
        if film and (film.get("category") == "News" or len(film.get("nature", "")) == 0):
            log("Ce type d'événement est exclu :" + str(film))
            film = None
        if film is not None:
            if "nature" not in film:
                film["nature"] = l["nature"]
            if "title" in film:
                log("Traitement de " + film["title"] + " à l'adresse " + l["url"])
                pow = dict_to_pow(film, content)
                job = profil.job
                if "job" in film:
                    job = film["job"]
                try:
                    result = PieceOfWork.objects.filter(title_index__iexact=pow.title_index)
                    if len(result) > 0:
                        for p in result:
                            if abs(int(p.year) - int(pow.year)) <= 1:
                                log("Le film existe déjà dans la base, on le met a jour avec les nouvelles données")
                                pow, hasChanged = fusion(p, pow)
                                if hasChanged:
                                    pow.dtLastSearch = datetime.now()
                                    pow.save()
                    else:
                        n_films = n_films + 1
                        pow.dtLastSearch = datetime.now()
                        pow.save()
                    # TODO: to revisit, record updates could make us miss films;
                    # the code below should be dedented, but the pow would then have to be re-fetched
                except Exception as inst:
                    log("Impossible d'enregistrer le film: " + str(inst.args))
            else:
                log("Impossible de retrouver le film" + str(film))
        if pow is not None:
            if film is not None and "prix" in film and film["prix"] is not None and len(film["prix"]) > 0:
                for prix in film["prix"]:
                    f = Festival.objects.filter(title__iexact=prix["title"])
                    if f.exists():
                        f = f.first()
                    else:
                        f = Festival(title=prix["title"].strip().lower())
                        f.save()
                    a = Award.objects.filter(pow__id=pow.id, year=int(prix["year"]), festival__id=f.id)
                    if a.exists():
                        a = a.first()
                    else:
                        desc = prix["description"][:249]
                        if desc.startswith("(") and ")" in desc:
                            desc = desc.split(")")[1]
                        a = Award(description=desc, year=prix["year"], pow=pow, festival=f,
                                  profil=None if "profil" not in prix else
                                  Profil.objects.filter(name_index__iexact=prix["profil"]).first())
                        try:
                            a.save()
                        except Exception:
                            log("!!Probleme d'enregistrement de l'award sur " + pow.title)
            if job is None:
                job = ""
            t_job = translate(job)
            if len(t_job) == 0:
                if job_for and pow and pow.title:
                    log("!Job non identifié pour " + job_for + " sur " + pow.title)
                    # t_job="Non identifié"
            else:
                if not Work.objects.filter(pow_id=pow.id, profil_id=profil.id, job=t_job).exists():
                    log("Ajout de l'experience " + job + " traduit en " + t_job +
                        " sur " + pow.title + " à " + profil.lastname)
                    work = Work(pow=pow, profil=profil, job=t_job, source=source)
                    try:
                        work.save()
                    except Exception as inst:
                        log("Impossible d'enregistrer le travail: " + str(inst.args))
                    if len(templates) > 0:
                        articles.append(create_article(profil, pow, work, templates[0]))
                else:
                    log("Pas d'enregistrement de la contribution job=" + job)
            # Save the casting
            if film is not None and "casting" in film:
                for p in film["casting"]:
                    _ps = list(Profil.objects.filter(lastname=p["lastname"], firstname=p["firstname"]))
                    if len(_ps) == 0:
                        log("Ajout de " + p["lastname"] + " comme externe en tant que " + p["job"])
                        _p = Profil(firstname=p["firstname"], lastname=p["lastname"],
                                    name_index=index_string(p["firstname"] + p["lastname"]),
                                    department="Ext", cursus="E", school="",
                                    email=p["firstname"] + "." + p["lastname"] + "@fictif")
                        _p.add_link(url=p["url"], title=p["source"])
                        _p.save()
                    else:
                        _p = _ps[0]
                    if not Work.objects.filter(pow_id=pow.id, profil_id=_p.id, job=p["job"]).exists():
                        work = Work(pow=pow, profil=_p, job=p["job"], source=source)
                        work.save()
                        n_works = n_works + 1
    return n_films, n_works, articles
def extract_film_from_imdb(url: str, title: str, name="", job="", all_casting=False, refresh_delay=31):
    """
    Extract a film description from IMDB, from a film URL or from a title to search.
    :return:
    """
    if not url.startswith("http"):
        page = load_page("https://www.imdb.com/find?s=tt&q=" + parse.quote(url))
        bFind = False
        for link in page.find_all("a"):
            if link and equal_str(link.text, url) and link["href"].startswith("/title/tt"):
                url = "https://www.imdb.com" + link["href"]
                bFind = True
                break
        if not bFind:
            log(url + " introuvable sur IMDB")
            return None

    page = load_page(url, refresh_delay)
    title = remove_ponctuation(title)
    rc = dict({"title": title, "nature": "", "casting": list(), "url": url, "source": "auto:IMDB"})
    divs = dict()
    elts = (page.find_all("div", recursive=True) + page.find_all("h1", recursive=True)
            + page.find_all("ul", recursive=True) + page.find_all("p") + page.find_all("li"))
    for div in elts:
        s = div.text
        s_t = translate(s)
        if s_t in MOVIE_NATURE:
            rc["nature"] = s_t
        # Parentheses added: "and" binds tighter than "or", the original test was unbalanced.
        if (s.startswith("1h") or s.startswith("2h")) and s.endswith("m") and len(rc["nature"]) == 0:
            rc["nature"] = translate("long")
        if "data-testid" in div.attrs:
            divs[div.attrs["data-testid"]] = div

    # Find the nature and the category
    if "genres" not in divs:
        elt = page.find("li", {"role": "presentation", "class": "ipc-inline-list__item"})
        cat = elt.text if elt is not None else "inconnu"
    else:
        cat = ""
        for div in divs["genres"]:
            cat = cat + translate(div.text.lower()) + " "
    if cat.split(" ")[0] in MOVIE_NATURE:
        rc["nature"] = cat.split(" ")[0]
        cat = cat.replace(rc["nature"], "").strip()
    rc["category"] = cat.strip()

    try:
        title = divs["hero-title-block__title"].text
        year = divs["hero-title-block__metadata"].text
        if year is not None:
            rc["year"] = re.search(r"(\d{4})", year).group(1)
    except Exception:
        log("Erreur sur title=" + title)
        return None

    affiche = divs.get("hero-media__poster")  # .get(): the poster block may be absent
    if affiche is not None and affiche.find("img") is not None:
        rc["visual"] = affiche.find("img").get("src")
    rc["synopsis"] = ""
    if "plot" in divs:
        rc["synopsis"] = divs["plot"].text.replace("Read all", "")

    # log("Recherche du role sur le film")
    credits = load_page(url + "fullcredits", refresh_delay)
    if credits is not None:
        credits = credits.find("div", {"id": "fullcredits_content"})
    if credits is not None:
        sur_jobs = credits.find_all("h4")
        tables = credits.find_all("table")
        for i in range(0, len(tables)):
            trs = tables[i].find_all("tr")
            for tr in trs:
                tds = tr.find_all("td")
                if len(tds) > 1:
                    findname = tds[0].text.replace("\n", "").replace("  ", " ").strip()
                    if len(findname) == 0:
                        findname = tds[1].text.replace("\n", "").replace("  ", " ").strip()
                    if len(findname) > 0:
                        # log("Nom identifié "+findname)
                        if equal_str(findname, name):
                            sur_job = sur_jobs[i].text.replace("\n", " ").strip()
                            if "Cast" in sur_job or "Serie Cast" in sur_job:
                                if len(tds) > 3 and "Self" in tds[3].text:
                                    job = ""
                                else:
                                    job = "Actor"
                            else:
                                job = tds[len(tds) - 1].text.split("(")[0].split("/")[0].strip()
                                if len(job) == 0 and len(sur_jobs[i].text) > 0:
                                    job = sur_job.replace(" by", "").strip()
                            job = job.split("\n")[0]
                            rc["job"] = translate(job)
                            if len(job) == 0:
                                log("Job non identifié pour " + name + " sur " + url)
                            else:
                                if not all_casting:
                                    break
                        else:
                            if all_casting:
                                names = tds[0].text.split(" ")  # .text added: tds[0] is a tag
                                rc["casting"].append({"name": " ".join(names), "source": "imdb", "job": job})
    if "job" not in rc:
        rc["job"] = job
    return rc
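# Minimal, self-contained illustration of the data-testid indexing pattern used
# above (BeautifulSoup); the HTML snippet is made up for the example.
from bs4 import BeautifulSoup

html = '<div data-testid="plot">A synopsis</div><div data-testid="genres"><span>Drama</span></div>'
page = BeautifulSoup(html, "html.parser")
divs = {d.attrs["data-testid"]: d for d in page.find_all("div") if "data-testid" in d.attrs}
assert divs["plot"].text == "A synopsis"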
def importer(request, format=None):
    header = list()

    def idx(col: str, row=None, default=None):
        # Return the value (or the index) of the first column alias present in the header.
        for c in col.lower().split(","):
            if c in header:
                if row is not None:
                    return row[header.index(c)]
                else:
                    return header.index(c)
        return default

    log("Importation de profil")
    data = base64.b64decode(str(request.body).split("base64,")[1])
    txt = ""
    for _encoding in ["utf-8", "ansi"]:  # NB: "ansi" may not be a registered codec on every platform
        try:
            txt = str(data, encoding=_encoding)
            break
        except Exception:
            pass
    txt = txt.replace("’", "")  # strip curly apostrophes (encoding workaround)
    d = csv.reader(StringIO(txt), delimiter=";")
    i = 0
    record = 0
    for row in d:
        if i == 0:
            header = [x.lower() for x in row]
        else:
            firstname = row[idx("firstname,prenom")]
            lastname = row[idx("lastname,nom")]
            email = row[idx("email,mail")]
            idx_photo = idx("photo,picture,image")
            # Eligibility check
            if len(lastname) > 2 and len(lastname) + len(firstname) > 5 and len(email) > 4 and "@" in email:
                if idx_photo is None or len(row[idx_photo]) == 0:
                    if row[idx("genre,civilite")] == "Monsieur" or \
                       row[idx("genre,civilite")] == "M." or \
                       row[idx("genre,civilite")].startswith("Mr"):
                        photo = "/assets/img/boy.png"
                    else:
                        photo = "/assets/img/girl.png"
                else:
                    photo = stringToUrl(row[idx("photo")])
                # Derived fields
                ts = dateToTimestamp(row[idx("birthday,anniversaire,datenaissance")])
                dt = None
                if ts is not None:
                    dt = datetime.fromtimestamp(ts)
                profil = Profil(
                    firstname=firstname,
                    lastname=lastname,
                    mobile=row[idx("mobile,telephone,tel")][:20],
                    nationality=idx("nationality,country,pays", row, "France"),
                    birthdate=dt,
                    department=idx("departement,department,formation", row, "")[:60],
                    job=idx("job,metier,competences", row, "")[:60],
                    degree_year=row[idx("promo,promotion,anneesortie")],
                    address=row[idx("address,adresse")][:200],
                    town=idx("town,ville", row, "")[:50],
                    cp=idx("cp,codepostal,code_postal,postal_code,postalcode", row, "")[:5],
                    website=stringToUrl(idx("website,siteweb,site,url", row)),
                    email=email,
                    photo=photo,
                    linkedin=idx("linkedin", row),
                    cursus=idx("cursus", row, "S"),
                )
                try:
                    rc = profil.save()
                    record = record + 1
                except Exception as inst:
                    log("Problème d'enregistrement de " + email + " : " + str(inst))
        i = i + 1
    cr = str(record) + " profils importés"
    log(cr)
    return Response(cr, 200)
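# Standalone illustration of the column-alias lookup used by importer(): the same
# idx() logic, packaged as a closure over a header row (make_idx is hypothetical).
def make_idx(header):
    def idx(col, row=None, default=None):
        for c in col.lower().split(","):
            if c in header:
                return row[header.index(c)] if row is not None else header.index(c)
        return default
    return idx

idx = make_idx(["prenom", "nom", "mail"])
assert idx("firstname,prenom") == 0
assert idx("email,mail", row=["Jean", "Dupont", "jean@x.fr"]) == "jean@x.fr"
assert idx("photo,picture") is None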
def extract_film_from_leFilmFrancais(url: str, job_for=None, all_casting=False, refresh_delay=30, bot=None):
    rc = dict({"nature": "", "title": "", "source": "auto:LeFilmFrancais", "url": url})
    if not url.startswith("http"):
        page = load_page("http://www.lefilmfrancais.com/index.php?option=com_papyrus&view=recherche&searchword=" + parse.quote(url))
        bFind = False
        fiche_film = page.find("div", {"id": "fiche_film"})
        if fiche_film:
            for l in fiche_film.find_all("a"):
                if l and l["href"].startswith("http://www.lefilmfrancais.com/film/"):
                    url = l["href"]
                    bFind = True
                    break
        if not bFind:
            return None

    page = load_page(url, bot=bot)
    if page.find("div", {"id": "synopsis"}):
        rc["synopsis"] = remove_html(page.find("div", {"id": "synopsis"}).text)
    elts = page.find_all("h1")
    if len(elts) > 0:
        rc["title"] = elts[0].text.split("(")[0]
    elt = page.find("div", {"id": "detail"})
    if elt:
        for item in elt:
            if item.name is None:
                if "sortie" in item.lower():
                    pass  # release-date parsing is not implemented yet
    for span in page.find_all("span"):
        if "class" in span.attrs and len(span.attrs["class"]) > 0 and span.attrs["class"][0] == "realisation":
            if "Réalisation" not in span.text.split(",")[0]:
                rc["nature"] = span.text.split(",")[0].split("(")[0]
        else:
            if ":" in span.text:
                val = span.text.split(":")[1].strip()
                if "Visa" in span.text:
                    rc["visa"] = val
                if "Titre original" in span.text:
                    rc["original_title"] = val
                if "Réalisation" in span.text:
                    rc["real"] = val
                if "Sortie" in span.text:
                    rc["sortie"] = val
                if "copies" in span.text:
                    rc["copies"] = int(val)
                if "Nationalité" in span.text:
                    rc["Nationality"] = val
                if "Distribution France" in span.text:
                    rc["distribution"] = val
    for item in page.find_all("li"):
        lab = item.text.split(":")[0]
        if ":" in item.text:
            val = item.text.split(":")[1].split("|")[0].strip()
            # lab is the text before the colon, so it can never contain "production :";
            # the test was relaxed accordingly.
            if "production" in lab:
                rc["production"] = val
            if "Partenaires" in lab:
                rc["financial"] = val
            if "Récompense" in lab:
                rc["prix"] = val
            if "Presse" in lab:
                rc["presse"] = val
    if "title" in rc:
        log("Extraction de " + rc["title"] + " : " + str(rc))
    return rc
def extract_awards_from_imdb(profil_url, profil):
    # Look up the awards
    page = load_page(profil_url + "awards?ref_=nm_awd")
    awards = page.find_all("h3")
    if len(awards) > 0:
        awards.pop(0)
    tables = page.find_all("table", {"class": "awards"})
    for i in range(0, len(tables)):
        for tr in tables[i].find_all("tr"):
            if tr:
                festival_title = translate(awards[i].text.split(",")[0].lower().strip())
                tds = tr.find_all("td")
                if len(tds) <= 2:
                    log("Format non conforme " + tr.text)
                else:
                    year = tds[0].text.replace("\n", "").replace(" ", "").strip()
                    award = tds[1].text
                    film = tds[2].find("a")
                    if film and award:
                        win = ("Winner" in award)
                        film_title = film.text
                        if "(" in tds[2].text:
                            film_year = tds[2].text.split("(")[1].split(")")[0]
                            pow = PieceOfWork.objects.filter(title__iexact=film_title, year__iexact=film_year)
                            if pow.exists():
                                pow = pow.first()
                                f = Festival.objects.filter(title__iexact=festival_title)
                                if f.exists():
                                    f = f.first()
                                else:
                                    f = Festival(title=festival_title)
                                    f.save()
                                a = Award.objects.filter(pow__id=pow.id, year=year,
                                                         festival__id=f.id, profil__id=profil.id)
                                if a.exists():
                                    a = a.first()
                                else:
                                    award = award.replace("\n", "").replace("Winner", "").replace("Nominee", "").strip()
                                    if award.startswith("(") and ")" in award:
                                        award = award.split(")")[1]
                                    a = Award(description=award, year=year, pow=pow,
                                              festival=f, profil=profil, winner=win)
                                    try:
                                        a.save()
                                    except Exception:
                                        log("!!Probleme d'enregistrement de l'award sur " + pow.title)
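# Side note (sketch): the filter-then-create sequences above are the pattern that
# Django's get_or_create encapsulates. Caveat: get_or_create matches exactly,
# whereas the code above filters with title__iexact, hence the lower() here.
def get_festival(title: str):
    f, _created = Festival.objects.get_or_create(title=title.strip().lower())
    return f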
def add_pows_to_profil(profil, links, all_links, job_for):
    """
    Attach the works found in `links` to the profile.
    :param profil:
    :param links:
    :param all_links:
    :return:
    """
    for l in links:
        source = "auto"
        pow = None
        job = l["job"] if "job" in l else ""  # fallback, guards the unbound-variable case below
        for p in PieceOfWork.objects.filter(title__iexact=l["text"]):
            # if the source has already been analysed, reuse the existing work
            for link in p.links:
                if l["url"] == link["url"]:
                    pow = p
                    break
        if not pow:
            film = None
            if "unifrance" in l["url"]:
                film = extract_film_from_unifrance(l["url"], job_for=job_for)
                source = "auto:unifrance"
            if "imdb" in l["url"]:
                film = extract_film_from_imdb(l["url"], l["text"],
                                              name=profil.firstname + " " + profil.lastname,
                                              job=l["job"])
                if "nature" not in film:
                    film["nature"] = l["nature"]
                source = "auto:IMDB"
            if film is None:
                continue  # unknown source, nothing to add
            log("Traitement de " + film["title"] + " à l'adresse " + l["url"])
            pow = PieceOfWork(title=film["title"])
            pow.add_link(url=l["url"], title=source)
            if "nature" in film:
                pow.nature = translate(film["nature"])
            else:
                pow.nature = "Film"
            if "synopsis" in film:
                pow.description = film["synopsis"]
            if "visual" in film:
                pow.visual = film["visual"]
            if "category" in film:
                pow.category = translate(film["category"])
            if "year" in film:
                pow.year = film["year"]
            try:
                result = PieceOfWork.objects.filter(title__iexact=pow.title)
                if len(result) > 0:
                    log("Le film existe déjà dans la base, on le récupére")
                    pow = result.first()
                    pow.add_link(l["url"], source)
                pow.save()
                # TODO: to revisit, record updates could make us miss films;
                # the code below should be dedented, but the pow would then have to be re-fetched
                job = profil.job
                if "job" in film:
                    job = film["job"]
            except Exception as inst:
                log("Impossible d'enregistrer le film: " + str(inst.args))
        else:
            job = l["job"]
        t_job = translate(job)
        if not Work.objects.filter(pow_id=pow.id, profil_id=profil.id, job=t_job).exists():
            log("Ajout de l'experience " + job + " traduit en " + t_job +
                " sur " + pow.title + " à " + profil.lastname)
            work = Work(pow=pow, profil=profil, job=t_job, source=source)
            work.save()
def movie_importer(request):
    log("Importation de films")
    header = str(request.body)[20:35]
    if "excel" in header:
        txt = str(base64.b64decode(str(request.body).split("base64,")[1]), encoding="utf-8")
        d = csv.reader(StringIO(txt), delimiter=";")
    else:
        d = extract_text_from_pdf(base64.b64decode(str(request.body).split("base64,")[1]))
        return  # PDF import is not wired up yet
    i = 0
    record = 0
    for row in list(d):
        pow = None
        if len(row) > 10:
            if i > 0:
                if row[6] == "":
                    row[6] = "0"
                if row[11] == "":
                    row[11] = "1800"
                pow: PieceOfWork = PieceOfWork(
                    title=row[0].replace(u'\xa0', u' '),
                    description=row[1],
                    visual=row[4],
                    nature=row[5],
                    dtStart=row[2],
                    budget=int(row[6]),
                    category=row[7],
                    links=[{"url": row[9], "text": row[8]}],
                    lang="US",
                    year=int(row[11]),
                    owner=row[10]
                )
            if pow is not None:
                try:
                    pow.category = pow.category.replace("|", " ")
                    rc = pow.save()
                    log("Ajout de " + pow.title)
                    record = record + 1
                except Exception as inst:
                    log("Problème d'enregistrement" + str(inst))
        else:
            pows = PieceOfWork.objects.filter(title__iexact=row[0])
            if len(pows) == 0:
                pow: PieceOfWork = PieceOfWork(
                    title=row[0],
                    description=translate(row[4]),
                    nature=translate(row[2]),
                    category=row[3],
                    lang="FR"
                )
                if len(row[1]) > 0:
                    pow.year = int(str(row[1]).split(",")[0])
                pow.add_link("", "FEMIS", "Film ajouté depuis le référencement FEMIS")
                pow.save()
                log("Ajout de " + pow.title)
            else:
                pow = pows.first()
            name = row[6].replace("\n", "")
            if " " in name:
                profils = Profil.objects.filter(lastname__icontains=name.split(" ")[1],
                                                firstname__icontains=name.split(" ")[0])
                if len(profils) > 0:
                    work = Work(pow_id=pow.id, job=translate(row[5]), profil_id=profils.first().id)
                    work.save()
        i = i + 1
    log("Importation terminée de " + str(record) + " films")
    return Response(str(record) + " films importés", 200)
def exec_batch(profils):
    all_links = list()
    for pow in PieceOfWork.objects.all():
        for l in pow.links:
            all_links.append(l["url"])
    for profil in profils:
        links = []
        job_for = None
        log("Traitement de " + profil.firstname + " " + profil.lastname)
        transact = Profil.objects.filter(id=profil.id)
        if profil.delay_lastsearch() > DELAY_TO_AUTOSEARCH or len(profils) == 1:
            log("Hors délai ==> mise a jour")
            profil.dtLastSearch = datetime.now()
            # infos = extract_profil_from_bellefaye(firstname=profil.firstname, lastname=profil.lastname)
            # log("Extraction bellefaye " + str(infos))
            infos = extract_profil_from_imdb(firstname=profil.firstname, lastname=profil.lastname)
            log("Extraction d'imdb " + str(infos))
            if "url" in infos:
                profil.add_link(infos["url"], "IMDB")
            if "photo" in infos and len(profil.photo) == 0:
                profil.photo = infos["photo"]
            if "links" in infos:
                links = links + infos["links"]
            infos = extract_actor_from_unifrance(profil.firstname + " " + profil.lastname)
            log("Extraction d'un profil d'unifrance " + str(infos))
            if infos is None:
                advices = dict({"ref": "Vous devriez créer votre profil sur UniFrance"})
                transact.update(advices=advices)
            else:
                if len(infos["photo"]) > 0 and not profil.photo.startswith("http"):
                    transact.update(photo=infos["photo"])
                transact.update(links=profil.add_link(infos["url"], "UniFrance"))
                if "links" in infos:
                    links = links + infos["links"]
                job_for = infos["url"]
            add_pows_to_profil(profil, links, all_links, job_for=job_for)
            # log("Extraction de wikipedia")
            # try:
            #     infos = extract_actor_from_wikipedia(firstname=profil.firstname,lastname=profil.lastname)
            #     sleep(random() * 5)
            #     if not infos is None:
            #         if "photo" in infos and profil.photo is None: transact.update(photo=infos["photo"])
            #         if "summary" in infos and profil.biography is None: transact.update(biography=infos["summary"])
            #         if "links" in infos and len(infos["links"])>0:
            #             links=profil.add_link(url=infos["links"][0]["url"], title=infos["links"][0]["title"],description="")
            #             transact.update(links=links)
            # except:
            #     pass
            transact.update(dtLastSearch=profil.dtLastSearch)
    clear_directory("./Temp", "html")
    return True
def exec_batch(profils,
               refresh_delay_profil=31,
               refresh_delay_pages=31,
               limit=2000,
               limit_contrib=10,
               templates=list(),
               content={"unifrance": True, "imdb": True, "lefilmfrancais": False, "senscritique": False},
               remove_works=False):
    """
    Scan the given profiles and collect their works from the external sources.
    :param profils:
    :param refresh_delay_profil:
    :return:
    """
    bot = None
    n_films = 0
    n_works = 0
    rc_articles = list()
    # all_links=list()
    # for pow in PieceOfWork.objects.all():
    #     for l in pow.links:
    #         all_links.append(l["url"])
    for profil in profils:
        limit = limit - 1
        if limit < 0 or len(rc_articles) >= limit_contrib:
            break
        links = []
        job_for = None
        log("Traitement de " + profil.firstname + " " + profil.lastname +
            ". Dernière recherche " + profil.dtLastSearch.isoformat(" "))
        transact = Profil.objects.filter(id=profil.id)
        if profil.delay_lastsearch() / 24 > refresh_delay_profil or len(profils) == 1:
            log("mise a jour de " + profil.lastname + " dont la dernière recherche est " +
                str(profil.delay_lastsearch() / 24) + " jours")
            profil.dtLastSearch = datetime.now()
            # infos = extract_profil_from_bellefaye(firstname=profil.firstname, lastname=profil.lastname)
            # log("Extraction bellefaye " + str(infos))
            imdb_profil_url = None
            try:
                if content["imdb"]:
                    infos = extract_profil_from_imdb(firstname=profil.firstname,
                                                     lastname=profil.lastname,
                                                     refresh_delay=refresh_delay_pages)
                    log("Extraction d'imdb " + str(infos))
                    if "url" in infos:
                        profil.add_link(infos["url"], "IMDB")
                        imdb_profil_url = infos["url"]
                    if "photo" in infos and len(profil.photo) == 0:
                        profil.photo = infos["photo"]
                    if "links" in infos:
                        links = links + infos["links"]
            except Exception:
                log("Probleme d'extraction du profil pour " + profil.lastname + " sur imdb")
            try:
                if content["lefilmfrancais"]:
                    infos = extract_profil_from_lefimlfrancais(firstname=profil.firstname,
                                                               lastname=profil.lastname)
                    if "url" in infos:
                        profil.add_link(infos["url"], "LeFilmF")
                    if len(infos["links"]) > 0:
                        bot = connect_to_lefilmfrancais("*****@*****.**", "UALHa")
                        links = links + infos["links"]
            except Exception:
                log("Probleme d'extraction du profil pour " + profil.lastname + " sur leFilmFrancais")
            if content["unifrance"]:
                infos = extract_profil_from_unifrance(
                    remove_accents(profil.firstname + " " + profil.lastname),
                    refresh_delay=refresh_delay_pages)
                log("Extraction d'un profil d'unifrance " + str(infos))
                if infos is None:
                    advices = dict({"ref": "Vous devriez créer votre profil sur UniFrance"})
                    transact.update(advices=advices)
                else:
                    if len(infos["photo"]) > 0 and not profil.photo.startswith("http"):
                        transact.update(photo=infos["photo"])
                    transact.update(links=profil.add_link(infos["url"], "UniFrance"))
                    if "links" in infos:
                        links = links + infos["links"]
                    # job_for=infos["url"]
            job_for = profil.firstname + " " + profil.lastname
            if remove_works:
                Work.objects.filter(profil_id=profil.id, source__contains="auto").delete()
            rc_films, rc_works, articles = add_pows_to_profil(profil, links, job_for=job_for,
                                                              refresh_delay_page=refresh_delay_pages,
                                                              templates=templates, bot=bot)
            rc_articles.append(articles)
            if imdb_profil_url:
                extract_awards_from_imdb(imdb_profil_url, profil)
            n_films = n_films + rc_films
            n_works = n_works + rc_works
            # log("Extraction de wikipedia")
            # try:
            #     infos = extract_actor_from_wikipedia(firstname=profil.firstname,lastname=profil.lastname)
            #     sleep(random() * 5)
            #     if not infos is None:
            #         if "photo" in infos and profil.photo is None: transact.update(photo=infos["photo"])
            #         if "summary" in infos and profil.biography is None: transact.update(biography=infos["summary"])
            #         if "links" in infos and len(infos["links"])>0:
            #             links=profil.add_link(url=infos["links"][0]["url"], title=infos["links"][0]["title"],description="")
            #             transact.update(links=links)
            # except:
            #     pass
            try:
                transact.update(dtLastSearch=make_aware(profil.dtLastSearch))
            except Exception:
                pass
        else:
            log(profil.lastname + " est déjà à jour")
    # clear_directory("./Temp","html")
    return n_films, n_works, rc_articles
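# Hypothetical entry point showing how the batch scan is meant to be driven; the
# queryset filter is an assumption, only the exec_batch names come from the code above.
# profils = Profil.objects.filter(dtLastSearch__lt=make_aware(datetime.now() - timedelta(days=31)))
# n_films, n_works, articles = exec_batch(profils, refresh_delay_profil=31, limit=100, limit_contrib=10)
# log(str(n_films) + " films, " + str(n_works) + " works added")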