def execute(self, data, _sender=None, value="0", receiver=NFT_CONTRACT, gasLimit=60000000):
    # Build, sign and send a transaction, then decode the smart-contract results.
    if _sender is None:
        _sender = self._sender
    _sender.sync_nonce(self._proxy)
    t = Transaction()
    t.nonce = _sender.nonce
    t.version = get_tx_version()
    t.data = data
    t.receiver = receiver
    t.chainID = self._proxy.get_chain_id()
    t.gasLimit = gasLimit
    t.value = value
    t.sender = _sender.address.bech32()  # use the resolved sender, not always self._sender
    t.gasPrice = DEFAULT_GAS_PRICE
    t.sign(_sender)
    log("Execution d'une transaction sur " + BC_EXPLORER + "/address/" + t.sender)
    rc = t.send_wait_result(self._proxy, 60000)
    for r in rc["smartContractResults"]:
        if "data" in r:
            # Results come back as "@"-separated hex fields.
            r["result"] = list()
            for p in r["data"].split("@"):
                if len(p) > 0:
                    r["result"].append(hex_to_str(int(p, 16)))
    return rc["smartContractResults"]
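# A standalone sketch of the "@"-separated result decoding performed in execute()
# above, assuming (as hex_to_str suggests) that every segment is the hex encoding
# of a UTF-8 string. decode_sc_data is a hypothetical helper, not project code.
def decode_sc_data(data: str) -> list:
    out = []
    for p in data.split("@"):
        if p:  # skip the empty segment produced by a leading "@"
            h = p if len(p) % 2 == 0 else "0" + p  # pad odd-length hex
            out.append(bytes.fromhex(h).decode("utf-8", errors="replace"))
    return out

assert decode_sc_data("@6f6b@46454d49532d6162") == ["ok", "FEMIS-ab"]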
def send_to(request):
    body = request.data
    text = body["text"].replace("’", "")  # strip curly apostrophes (encoding workaround)
    social_link = ""
    if "social" in body and "value" in body["social"] and len(body["social"]["value"]) > 0:
        social_link = ("<br>Vous pouvez répondre directement via <a href='" +
                       body["social"]["value"] + "'>" + body["social"]["label"] + "</a>")
    log("Envoi du mail " + text)
    _from = User.objects.get(id=body["_from"])
    _profil = Profil.objects.get(id=body['_to'])
    # TODO: check the blacklist
    cc = ""
    if "send_copy" in body and body["send_copy"]:
        cc = _from.email  # _from is a User instance, not a dict
    fullname = _from.first_name + " " + _from.last_name
    sendmail(
        subject="[" + APPNAME + "] Message de " + fullname,
        template="contact.html",
        field={"text": text, "social_link": social_link, "fullname": fullname},
        _to=[_profil.email, cc] if cc else [_profil.email]  # avoid an empty recipient
    )
    return Response("Message envoyé", status=200)
def extract_movie_from_bdfci(pow: PieceOfWork, refresh_delay=31):
    title = pow.title.replace(" ", "+")
    search_url = ("https://www.bdfci.info/?q=" + title +
                  "&pa=f&d=f&page=search&src=bdfci&startFrom=1&offset=1")
    page = load_page(search_url, refresh_delay=refresh_delay)
    articles = page.find_all("article")
    url_ref = None
    if len(articles) == 0:
        entete = page.find("h1")
        if entete is not None:
            text_entete = entete.text.split("<")[0].lower()
            if text_entete == pow.title.lower():
                # The search resolved directly to the film page: keep its URL
                # (the original stored the page object itself, which would break
                # add_link and the string concatenation below).
                url_ref = search_url
    else:
        url = articles[0].find("a")
        if url is not None and url.attrs["title"].lower() == str(pow.title).lower():
            url_ref = "https://www.bdfci.info" + url.attrs["href"]
    if url_ref is not None:
        pow.add_link(url_ref, "BDFI")
        log("Ajout du lien BDFCI:" + url_ref + " pour " + pow.title)
    pow.dtLastSearch = datetime.now()
    pow.save()
    return title
def extract_profil_from_imdb(lastname: str, firstname: str):
    peoples = ia.search_person(firstname + " " + lastname)
    infos = dict()
    for p in peoples:
        name = p.data["name"].upper()
        if firstname.upper() in name and lastname.upper() in name:
            if "headshot" in p.data and "nopicture" not in p.data["headshot"]:
                infos["photo"] = p.data["headshot"]
            if "url" not in infos:
                infos["url"] = "https://imdb.com/name/nm" + p.personID + "/"
    if "url" not in infos:
        return infos  # no matching person found
    log("Ouverture de " + infos["url"])
    page = load_page(infos["url"])
    film_zone = page.find("div", {"id": "filmography"})
    if film_zone is None:
        film_zone = page
    links = film_zone.findAll('a', attrs={'href': wikipedia.re.compile("^/title/tt")})
    infos["links"] = []
    for l in links:
        ancestor = l.parent.parent.parent.parent
        if len(l.getText()) > 3 and ancestor and ancestor.get("id") == "filmography":
            texts = l.parent.parent.text.split("(")
            nature = "long"
            job: str = l.parent.parent.get("id").split("-")[0]
            if job == "miscellaneous" or len(job) == 0:
                temp = l.parent.parent.text.split("(")
                job = temp[len(temp) - 1].split(")")[0]
            url = "https://www.imdb.com" + l.get("href")
            url = url.split("?")[0]
            if len(texts) > 1:
                nature = ""
                for nat in MOVIE_NATURE:
                    if nat.lower() in texts[1].lower():
                        nature = nat
                        break
                if nature == "":
                    log("Nature inconnue depuis " + texts[1] + " pour " + url)
            if len(texts) > 2 and len(job) == 0:
                job = texts[2].split(")")[0]
            infos["links"].append({"url": url, "text": l.getText(), "job": job, "nature": nature})
    return infos
def init_token(self):
    # Issue the non-fungible FEMIS token, then grant the creation role to the admin.
    rc = self.execute(
        "issueNonFungible@" + toHex("FEMISToken", False) + "@" + toHex("FEMIS", False),
        self._sender, NFT_CREATE_COST)
    if len(rc) > 0 and "result" in rc[0] and len(rc[0]["result"]) > 1:
        token_id = rc[0]["result"][1]
        log("Création de " + token_id)
        rc = self.execute("setSpecialRole@" + toHex(token_id, False) + "@" +
                          self._sender.address.hex() + "@" + toHex("ESDTNFTCreate", False))
    return rc
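# Minimal sketch of how the issueNonFungible payload above is assembled: the
# MultiversX/Elrond convention is hex-encoded arguments joined by "@". This
# build_issue_payload helper is illustrative only; the code above relies on toHex.
def build_issue_payload(name: str, ticker: str) -> str:
    return "issueNonFungible@" + name.encode().hex() + "@" + ticker.encode().hex()

assert build_issue_payload("FEMISToken", "FEMIS") == "issueNonFungible@46454d4953546f6b656e@46454d4953"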
def update_dictionnary(request):
    for w in Work.objects.all():
        job = translate(w.job)
        if job != w.job:
            log("Traitement de " + str(w.job))
            w.job = job
            w.save()
    for p in PieceOfWork.objects.all():
        category = translate(p.category)
        if category != p.category:
            p.category = category
            p.save()
    return Response({"message": "ok"})
def extract_film_from_senscritique(title: str, refresh_delay=31):
    # parse.quote is used here: urllib's urlencode expects a mapping, not a bare string.
    url = "https://www.senscritique.com/search?q=" + parse.quote(title.lower())
    log("Recherche sur sens-critique : " + url)
    pages = load_page(url, save=False)
    pages = pages.find_all("div", {"data-qa": "hits"})
    if len(pages) > 0:
        links = pages[0].find_all("a")
        for l in links:
            if "href" in l.attrs and l.attrs["href"].startswith("https://www.senscritique.com/film/"):
                if l.getText().lower() == title.lower():
                    url = l["href"]
                    log("Extraction de " + url)
                    page = load_page(url, refresh_delay)  # warms the cache for later use
                    return url
    return None
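# Why parse.quote rather than urlencode for a bare string: urllib's urlencode
# expects a mapping, while quote percent-encodes a single value.
from urllib import parse
assert parse.quote("l'été meurtrier") == "l%27%C3%A9t%C3%A9%20meurtrier"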
def analyse_pows(pows: list, search_with="link", bot=None, cat="unifrance,imdb,lefilmfrancais"):
    infos = list()
    for pow in pows:
        pow.dtLastSearch = datetime.now()
        pow.save()
        if search_with == "link":
            for l in pow.links:
                info = None  # guard: neither source may match this link
                if "auto:IMDB" in l["text"]:
                    info = extract_film_from_imdb(l["url"], pow.title)
                if "auto:unifrance" in l["text"]:
                    info = extract_film_from_unifrance(l["url"], pow.title)
                if info is not None:
                    infos.append(info)
        if search_with == "title":
            title = pow.title
            year = pow.year
            if title and year:
                for source in cat.split(","):
                    log("Analyse de " + source)
                    film = None
                    if source == "unifrance":
                        film = extract_film_from_unifrance(title)
                    if source == "imdb":
                        film = extract_film_from_imdb(title, title=title)
                    if source == "lefilmfrancais":
                        if bot is None:
                            bot = connect_to_lefilmfrancais("*****@*****.**", "UALHa")
                        film = extract_film_from_leFilmFrancais(title, bot=bot)
                    if film:
                        pow_2 = dict_to_pow(film)
                        if pow_2.year == year and equal_str(pow_2.title, title):
                            pow, hasChanged = fusion(pow, pow_2)
                            if hasChanged:
                                pow.save()
    if bot is not None:  # guard: bot is only created for lefilmfrancais
        bot.quit()
        bot = None
    return infos
def extract_profil_from_imdb(lastname: str, firstname: str, refresh_delay=31):
    peoples = ia.search_person(remove_accents(firstname) + " " + remove_accents(lastname))
    infos = dict()
    for p in peoples:
        name = remove_accents(remove_ponctuation(p.data["name"].upper()))
        if firstname.upper() in name and lastname.upper() in name:
            if "headshot" in p.data and "nopicture" not in p.data["headshot"]:
                infos["photo"] = p.data["headshot"]
            if "url" not in infos:
                infos["url"] = "https://imdb.com/name/nm" + p.personID + "/"
    if "url" not in infos:
        return infos  # no matching person found
    log("Ouverture de " + infos["url"])
    page = load_page(infos["url"], refresh_delay=refresh_delay)
    film_zone = page.find("div", {"id": "filmography"})
    if film_zone is None:
        film_zone = page
    # All the links that point to a work
    infos["links"] = []
    links = film_zone.findAll('a', attrs={'href': wikipedia.re.compile("^/title/tt")})
    for l in links:
        ancestor = l.parent.parent.parent.parent
        if len(l.getText()) > 3 and ancestor and ancestor.get("id") == "filmography":
            job: str = l.parent.parent.get("id").split("-")[0]
            if job == "miscellaneous" or len(job) == 0:
                temp = l.parent.parent.text.split("(")
                job = temp[len(temp) - 1].split(")")[0]
            else:
                if not in_dict(job, "jobs"):
                    job = ""
            url = "https://www.imdb.com" + l.get("href")
            url = url.split("?")[0]
            # job/nature are left empty here; they are resolved later from each film page
            infos["links"].append({"url": url, "text": l.getText(), "job": "", "nature": ""})
    return infos
def raz(request):
    tables = request.GET.get("tables", "all")  # renamed from `filter` to avoid shadowing the builtin
    log("Effacement de " + tables)
    if "profils" in tables or tables == "all":
        log("Effacement des profils")
        Profil.objects.all().delete()
    if "users" in tables or tables == "all":
        log("Effacement des utilisateurs")
        User.objects.all().delete()
    if "pows" in tables or tables == "all":
        log("Effacement des oeuvres")
        PieceOfWork.objects.all().delete()
    log("Effacement de la base terminé")
    return Response({"message": "Compte effacé"})
def create_user_profile(sender, instance, created, **kwargs):
    """
    Post-save signal: create the ExtraUser attached to a freshly created user.
    :param sender:
    :param instance:
    :param created:
    :param kwargs:
    :return:
    """
    if created:
        log("Creation de l'extrauser associé")
        with open(STATIC_ROOT + "/profils.yaml", "r", encoding="utf-8") as f:
            perms = yaml.safe_load(f.read())
        perm = ""
        for p in perms["profils"]:
            if p["id"] == DEFAULT_PERMS_PROFIL:
                perm = p["perm"]
                break
        log("Permission par défaut pour les connectés : " + perm)
        ExtraUser.objects.create(user=instance, perm=perm)
def analyse(self, profils):
    n_profils = 0
    for profil in profils:
        bSave = False
        if len(profil.town) == 0 or profil.town == "0":
            if len(profil.cp) > 0:
                profil.town = self.find_city(profil.cp)
                bSave = True
            else:
                self.add_bad_profil(profil, "Impossible de retrouver la ville")
        else:
            if profil.town != profil.town.upper():
                profil.town = profil.town.upper()
                bSave = True
        if bSave:
            log("Enregistrement de " + str(profil))
            profil.save()
            n_profils = n_profils + 1
    return n_profils, self.log
def create(self, data):
    """
    Create a user profile and initialize its password.
    :param data:
    :return:
    """
    log("Création du password, du user et du token")
    # Default names are set before they are used in the welcome mail below.
    if "first_name" not in data:
        data["first_name"] = data["email"].split(".")[0]
    if "last_name" not in data:
        data["last_name"] = ""
    if data["username"].startswith("___"):
        password = data["username"].replace("___", "")
        data["username"] = data["email"]
        sendmail(
            "Voici votre code de connexion via mail", [data["email"]], "welcome_google",
            {
                "email": data["email"],
                "url_appli": DOMAIN_APPLI + "/?email=" + data["email"],
                "firstname": data["first_name"],
                "code": password,
                "appname": APPNAME
            })
    else:
        password = reset_password(data["email"], data["username"])
    user = User.objects.create_user(
        username=data["username"],
        password=password,
        email=data["email"],
        first_name=data["first_name"],
        last_name=data["last_name"],
    )
    Token.objects.create(user=user)
    return user
def fusion(self, p_old, p_new):
    try:
        log("Destruction de " + str(p_old))
        p_old.delete()
        return True
    except Exception:
        log("Destruction de " + str(p_new))
        try:
            p_new.delete()
            return True
        except Exception:
            log("Destruction impossible entre " + str(p_old) + " et " + str(p_new))
    return False
def find_double(self, with_fusion=True):
    log("Recherche des doublons sur les films")
    rc = 0
    for p1 in self.pows:
        for p2 in self.pows:
            d = jellyfish.jaro_similarity(p1.title.lower(), p2.title.lower())
            if d > 0.97 and p1.year == p2.year and p1.id != p2.id:
                log("Suspicion de doublon entre " + str(p1) + " et " + str(p2))
                if with_fusion:
                    if p1.quality_score() > p2.quality_score():
                        b = self.fusion(p2, p1)
                    else:
                        b = self.fusion(p1, p2)
                    if b:
                        log("Fusion réalisée")
                        rc = rc + 1
    return rc
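# Possible refinement of the N x N scan above (sketch, not in the original code):
# iterating unordered pairs once halves the comparisons and removes the need for
# the p1.id != p2.id test.
from itertools import combinations
import jellyfish

def iter_suspect_doubles(pows, threshold=0.97):
    for p1, p2 in combinations(pows, 2):
        if p1.year == p2.year and jellyfish.jaro_similarity(p1.title.lower(), p2.title.lower()) > threshold:
            yield p1, p2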
def extract_film_from_unifrance(url: str, job_for=None):
    rc = dict()
    if not url.startswith("http"):
        log("On passe par la page de recherche pour retrouver le titre")
        page = load_page("https://unifrance.org/recherche?q=" + parse.quote(url))
        _link = page.find("a", attrs={'href': wikipedia.re.compile("^https://www.unifrance.org/film/[0-9][0-9]")})
        if _link is None:
            return rc
        url = _link.get("href")

    # r=wikipedia.requests.get(url, headers={'User-Agent': 'Mozilla/5.0',"accept-encoding": "gzip, deflate"})
    # page = wikipedia.BeautifulSoup(str(r.content,encoding="utf-8"),"html5lib")
    page = load_page(url)
    _title = page.find('h1', attrs={'itemprop': "name"})
    if _title is not None:
        rc["title"] = _title.text
        log("Analyse du film " + rc["title"])
    for title in page.findAll('h1'):
        if "Affiches" in title.text:
            section = title.parent
            _img = section.find("img", attrs={'itemprop': "image"})
            if _img is not None:
                src: str = _img.get("src")
                if not src.startswith("/ressource"):
                    rc["visual"] = src
                    log("Enregistrement de l'affiche " + src)
    _real = page.find("div", attrs={"itemprop": "director"})
    if _real is not None:
        rc["real"] = _real.find("a", attrs={"itemprop": "name"}).get("href")
    idx_div = 0
    for div in page.findAll("div", attrs={'class': "details_bloc"}):
        if idx_div == 0 and ":" not in div.text:
            rc["nature"] = div.text
        if "Année de production : " in div.text:
            rc["year"] = div.text.replace("Année de production : ", "")
        if "Genre(s) : " in div.text:
            rc["category"] = translate(div.text.replace("Genre(s) : ", ""))
        idx_div = idx_div + 1
    if "category" in rc and len(rc["category"]) == 0:
        rc["category"] = "inconnue"
    if job_for is not None:
        if rc.get("real") == job_for:  # .get(): "real" may be missing
            rc["job"] = "Réalisation"
        else:
            section = page.find("section", {"id": "casting"})
            if section is not None:
                jobs = section.findAll("h2")
                paras = section.findAll("p")
                # if not "personne" in links[0].href:links.remove(0)
                for idx in range(len(paras)):
                    links = paras[idx].findAll("a")
                    for l in links:
                        if "/personne" in l.get("href") and l.get("href") == job_for:
                            rc["job"] = jobs[idx].text.replace(" : ", "")
                            break
    _synopsis = page.find("div", attrs={"itemprop": "description"})
    if _synopsis is not None:
        rc["synopsis"] = _synopsis.getText(strip=True)
    return rc
def extract_film_from_imdb(url: str, title: str, name="", job=""):
    """
    Extract a film description from its IMDB page.
    :return:
    """
    page = load_page(url)
    rc = dict({"title": title, "nature": translate("film")})
    zone_info = page.find("div", {"class": "title_block"})
    if title.startswith("Episode") or "Episode" in zone_info.getText():
        section_title = page.find("div", {"class": "titleParent"})
        if section_title is not None:
            title = section_title.find("a").text + " " + title
        # Look up the episode number
        rc["nature"] = MOVIE_NATURE[0]
        zone_info_comp = page.find("div", {"class": "button_panel navigation_panel"})
        if zone_info_comp is not None and "Season" in zone_info_comp.getText():
            extract_text = "S" + zone_info_comp.getText().split("Season")[1] \
                .replace("Episode ", "E").replace(" | ", "").replace(" ", "")
            rc["title"] = title + " " + extract_text.split("\n")[0]
    for cat in MOVIE_CATEGORIES:
        if cat.lower() in zone_info.getText().lower():
            rc["category"] = cat
    if "category" not in rc:
        rc["category"] = "Inconnue"
        log("Pas de categorie pour " + url)
    affiche = page.find("div", attrs={"class": "poster"})
    if affiche is not None and affiche.find("img") is not None:
        rc["visual"] = affiche.find("img").get("src")
    try:
        rc["year"] = re.search('[1-2][0-9][0-9][0-9]', page.title.text).group(0)
    except Exception:
        try:
            rc["year"] = re.search('[1-2][0-9][0-9][0-9]', zone_info.getText()).group(0)
        except Exception:
            pass
    summary_section = page.find("div", attrs={"class": "summary_text"})
    if summary_section is not None and "Add a Plot" not in summary_section.text:
        rc["synopsis"] = summary_section.text.replace("\n", "").strip()
    log("Recherche du role sur le film")
    credits = load_page(url + "fullcredits")
    if credits is not None:
        credits = credits.find("div", {"id": "main"})
    if credits is not None:
        links = credits.find_all("a")
        for l in links:
            if name.upper() in l.text.upper():
                parent = l.parent.parent.find("td", {"class": "credit"})
                if parent is not None:
                    rc["job"] = str(parent.getText().replace("\n", "")).strip()
                    rc["job"] = rc["job"].split("(")[0]
                    while "  " in rc["job"]:  # collapse double spaces
                        rc["job"] = rc["job"].replace("  ", " ")
                    break
    if "job" not in rc:
        rc["job"] = job
    return rc
def create(self, data):
    """
    Create a user profile and initialize its password.
    :param data:
    :return:
    """
    log("Création du password, du user et du token")
    # Default names are set before they are used in the welcome mail below.
    if "first_name" not in data:
        data["first_name"] = data["email"].split(".")[0]
    if "last_name" not in data:
        data["last_name"] = ""
    if data["username"].startswith("___"):
        password = data["username"].replace("___", "")
        data["username"] = data["email"]
        sendmail(
            "Voici votre code de connexion via mail", [data["email"]], "welcome_google",
            {
                "email": data["email"],
                "url_appli": settings.DOMAIN_APPLI + "/?email=" + data["email"],
                "firstname": data["first_name"],
                "code": password,
                "appname": APPNAME
            })
    else:
        password = reset_password(data["email"], data["username"])
    user = User.objects.create_user(
        username=data["username"],
        password=password,
        email=data["email"],
        first_name=data["first_name"],
        last_name=data["last_name"],
    )
    Token.objects.create(user=user)
    log("Récupération des profils")
    lp = list(Profil.objects.filter(email=data["email"]))
    with open(settings.STATIC_ROOT + "/profils.yaml", "r") as f:
        profils = yaml.safe_load(f.read())
    perm = profils["profils"][1]["perm"]
    log("Création de l'extraUser")
    if len(lp) > 0:
        eu = ExtraUser.objects.create(user=user, perm=perm, profil=lp[0],
                                      black_list="", level=profils["profils"][1]["level"])
    else:
        eu = ExtraUser.objects.create(user=user, perm=perm,
                                      black_list="", level=profils["profils"][1]["level"])
    eu.save()
    user.save()
    log("Procédure de création terminée")
    return user
def __init__(self, proxy=BC_PROXY, pem_file=ADMIN_PEMFILE):
    self._proxy = ElrondProxy(proxy)
    self.chain_id = self._proxy.get_chain_id()
    self.environment = TestnetEnvironment(proxy)
    log("Initialisation de l'admin avec " + pem_file)
    self._sender = Account(pem_file=pem_file)
def extract_film_from_unifrance(url: str, job_for=None, all_casting=False, refresh_delay=30):
    rc = dict({"casting": [], "source": "auto:unifrance", "url": url})
    if not url.startswith("http"):
        log("On passe par la page de recherche pour retrouver le titre")
        page = load_page("https://unifrance.org/recherche?q=" + parse.quote(url),
                         refresh_delay=refresh_delay)
        _link = page.find("a", attrs={'href': wikipedia.re.compile("^https://www.unifrance.org/film/[0-9][0-9]")})
        if _link is None:
            return None
        url = _link.get("href")
        rc["url"] = url

    # r=wikipedia.requests.get(url, headers={'User-Agent': 'Mozilla/5.0',"accept-encoding": "gzip, deflate"})
    # page = wikipedia.BeautifulSoup(str(r.content,encoding="utf-8"),"html5lib")
    page = load_page(url, refresh_delay)
    _title = page.find('h1', attrs={'itemprop': "name"})
    if _title is not None:
        rc["title"] = _title.text
        log("Analyse du film " + rc["title"])
    for title in page.findAll('h1'):
        if title.text.startswith("Affiches"):
            section = title.parent
            _img = section.find("img", attrs={'itemprop': "image"})
            if _img is not None:
                src: str = _img.get("src")
                if not src.startswith("/ressource"):
                    rc["visual"] = src
                    log("Enregistrement de l'affiche " + src)
    _real = page.find("div", attrs={"itemprop": "director"})
    if _real is not None and _real.find("a", attrs={"itemprop": "name"}) is not None:
        rc["real"] = _real.find("a", attrs={"itemprop": "name"}).get("href")
    idx_div = 0
    for div in page.findAll("div", attrs={'class': "details_bloc"}):
        if idx_div == 0 and ":" not in div.text:
            rc["nature"] = div.text
        if "Numéro de visa" in div.text:
            rc["visa"] = div.text.split(" : ")[1].replace(".", "")
        if "Langues de tournage" in div.text:
            rc["langue"] = div.text.split(" : ")[1]
        if "Année de production : " in div.text:
            rc["year"] = div.text.replace("Année de production : ", "")
        if "Genre(s) : " in div.text:
            rc["category"] = translate(div.text.replace("Genre(s) : ", ""))
        idx_div = idx_div + 1
    if "category" in rc and len(rc["category"]) == 0:
        rc["category"] = "inconnue"

    rc["prix"] = []
    for section_prix in page.find_all("div", attrs={"class": "distinction palmares"}):
        if len(section_prix.find_all("div")) > 0:
            content = section_prix.find_all("div")[1].text
            if content is not None:
                content = content.replace("PlusMoins", "")
                _prix = {"description": content.split(")Prix")[1].split(" : ")[0]}
                for l in section_prix.find_all("div")[1].find_all("a"):
                    if "festivals" in l.attrs["href"]:
                        _prix["title"] = l.text.split("(")[0]
                        _prix["year"] = re.findall(r"[1-2][0-9]{3}", l.text)[0]
                    if "person" in l.attrs["href"] and "profil" not in _prix:
                        _prix["profil"] = index_string(l.text)
                if "profil" not in _prix and job_for is not None:  # guard: job_for may be None
                    log("Attribution du prix à " + job_for)
                    _prix["profil"] = index_string(job_for)
                if "year" in _prix and "title" in _prix:
                    rc["prix"].append(_prix)
                    log("Ajout du prix " + str(_prix))
                else:
                    log("!Prix non conforme sur " + url)

    if job_for is not None:
        # Guard added: the description block or its paragraph may be absent.
        desc = page.find("div", {"id": "description"})
        real_links = desc.find("p").find_all("a") if desc and desc.find("p") else []
        if len(real_links) > 0 and equal_str(real_links[0].text, job_for):
            rc["job"] = translate("Réalisation")
        else:
            # Look for a directing credit
            section = page.find("div", {"itemprop": "director"})
            if section and (job_for.lower() in section.text.lower()):
                rc["job"] = translate("Réalisation")
            # Look in the detailed credits
            section = page.find("section", {"id": "casting"})
            if section is not None:
                jobs = section.findAll("h2")
                paras = section.findAll("p")
                # if not "personne" in links[0].href:links.remove(0)
                for idx in range(len(paras)):
                    links = paras[idx].findAll("a")
                    for l in links:
                        job = jobs[idx].text.replace(":", "").strip()
                        if "/personne" in l.get("href"):
                            if (job_for.startswith("http") and l.get("href") == job_for) or equal_str(job_for, l.text):
                                rc["job"] = job
                                break
                            else:
                                if all_casting:
                                    # Add the whole casting to the system
                                    names = str(l.getText()).split(" ")
                                    lastname = names[len(names) - 1]
                                    rc["casting"].append({
                                        "lastname": lastname,
                                        "url": l.attrs["href"],
                                        "source": "unifrance",
                                        "firstname": l.getText().replace(lastname, "").strip(),
                                        "job": job
                                    })
            # Look in the actors
            for actor in page.find_all("div", {"itemprop": "actors"}):
                if "data-title" in actor.attrs:
                    if actor.attrs["data-title"].lower() == job_for.lower():
                        rc["job"] = "actor"
    _synopsis = page.find("div", attrs={"itemprop": "description"})
    if _synopsis is not None:
        rc["synopsis"] = _synopsis.getText(strip=True)
    return rc
def add_pows_to_profil(profil, links, job_for, refresh_delay_page, templates=[], bot=None, content=None):
    """
    Attach the works found in `links` to the profile.
    :param profil:
    :param links:
    :return:
    """
    n_films = 0
    n_works = 0
    articles = list()
    job_for = remove_accents(remove_ponctuation(job_for))
    for l in links:
        source = "auto"
        film = None
        pow = None
        job = l["job"] if "job" in l else ""
        # for p in PieceOfWork.objects.filter(title__iexact=l["text"]):
        #     # if the source has already been analysed, do nothing
        #     for link in p.links:
        #         if l["url"] == link["url"]:
        #             pow=p
        #             break
        if "unifrance" in l["url"]:
            film = extract_film_from_unifrance(l["url"], job_for=job_for,
                                               refresh_delay=refresh_delay_page)
        if "source" in l and "LeFilmFrancais" in l["source"]:
            film = extract_film_from_leFilmFrancais(l["url"], job_for=job_for,
                                                    refresh_delay=refresh_delay_page, bot=bot)
        if "imdb" in l["url"]:
            film = extract_film_from_imdb(l["url"], l["text"],
                                          name=profil.firstname + " " + profil.lastname,
                                          job=job, refresh_delay=refresh_delay_page)
        # .get(): not every extractor fills category/nature
        if film and (film.get("category") == "News" or len(film.get("nature", "")) == 0):
            log("Ce type d'événement est exclu :" + str(film))
            film = None
        if film is not None:
            if "nature" not in film:
                film["nature"] = l["nature"]
            if "title" in film:
                log("Traitement de " + film["title"] + " à l'adresse " + l["url"])
                pow = dict_to_pow(film, content)
                job = profil.job
                if "job" in film:
                    job = film["job"]
                try:
                    result = PieceOfWork.objects.filter(title_index__iexact=pow.title_index)
                    if len(result) > 0:
                        for p in result:
                            if abs(int(p.year) - int(pow.year)) <= 1:
                                log("Le film existe déjà dans la base, on le met a jour avec les nouvelles données")
                                pow, hasChanged = fusion(p, pow)
                                if hasChanged:
                                    pow.dtLastSearch = datetime.now()
                                    pow.save()
                    else:
                        n_films = n_films + 1
                        pow.dtLastSearch = datetime.now()
                        pow.save()
                    # TODO: to revisit, record updates could make us miss films;
                    # the code below should be dedented, but the pow would then have to be re-fetched
                except Exception as inst:
                    log("Impossible d'enregistrer le film: " + str(inst.args))
            else:
                log("Impossible de retrouver le film" + str(film))
        if pow is not None:
            if film is not None and "prix" in film and film["prix"] is not None and len(film["prix"]) > 0:
                for prix in film["prix"]:
                    f = Festival.objects.filter(title__iexact=prix["title"])
                    if f.exists():
                        f = f.first()
                    else:
                        f = Festival(title=prix["title"].strip().lower())
                        f.save()
                    a = Award.objects.filter(pow__id=pow.id, year=int(prix["year"]), festival__id=f.id)
                    if a.exists():
                        a = a.first()
                    else:
                        desc = prix["description"][:249]
                        if desc.startswith("(") and ")" in desc:
                            desc = desc.split(")")[1]
                        a = Award(description=desc, year=prix["year"], pow=pow, festival=f,
                                  profil=None if "profil" not in prix else
                                  Profil.objects.filter(name_index__iexact=prix["profil"]).first())
                        try:
                            a.save()
                        except Exception:
                            log("!!Probleme d'enregistrement de l'award sur " + pow.title)
            if job is None:
                job = ""
            t_job = translate(job)
            if len(t_job) == 0:
                if job_for and pow and pow.title:
                    log("!Job non identifié pour " + job_for + " sur " + pow.title)
                    # t_job="Non identifié"
            else:
                if not Work.objects.filter(pow_id=pow.id, profil_id=profil.id, job=t_job).exists():
                    log("Ajout de l'experience " + job + " traduit en " + t_job +
                        " sur " + pow.title + " à " + profil.lastname)
                    work = Work(pow=pow, profil=profil, job=t_job, source=source)
                    try:
                        work.save()
                    except Exception as inst:
                        log("Impossible d'enregistrer le travail: " + str(inst.args))
                    if len(templates) > 0:
                        articles.append(create_article(profil, pow, work, templates[0]))
                else:
                    log("Pas d'enregistrement de la contribution job=" + job)
            # Save the casting
            if film is not None and "casting" in film:
                for p in film["casting"]:
                    _ps = list(Profil.objects.filter(lastname=p["lastname"], firstname=p["firstname"]))
                    if len(_ps) == 0:
                        log("Ajout de " + p["lastname"] + " comme externe en tant que " + p["job"])
                        _p = Profil(firstname=p["firstname"], lastname=p["lastname"],
                                    name_index=index_string(p["firstname"] + p["lastname"]),
                                    department="Ext", cursus="E", school="",
                                    email=p["firstname"] + "." + p["lastname"] + "@fictif")
                        _p.add_link(url=p["url"], title=p["source"])
                        _p.save()
                    else:
                        _p = _ps[0]
                    if not Work.objects.filter(pow_id=pow.id, profil_id=_p.id, job=p["job"]).exists():
                        work = Work(pow=pow, profil=_p, job=p["job"], source=source)
                        work.save()
                        n_works = n_works + 1
    return n_films, n_works, articles
def extract_film_from_imdb(url: str, title: str, name="", job="", all_casting=False, refresh_delay=31):
    """
    Extract a film description from IMDB, from a film URL or from a title to search.
    :return:
    """
    if not url.startswith("http"):
        page = load_page("https://www.imdb.com/find?s=tt&q=" + parse.quote(url))
        bFind = False
        for link in page.find_all("a"):
            if link and equal_str(link.text, url) and link["href"].startswith("/title/tt"):
                url = "https://www.imdb.com" + link["href"]
                bFind = True
                break
        if not bFind:
            log(url + " introuvable sur IMDB")
            return None

    page = load_page(url, refresh_delay)
    title = remove_ponctuation(title)
    rc = dict({"title": title, "nature": "", "casting": list(), "url": url, "source": "auto:IMDB"})
    divs = dict()
    elts = (page.find_all("div", recursive=True) + page.find_all("h1", recursive=True)
            + page.find_all("ul", recursive=True) + page.find_all("p") + page.find_all("li"))
    for div in elts:
        s = div.text
        s_t = translate(s)
        if s_t in MOVIE_NATURE:
            rc["nature"] = s_t
        # Parentheses added: "and" binds tighter than "or", the original test was unbalanced.
        if (s.startswith("1h") or s.startswith("2h")) and s.endswith("m") and len(rc["nature"]) == 0:
            rc["nature"] = translate("long")
        if "data-testid" in div.attrs:
            divs[div.attrs["data-testid"]] = div

    # Find the nature and the category
    if "genres" not in divs:
        elt = page.find("li", {"role": "presentation", "class": "ipc-inline-list__item"})
        cat = elt.text if elt is not None else "inconnu"
    else:
        cat = ""
        for div in divs["genres"]:
            cat = cat + translate(div.text.lower()) + " "
    if cat.split(" ")[0] in MOVIE_NATURE:
        rc["nature"] = cat.split(" ")[0]
        cat = cat.replace(rc["nature"], "").strip()
    rc["category"] = cat.strip()

    try:
        title = divs["hero-title-block__title"].text
        year = divs["hero-title-block__metadata"].text
        if year is not None:
            rc["year"] = re.search(r"(\d{4})", year).group(1)
    except Exception:
        log("Erreur sur title=" + title)
        return None

    affiche = divs.get("hero-media__poster")  # .get(): the poster block may be absent
    if affiche is not None and affiche.find("img") is not None:
        rc["visual"] = affiche.find("img").get("src")
    rc["synopsis"] = ""
    if "plot" in divs:
        rc["synopsis"] = divs["plot"].text.replace("Read all", "")

    # log("Recherche du role sur le film")
    credits = load_page(url + "fullcredits", refresh_delay)
    if credits is not None:
        credits = credits.find("div", {"id": "fullcredits_content"})
    if credits is not None:
        sur_jobs = credits.find_all("h4")
        tables = credits.find_all("table")
        for i in range(0, len(tables)):
            trs = tables[i].find_all("tr")
            for tr in trs:
                tds = tr.find_all("td")
                if len(tds) > 1:
                    findname = tds[0].text.replace("\n", "").replace("  ", " ").strip()
                    if len(findname) == 0:
                        findname = tds[1].text.replace("\n", "").replace("  ", " ").strip()
                    if len(findname) > 0:
                        # log("Nom identifié "+findname)
                        if equal_str(findname, name):
                            sur_job = sur_jobs[i].text.replace("\n", " ").strip()
                            if "Cast" in sur_job or "Serie Cast" in sur_job:
                                if len(tds) > 3 and "Self" in tds[3].text:
                                    job = ""
                                else:
                                    job = "Actor"
                            else:
                                job = tds[len(tds) - 1].text.split("(")[0].split("/")[0].strip()
                                if len(job) == 0 and len(sur_jobs[i].text) > 0:
                                    job = sur_job.replace(" by", "").strip()
                            job = job.split("\n")[0]
                            rc["job"] = translate(job)
                            if len(job) == 0:
                                log("Job non identifié pour " + name + " sur " + url)
                            else:
                                if not all_casting:
                                    break
                        else:
                            if all_casting:
                                names = tds[0].text.split(" ")  # .text added: tds[0] is a tag
                                rc["casting"].append({"name": " ".join(names), "source": "imdb", "job": job})
    if "job" not in rc:
        rc["job"] = job
    return rc
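# Minimal, self-contained illustration of the data-testid indexing pattern used
# above (BeautifulSoup); the HTML snippet is made up for the example.
from bs4 import BeautifulSoup

html = '<div data-testid="plot">A synopsis</div><div data-testid="genres"><span>Drama</span></div>'
page = BeautifulSoup(html, "html.parser")
divs = {d.attrs["data-testid"]: d for d in page.find_all("div") if "data-testid" in d.attrs}
assert divs["plot"].text == "A synopsis"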
def importer(request, format=None):
    header = list()

    def idx(col: str, row=None, default=None):
        # Return the value (or the index) of the first column alias present in the header.
        for c in col.lower().split(","):
            if c in header:
                if row is not None:
                    return row[header.index(c)]
                else:
                    return header.index(c)
        return default

    log("Importation de profil")
    data = base64.b64decode(str(request.body).split("base64,")[1])
    txt = ""
    for _encoding in ["utf-8", "ansi"]:  # NB: "ansi" may not be a registered codec on every platform
        try:
            txt = str(data, encoding=_encoding)
            break
        except Exception:
            pass
    txt = txt.replace("’", "")  # strip curly apostrophes (encoding workaround)
    d = csv.reader(StringIO(txt), delimiter=";")
    i = 0
    record = 0
    for row in d:
        if i == 0:
            header = [x.lower() for x in row]
        else:
            firstname = row[idx("firstname,prenom")]
            lastname = row[idx("lastname,nom")]
            email = row[idx("email,mail")]
            idx_photo = idx("photo,picture,image")
            # Eligibility check
            if len(lastname) > 2 and len(lastname) + len(firstname) > 5 and len(email) > 4 and "@" in email:
                if idx_photo is None or len(row[idx_photo]) == 0:
                    if row[idx("genre,civilite")] == "Monsieur" or \
                       row[idx("genre,civilite")] == "M." or \
                       row[idx("genre,civilite")].startswith("Mr"):
                        photo = "/assets/img/boy.png"
                    else:
                        photo = "/assets/img/girl.png"
                else:
                    photo = stringToUrl(row[idx("photo")])
                # Derived fields
                ts = dateToTimestamp(row[idx("birthday,anniversaire,datenaissance")])
                dt = None
                if ts is not None:
                    dt = datetime.fromtimestamp(ts)
                profil = Profil(
                    firstname=firstname,
                    lastname=lastname,
                    mobile=row[idx("mobile,telephone,tel")][:20],
                    nationality=idx("nationality,country,pays", row, "France"),
                    birthdate=dt,
                    department=idx("departement,department,formation", row, "")[:60],
                    job=idx("job,metier,competences", row, "")[:60],
                    degree_year=row[idx("promo,promotion,anneesortie")],
                    address=row[idx("address,adresse")][:200],
                    town=idx("town,ville", row, "")[:50],
                    cp=idx("cp,codepostal,code_postal,postal_code,postalcode", row, "")[:5],
                    website=stringToUrl(idx("website,siteweb,site,url", row)),
                    email=email,
                    photo=photo,
                    linkedin=idx("linkedin", row),
                    cursus=idx("cursus", row, "S"),
                )
                try:
                    rc = profil.save()
                    record = record + 1
                except Exception as inst:
                    log("Problème d'enregistrement de " + email + " : " + str(inst))
        i = i + 1
    cr = str(record) + " profils importés"
    log(cr)
    return Response(cr, 200)
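# Standalone illustration of the column-alias lookup used by importer(): the same
# idx() logic, packaged as a closure over a header row (make_idx is hypothetical).
def make_idx(header):
    def idx(col, row=None, default=None):
        for c in col.lower().split(","):
            if c in header:
                return row[header.index(c)] if row is not None else header.index(c)
        return default
    return idx

idx = make_idx(["prenom", "nom", "mail"])
assert idx("firstname,prenom") == 0
assert idx("email,mail", row=["Jean", "Dupont", "jean@x.fr"]) == "jean@x.fr"
assert idx("photo,picture") is None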
def extract_film_from_leFilmFrancais(url: str, job_for=None, all_casting=False, refresh_delay=30, bot=None):
    rc = dict({"nature": "", "title": "", "source": "auto:LeFilmFrancais", "url": url})
    if not url.startswith("http"):
        page = load_page("http://www.lefilmfrancais.com/index.php?option=com_papyrus&view=recherche&searchword=" + parse.quote(url))
        bFind = False
        fiche_film = page.find("div", {"id": "fiche_film"})
        if fiche_film:
            for l in fiche_film.find_all("a"):
                if l and l["href"].startswith("http://www.lefilmfrancais.com/film/"):
                    url = l["href"]
                    bFind = True
                    break
        if not bFind:
            return None

    page = load_page(url, bot=bot)
    if page.find("div", {"id": "synopsis"}):
        rc["synopsis"] = remove_html(page.find("div", {"id": "synopsis"}).text)
    elts = page.find_all("h1")
    if len(elts) > 0:
        rc["title"] = elts[0].text.split("(")[0]
    elt = page.find("div", {"id": "detail"})
    if elt:
        for item in elt:
            if item.name is None:
                if "sortie" in item.lower():
                    pass  # release-date parsing is not implemented yet
    for span in page.find_all("span"):
        if "class" in span.attrs and len(span.attrs["class"]) > 0 and span.attrs["class"][0] == "realisation":
            if "Réalisation" not in span.text.split(",")[0]:
                rc["nature"] = span.text.split(",")[0].split("(")[0]
        else:
            if ":" in span.text:
                val = span.text.split(":")[1].strip()
                if "Visa" in span.text:
                    rc["visa"] = val
                if "Titre original" in span.text:
                    rc["original_title"] = val
                if "Réalisation" in span.text:
                    rc["real"] = val
                if "Sortie" in span.text:
                    rc["sortie"] = val
                if "copies" in span.text:
                    rc["copies"] = int(val)
                if "Nationalité" in span.text:
                    rc["Nationality"] = val
                if "Distribution France" in span.text:
                    rc["distribution"] = val
    for item in page.find_all("li"):
        lab = item.text.split(":")[0]
        if ":" in item.text:
            val = item.text.split(":")[1].split("|")[0].strip()
            # lab is the text before the colon, so it can never contain "production :";
            # the test was relaxed accordingly.
            if "production" in lab:
                rc["production"] = val
            if "Partenaires" in lab:
                rc["financial"] = val
            if "Récompense" in lab:
                rc["prix"] = val
            if "Presse" in lab:
                rc["presse"] = val
    if "title" in rc:
        log("Extraction de " + rc["title"] + " : " + str(rc))
    return rc
def extract_awards_from_imdb(profil_url, profil):
    # Look up the awards
    page = load_page(profil_url + "awards?ref_=nm_awd")
    awards = page.find_all("h3")
    if len(awards) > 0:
        awards.pop(0)
    tables = page.find_all("table", {"class": "awards"})
    for i in range(0, len(tables)):
        for tr in tables[i].find_all("tr"):
            if tr:
                festival_title = translate(awards[i].text.split(",")[0].lower().strip())
                tds = tr.find_all("td")
                if len(tds) <= 2:
                    log("Format non conforme " + tr.text)
                else:
                    year = tds[0].text.replace("\n", "").replace(" ", "").strip()
                    award = tds[1].text
                    film = tds[2].find("a")
                    if film and award:
                        win = ("Winner" in award)
                        film_title = film.text
                        if "(" in tds[2].text:
                            film_year = tds[2].text.split("(")[1].split(")")[0]
                            pow = PieceOfWork.objects.filter(title__iexact=film_title, year__iexact=film_year)
                            if pow.exists():
                                pow = pow.first()
                                f = Festival.objects.filter(title__iexact=festival_title)
                                if f.exists():
                                    f = f.first()
                                else:
                                    f = Festival(title=festival_title)
                                    f.save()
                                a = Award.objects.filter(pow__id=pow.id, year=year,
                                                         festival__id=f.id, profil__id=profil.id)
                                if a.exists():
                                    a = a.first()
                                else:
                                    award = award.replace("\n", "").replace("Winner", "").replace("Nominee", "").strip()
                                    if award.startswith("(") and ")" in award:
                                        award = award.split(")")[1]
                                    a = Award(description=award, year=year, pow=pow,
                                              festival=f, profil=profil, winner=win)
                                    try:
                                        a.save()
                                    except Exception:
                                        log("!!Probleme d'enregistrement de l'award sur " + pow.title)
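# Side note (sketch): the filter-then-create sequences above are the pattern that
# Django's get_or_create encapsulates. Caveat: get_or_create matches exactly,
# whereas the code above filters with title__iexact, hence the lower() here.
def get_festival(title: str):
    f, _created = Festival.objects.get_or_create(title=title.strip().lower())
    return f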
def add_pows_to_profil(profil, links, all_links, job_for):
    """
    Attach the works found in `links` to the profile.
    :param profil:
    :param links:
    :param all_links:
    :return:
    """
    for l in links:
        source = "auto"
        pow = None
        job = l["job"] if "job" in l else ""  # fallback, guards the unbound-variable case below
        for p in PieceOfWork.objects.filter(title__iexact=l["text"]):
            # if the source has already been analysed, reuse the existing work
            for link in p.links:
                if l["url"] == link["url"]:
                    pow = p
                    break
        if not pow:
            film = None
            if "unifrance" in l["url"]:
                film = extract_film_from_unifrance(l["url"], job_for=job_for)
                source = "auto:unifrance"
            if "imdb" in l["url"]:
                film = extract_film_from_imdb(l["url"], l["text"],
                                              name=profil.firstname + " " + profil.lastname,
                                              job=l["job"])
                if "nature" not in film:
                    film["nature"] = l["nature"]
                source = "auto:IMDB"
            if film is None:
                continue  # unknown source, nothing to add
            log("Traitement de " + film["title"] + " à l'adresse " + l["url"])
            pow = PieceOfWork(title=film["title"])
            pow.add_link(url=l["url"], title=source)
            if "nature" in film:
                pow.nature = translate(film["nature"])
            else:
                pow.nature = "Film"
            if "synopsis" in film:
                pow.description = film["synopsis"]
            if "visual" in film:
                pow.visual = film["visual"]
            if "category" in film:
                pow.category = translate(film["category"])
            if "year" in film:
                pow.year = film["year"]
            try:
                result = PieceOfWork.objects.filter(title__iexact=pow.title)
                if len(result) > 0:
                    log("Le film existe déjà dans la base, on le récupére")
                    pow = result.first()
                    pow.add_link(l["url"], source)
                pow.save()
                # TODO: to revisit, record updates could make us miss films;
                # the code below should be dedented, but the pow would then have to be re-fetched
                job = profil.job
                if "job" in film:
                    job = film["job"]
            except Exception as inst:
                log("Impossible d'enregistrer le film: " + str(inst.args))
        else:
            job = l["job"]
        t_job = translate(job)
        if not Work.objects.filter(pow_id=pow.id, profil_id=profil.id, job=t_job).exists():
            log("Ajout de l'experience " + job + " traduit en " + t_job +
                " sur " + pow.title + " à " + profil.lastname)
            work = Work(pow=pow, profil=profil, job=t_job, source=source)
            work.save()
def movie_importer(request):
    log("Importation de films")
    header = str(request.body)[20:35]
    if "excel" in header:
        txt = str(base64.b64decode(str(request.body).split("base64,")[1]), encoding="utf-8")
        d = csv.reader(StringIO(txt), delimiter=";")
    else:
        d = extract_text_from_pdf(base64.b64decode(str(request.body).split("base64,")[1]))
        return  # PDF import is not wired up yet
    i = 0
    record = 0
    for row in list(d):
        pow = None
        if len(row) > 10:
            if i > 0:
                if row[6] == "":
                    row[6] = "0"
                if row[11] == "":
                    row[11] = "1800"
                pow: PieceOfWork = PieceOfWork(
                    title=row[0].replace(u'\xa0', u' '),
                    description=row[1],
                    visual=row[4],
                    nature=row[5],
                    dtStart=row[2],
                    budget=int(row[6]),
                    category=row[7],
                    links=[{"url": row[9], "text": row[8]}],
                    lang="US",
                    year=int(row[11]),
                    owner=row[10]
                )
            if pow is not None:
                try:
                    pow.category = pow.category.replace("|", " ")
                    rc = pow.save()
                    log("Ajout de " + pow.title)
                    record = record + 1
                except Exception as inst:
                    log("Problème d'enregistrement" + str(inst))
        else:
            pows = PieceOfWork.objects.filter(title__iexact=row[0])
            if len(pows) == 0:
                pow: PieceOfWork = PieceOfWork(
                    title=row[0],
                    description=translate(row[4]),
                    nature=translate(row[2]),
                    category=row[3],
                    lang="FR"
                )
                if len(row[1]) > 0:
                    pow.year = int(str(row[1]).split(",")[0])
                pow.add_link("", "FEMIS", "Film ajouté depuis le référencement FEMIS")
                pow.save()
                log("Ajout de " + pow.title)
            else:
                pow = pows.first()
            name = row[6].replace("\n", "")
            if " " in name:
                profils = Profil.objects.filter(lastname__icontains=name.split(" ")[1],
                                                firstname__icontains=name.split(" ")[0])
                if len(profils) > 0:
                    work = Work(pow_id=pow.id, job=translate(row[5]), profil_id=profils.first().id)
                    work.save()
        i = i + 1
    log("Importation terminée de " + str(record) + " films")
    return Response(str(record) + " films importés", 200)
def exec_batch(profils):
    all_links = list()
    for pow in PieceOfWork.objects.all():
        for l in pow.links:
            all_links.append(l["url"])
    for profil in profils:
        links = []
        job_for = None
        log("Traitement de " + profil.firstname + " " + profil.lastname)
        transact = Profil.objects.filter(id=profil.id)
        if profil.delay_lastsearch() > DELAY_TO_AUTOSEARCH or len(profils) == 1:
            log("Hors délai ==> mise a jour")
            profil.dtLastSearch = datetime.now()
            # infos = extract_profil_from_bellefaye(firstname=profil.firstname, lastname=profil.lastname)
            # log("Extraction bellefaye " + str(infos))
            infos = extract_profil_from_imdb(firstname=profil.firstname, lastname=profil.lastname)
            log("Extraction d'imdb " + str(infos))
            if "url" in infos:
                profil.add_link(infos["url"], "IMDB")
            if "photo" in infos and len(profil.photo) == 0:
                profil.photo = infos["photo"]
            if "links" in infos:
                links = links + infos["links"]
            infos = extract_actor_from_unifrance(profil.firstname + " " + profil.lastname)
            log("Extraction d'un profil d'unifrance " + str(infos))
            if infos is None:
                advices = dict({"ref": "Vous devriez créer votre profil sur UniFrance"})
                transact.update(advices=advices)
            else:
                if len(infos["photo"]) > 0 and not profil.photo.startswith("http"):
                    transact.update(photo=infos["photo"])
                transact.update(links=profil.add_link(infos["url"], "UniFrance"))
                if "links" in infos:
                    links = links + infos["links"]
                job_for = infos["url"]
            add_pows_to_profil(profil, links, all_links, job_for=job_for)
            # log("Extraction de wikipedia")
            # try:
            #     infos = extract_actor_from_wikipedia(firstname=profil.firstname,lastname=profil.lastname)
            #     sleep(random() * 5)
            #     if not infos is None:
            #         if "photo" in infos and profil.photo is None: transact.update(photo=infos["photo"])
            #         if "summary" in infos and profil.biography is None: transact.update(biography=infos["summary"])
            #         if "links" in infos and len(infos["links"])>0:
            #             links=profil.add_link(url=infos["links"][0]["url"], title=infos["links"][0]["title"],description="")
            #             transact.update(links=links)
            # except:
            #     pass
            transact.update(dtLastSearch=profil.dtLastSearch)
    clear_directory("./Temp", "html")
    return True
def exec_batch(profils,
               refresh_delay_profil=31,
               refresh_delay_pages=31,
               limit=2000,
               limit_contrib=10,
               templates=list(),
               content={"unifrance": True, "imdb": True, "lefilmfrancais": False, "senscritique": False},
               remove_works=False):
    """
    Scan the given profiles and collect their works from the external sources.
    :param profils:
    :param refresh_delay_profil:
    :return:
    """
    bot = None
    n_films = 0
    n_works = 0
    rc_articles = list()
    # all_links=list()
    # for pow in PieceOfWork.objects.all():
    #     for l in pow.links:
    #         all_links.append(l["url"])
    for profil in profils:
        limit = limit - 1
        if limit < 0 or len(rc_articles) >= limit_contrib:
            break
        links = []
        job_for = None
        log("Traitement de " + profil.firstname + " " + profil.lastname +
            ". Dernière recherche " + profil.dtLastSearch.isoformat(" "))
        transact = Profil.objects.filter(id=profil.id)
        if profil.delay_lastsearch() / 24 > refresh_delay_profil or len(profils) == 1:
            log("mise a jour de " + profil.lastname + " dont la dernière recherche est " +
                str(profil.delay_lastsearch() / 24) + " jours")
            profil.dtLastSearch = datetime.now()
            # infos = extract_profil_from_bellefaye(firstname=profil.firstname, lastname=profil.lastname)
            # log("Extraction bellefaye " + str(infos))
            imdb_profil_url = None
            try:
                if content["imdb"]:
                    infos = extract_profil_from_imdb(firstname=profil.firstname,
                                                     lastname=profil.lastname,
                                                     refresh_delay=refresh_delay_pages)
                    log("Extraction d'imdb " + str(infos))
                    if "url" in infos:
                        profil.add_link(infos["url"], "IMDB")
                        imdb_profil_url = infos["url"]
                    if "photo" in infos and len(profil.photo) == 0:
                        profil.photo = infos["photo"]
                    if "links" in infos:
                        links = links + infos["links"]
            except Exception:
                log("Probleme d'extraction du profil pour " + profil.lastname + " sur imdb")
            try:
                if content["lefilmfrancais"]:
                    infos = extract_profil_from_lefimlfrancais(firstname=profil.firstname,
                                                               lastname=profil.lastname)
                    if "url" in infos:
                        profil.add_link(infos["url"], "LeFilmF")
                    if len(infos["links"]) > 0:
                        bot = connect_to_lefilmfrancais("*****@*****.**", "UALHa")
                        links = links + infos["links"]
            except Exception:
                log("Probleme d'extraction du profil pour " + profil.lastname + " sur leFilmFrancais")
            if content["unifrance"]:
                infos = extract_profil_from_unifrance(
                    remove_accents(profil.firstname + " " + profil.lastname),
                    refresh_delay=refresh_delay_pages)
                log("Extraction d'un profil d'unifrance " + str(infos))
                if infos is None:
                    advices = dict({"ref": "Vous devriez créer votre profil sur UniFrance"})
                    transact.update(advices=advices)
                else:
                    if len(infos["photo"]) > 0 and not profil.photo.startswith("http"):
                        transact.update(photo=infos["photo"])
                    transact.update(links=profil.add_link(infos["url"], "UniFrance"))
                    if "links" in infos:
                        links = links + infos["links"]
                    # job_for=infos["url"]
            job_for = profil.firstname + " " + profil.lastname
            if remove_works:
                Work.objects.filter(profil_id=profil.id, source__contains="auto").delete()
            rc_films, rc_works, articles = add_pows_to_profil(profil, links, job_for=job_for,
                                                              refresh_delay_page=refresh_delay_pages,
                                                              templates=templates, bot=bot)
            rc_articles.append(articles)
            if imdb_profil_url:
                extract_awards_from_imdb(imdb_profil_url, profil)
            n_films = n_films + rc_films
            n_works = n_works + rc_works
            # log("Extraction de wikipedia")
            # try:
            #     infos = extract_actor_from_wikipedia(firstname=profil.firstname,lastname=profil.lastname)
            #     sleep(random() * 5)
            #     if not infos is None:
            #         if "photo" in infos and profil.photo is None: transact.update(photo=infos["photo"])
            #         if "summary" in infos and profil.biography is None: transact.update(biography=infos["summary"])
            #         if "links" in infos and len(infos["links"])>0:
            #             links=profil.add_link(url=infos["links"][0]["url"], title=infos["links"][0]["title"],description="")
            #             transact.update(links=links)
            # except:
            #     pass
            try:
                transact.update(dtLastSearch=make_aware(profil.dtLastSearch))
            except Exception:
                pass
        else:
            log(profil.lastname + " est déjà à jour")
    # clear_directory("./Temp","html")
    return n_films, n_works, rc_articles
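# Hypothetical entry point showing how the batch scan is meant to be driven; the
# queryset filter is an assumption, only the exec_batch names come from the code above.
# profils = Profil.objects.filter(dtLastSearch__lt=make_aware(datetime.now() - timedelta(days=31)))
# n_films, n_works, articles = exec_batch(profils, refresh_delay_profil=31, limit=100, limit_contrib=10)
# log(str(n_films) + " films, " + str(n_works) + " works added")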