def init_data(file_data):
    ''' Initializes the file data dictionary. '''
    # The hex form of the id keys the source entry that holds the url.
    hex_id = mid2hex(file_data["_id"])
    file_data["id"] = mid2url(file_data["_id"])
    file_data["name"] = file_data["src"][hex_id]["url"]
    return {"file": file_data, "view": {}}
def api_embed(embed_size, fileid, nameid):
    '''
    Renders the embed iframe for a file.

    embed_size -- "s", "m" or "b"; anything else falls back to "m".
    fileid -- url-encoded file id.
    nameid -- filename id used to pick the name to show.

    Aborts with 503 on database errors and 404 for missing/removed files.
    '''
    # idiom fix: "x not in y" instead of "not x in y"
    if embed_size not in ("s", "m", "b"):
        embed_size = "m"
    file_id = url2mid(fileid)
    filename = ""
    download_url = None
    size = 0
    if embed_size == "b":
        # big embed: needs the full view metadata for the download url
        try:
            data = get_file_metadata(file_id, nameid)
            download_url = data["view"]["url"]
        except DatabaseError:
            abort(503)
        except FileNotExist:
            flash("link_not_exist", "error")
            abort(404)
        except FileRemoved:
            flash("error_link_removed", "error")
            abort(404)
        except FileUnknownBlock:
            abort(404)
    else:
        # small/medium embed: the raw file document is enough
        data = filesdb.get_file(file_id)
        size = data['z'] if "z" in data else None
        # pick a filename: torrent name first, then the requested name id,
        # then any available name
        if "torrent:name" in data["md"]:
            filename = data["md"]["torrent:name"]
        elif nameid in data["fn"]:
            filename = data["fn"][nameid]['n']
        elif data["fn"]:
            filename = data["fn"].values()[0]['n']
        else:
            filename = ""
        download_url = url_for("files.download", file_id=mid2url(data["_id"]), file_name="%s.html" % filename)
    return render_template("api/embed.html",
        filename = filename,
        size = size,
        file = data,
        embed_size = embed_size,
        download_url = download_url
        )
def init_data(file_data, ntts=None):
    '''
    Initializes the file data dictionary.

    file_data -- raw file document; mutated in place (adds "id" and "name").
    ntts -- mapping of entity id to entity data. BUG FIX: was a mutable
            default argument (ntts=[]); it is also used as a mapping
            (indexed by id), so the sentinel default is now an empty dict.
    '''
    if ntts is None:
        ntts = {}
    file_data["id"]=mid2url(file_data['_id'])
    # any source works for the display name; take the first one
    file_data['name']=file_data['src'].itervalues().next()['url']
    file_se = file_data["se"] if "se" in file_data else None
    # entity ids may arrive as floats; int(float(...)) matches integer keys
    # (hash(3.0) == hash(3), so the "in" test agrees with the index)
    ntt = ntts[int(float(file_se["_id"]))] if file_se and "_id" in file_se and file_se["_id"] in ntts else None
    if ntt:
        file_se["info"] = ntt
        # collect related entities present in ntts, flattened over relation lists
        file_se["rel"] = [ntts[relid] for relids in ntt["r"].itervalues() for relid in relids if relid in ntts] if "r" in ntt else []
    return {"file":file_data,"view":{}}
def download(file_id, file_name=""):
    '''
    Renders the torrent download page for a file, with related results.

    file_id -- external (url-encoded) file id; may be a legacy numeric id.
    file_name -- seo filename from the url; used for the related search and
                 to decide canonicity.

    Aborts with 404/410/503 for missing/removed/unavailable files.
    '''
    g.page_type = FILE_PAGE_TYPE
    # register the query that brought the user here, if the referrer has one
    if request.referrer:
        try:
            posibles_queries = referrer_parser.match(request.referrer)
            if posibles_queries:
                query = posibles_queries.group(1) or posibles_queries.group(2) or ""
                if query:
                    get_query_info(u(urllib2.unquote_plus(query).decode("utf-8")))
        except:
            # best-effort only: a malformed referrer must not break the page
            pass
    error = None
    file_data=None
    if file_id is not None: # if an id arrives, check that it is valid
        try: # try to convert the id coming from the url into an internal one
            file_id=url2mid(file_id)
        except TypeError as e:
            try: # check whether it is a legacy numeric id
                possible_file_id = filesdb.get_newid(file_id)
                if possible_file_id is None:
                    logging.warn("Identificadores numericos antiguos sin resolver: %s."%e, extra={"fileid":file_id})
                    error=404
                else:
                    logging.warn("Identificadores numericos antiguos encontrados: %s."%e, extra={"fileid":file_id})
                    # permanent redirect to the current id
                    return {"html": empty_redirect(url_for(".download", file_id=mid2url(possible_file_id), file_name=file_name), 301),"error":301}
            except BaseException as e:
                logging.exception(e)
                error=503
            file_id=None
    if file_id:
        try:
            # dashes in the seo name stand for spaces
            file_data=get_file_metadata(file_id, file_name.replace("-"," "))
        except DatabaseError:
            error=503
        except FileNotExist:
            error=404
        except (FileRemoved, FileFoofindRemoved, FileNoSources):
            error=410
        except FileUnknownBlock:
            error=404
        if error is None and not file_data:
            # no error and no data: the file exists but could not be fetched
            error=503
    if error:
        abort(error)
    # complete torrent data
    file_data = torrents_data(file_data, True, g.category)
    if not file_data:
        abort(404)
    if file_data["view"]["category"]:
        g.category = file_data["view"]["category"]
        if file_data["view"]["category"].tag=="p**n":
            g.is_adult_content = True
    else:
        g.category = file_data["view"]["category_type"]
    # do not allow access to files that should be blocked
    prepared_phrase = blacklists.prepare_phrase(file_data['view']['nfn'])
    if prepared_phrase in blacklists["forbidden"] or (prepared_phrase in blacklists["misconduct"] and prepared_phrase in blacklists["underage"]):
        g.blacklisted_content = "File"
        if not g.show_blacklisted_content:
            abort(404)
    # related torrents search, excluding this file itself
    query = download_search(file_data, file_name, "torrent").replace("-"," ")
    related = single_search(query, category=None, not_category=(None if g.is_adult_content else "p**n"), title=("Related torrents",3,None), zone="File / Related", last_items=[], limit=30, max_limit=15, ignore_ids=[mid2hex(file_id)], show_order=None)
    # choose the page title
    title = file_data['view']['fn']
    # trim the title back to the nearest separator
    if len(title)>101:
        for pos in xrange(101, 30, -1):
            if title[pos] in SEPPER:
                title = title[:pos].strip()
                break
        else:
            # no separator found: hard cut
            title = title[:101]
    g.title = [title]
    page_description = ""
    if "description" in file_data["view"]["md"]:
        page_description = file_data["view"]["md"]["description"].replace("\n", " ")
    # fall back to a generic description for the category
    if not page_description:
        if g.category:
            page_description = _("download_category_desc", category=singular_filter(g.category.title).lower(), categorys=g.category.title.lower()).capitalize()
        else:
            page_description = _("download_desc")
    # pad short descriptions with texts from the related results
    if len(page_description)<50:
        if page_description:
            page_description += ". "
        page_description += " ".join(text.capitalize()+"." for text in related[1]["files_text"])
    # clip long descriptions at a sentence or word boundary near 180 chars
    if len(page_description)>180:
        last_stop = page_description[:180].rindex(".") if "." in page_description[:180] else 0
        if last_stop<100:
            last_stop = page_description[:180].rindex(" ") if " " in page_description[:180] else 0
        if last_stop<100:
            last_stop = 180
        page_description = page_description[:last_stop]+"."
    g.page_description = page_description
    is_canonical_filename = file_data["view"]["seo-fn"]==file_name
    # register the visit to the file (bots are logged, not counted as visits)
    if g.search_bot:
        searchd.log_bot_event(g.search_bot, True)
    else:
        save_visited([file_data])
    if related[0]:
        g.must_cache = 3600
    # last-modified
    g.last_modified = file_data["file"]["ls"]
    return render_template('file.html',
        related_query = query,
        file_data=file_data,
        related_files=related,
        is_canonical_filename=is_canonical_filename,
        featured=get_featured(related[1]["count"]+len(file_data["view"]["md"]), 1))
def choose_filename(f,text=False):
    '''
    Chooses the best filename for the file among all its source names.

    f -- {"file": ..., "view": ...} dict as built by init_data; mutated in
         place (fills the f['view'] name fields and per-name 'tht' scores).
    text -- optional search text, used to prefer names matching the query.

    Returns True if the search text was found in the chosen name, False
    otherwise, or None when neither a name nor a usable source url exists.
    '''
    text=slugify(text) if text else text
    srcs = f['file']['src']
    fns = f['file']['fn']
    chosen = None
    max_count = -1
    has_text = 0
    try:
        for hexuri,src in srcs.items():
            if 'bl' in src and src['bl']!=0:
                # blocked source: ignore its names
                continue
            this_has_text=0
            for crc,srcfn in src['fn'].items():
                # nameless entries add no weight
                m = srcfn['m'] if len(fns[crc]['n'])>0 else 0
                if 'c' in fns[crc]:
                    fns[crc]['c']+=m
                else:
                    fns[crc]['c']=m
                if text:
                    slugified_fn = slugify(fns[crc]['n']).strip()
                    if len(slugified_fn)>0:
                        #TODO do the same as when bolding the name: pick the best text even without accents or spaces
                        if slugified_fn.upper().find(text.upper())!=-1:
                            this_has_text = 2000
                        else:
                            # BUG FIX: the pattern was written as a PHP/JS-style
                            # "/.../i" literal; Python matches the slashes and the
                            # trailing "i" literally, so it never matched. Use
                            # re.IGNORECASE instead.
                            matches = 0
                            for word in [re.escape(w) for w in text.split(" ")]:
                                matches += len(re.findall(r"((?:\b|_)%s(?:\b|_))"%word, slugified_fn, re.IGNORECASE))
                            if matches>0:
                                this_has_text = 1000 + matches
                f['file']['fn'][crc]['tht'] = this_has_text
                better = fns[crc]['c']>max_count
                if this_has_text > has_text or (better and this_has_text==has_text):
                    has_text = this_has_text
                    chosen = crc
                    max_count = fns[crc]['c']
    except KeyError as e:
        # sources that declare a name which is not in the file
        logging.exception(e)
    f['view']['url'] = mid2url(hex2mid(f['file']['_id']))
    if chosen:
        filename = fns[chosen]['n']
        ext = fns[chosen]['x']
    else:
        # no usable name: derive one from a source url
        srcurl = ""
        for hexuri,src in srcs.items():
            if src['url'].find("/")!=-1:
                srcurl = src['url']
        if srcurl=="":
            return
        srcurl = srcurl[srcurl.rfind("/")+1:]
        ext = srcurl[srcurl.rfind(".")+1:]
        filename = srcurl[0:srcurl.rfind(".")]
    filename = Markup(filename).striptags()[:512]
    if ext not in EXTENSIONS:
        # unknown extension: keep it as part of the name
        filename += ext
        ext=""
        nfilename = filename
    else:
        # clean filename: strip the trailing ".ext" if present
        end = filename.upper().rfind("."+ext.upper())
        if end == -1:
            nfilename = filename
        else:
            nfilename = filename.strip()[0:end]
    f['view']['fn'] = filename #TODO for archive types, look for the real type (mp3, avi...) in the name
    f['view']['fnx'] = ext
    f['view']['efn'] = filename.replace(" ", "%20")
    # prettify the filename
    if nfilename.find(" ")==-1:
        nfilename = nfilename.replace(".", " ")
    f['view']['nfn'] = nfilename.replace("_", " ")
    # filename escaped to build the download urls
    f['view']['qfn'] = u(filename).encode("UTF-8")
    # filename with the words matching the search highlighted
    if not text:# or not has_text:
        f['view']['fnh'] = f['view']['fnhs'] = filename
    else:
        f['view']['fnh'], f['view']['fnhs'] = highlight(text,filename,True)
    return has_text>0
def download(file_id,file_name=None):
    ''' Shows the file to download, with votes, comments and related files. '''
    def choose_filename_related(file_data):
        ''' Returns the initialized data for a related file with its name chosen. '''
        f=init_data(file_data)
        choose_filename(f)
        return f
    def comment_votes(file_id,comment):
        '''
        Gets the votes for the comments of a file, keyed by the first 40
        chars of the vote id: [positive, negative, current user's vote].
        '''
        comment_votes={}
        if "vs" in comment:
            for comment_vote in usersdb.get_file_comment_votes(file_id):
                # BUG FIX: the membership test used the full vote _id while
                # keys are stored truncated to 40 chars, so the counters were
                # reset to [0,0,0] on every iteration instead of accumulating.
                key = comment_vote["_id"][0:40]
                if key not in comment_votes:
                    comment_votes[key]=[0,0,0]
                if comment_vote["k"]>0:
                    comment_votes[key][0]+=1
                else:
                    comment_votes[key][1]+=1
                # if the logged-in user has voted, keep it to render it active
                if current_user.is_authenticated() and comment_vote["u"]==current_user.id:
                    comment_votes[key][2]=comment_vote["k"]
        return comment_votes
    # keep the parameters of the search that led here, if any
    args={}
    if request.referrer:
        querystring = urlparse(request.referrer).query
        if querystring:
            for params in u(querystring).encode("UTF-8").split("&"):
                param=params.split("=")
                if len(param) == 2:
                    args[param[0]]=u(urllib.unquote_plus(param[1]))
    try:
        file_id=url2mid(file_id)
    except Exception as e:
        # check whether it is a legacy numeric id
        try:
            possible_file_id = filesdb.get_newid(file_id)
            if possible_file_id is None:
                logging.warn("%s - %s" % (e, file_id))
                flash("link_not_exist", "error")
                abort(404)
            # permanent redirect to the current id
            return redirect(
                url_for(".download", file_id=mid2url(possible_file_id), file_name=file_name),
                code=301)
        except filesdb.BogusMongoException as e:
            logging.exception(e)
            abort(503)
    try:
        data = filesdb.get_file(file_id, bl = None)
    except filesdb.BogusMongoException as e:
        logging.exception(e)
        abort(503)
    # try to get the id of the sphinx server,
    # resolves data inconsistencies
    if not data:
        sid = get_id_server_from_search(file_id, file_name)
        if sid:
            try:
                data = filesdb.get_file(file_id, sid = sid, bl = None)
            except filesdb.BogusMongoException as e:
                logging.exception(e)
                abort(503)
    if data:
        if not data["bl"] in (0, None):
            # blocked file: report why and abort
            if data["bl"] == 1:
                flash("link_not_exist", "error")
            elif data["bl"] == 3:
                flash("error_link_removed", "error")
                #block_files( mongo_ids=(data["_id"],) )
            abort(404)
    else:
        flash("link_not_exist", "error")
        abort(404)
    # build the view data
    file_data=fill_data(data, True, file_name)
    if file_data["view"]["sources"]=={}: # every source is blocked
        flash("error_link_removed", "error")
        abort(404)
    save_visited([file_data])
    # page title
    title = u(file_data['view']['fn'])
    g.title = u"%s \"%s%s\"%s%s" % (
        _(file_data['view']['action']).capitalize(),
        title[:50],
        "..." if len(title) > 50 else "",
        " - " if g.title else "",
        g.title)
    # related files
    related_files = search_related(split_file(file_data["file"])[0][:10])
    bin_file_id=mid2bin(file_id)
    ids=sorted({fid[0:3] for related in related_files for fid in get_ids(related) if fid[0]!=bin_file_id})[:5]
    files_related=[choose_filename_related(data) for data in get_files(ids)]
    # if the user is logged in, check their vote for the active language
    vote=None
    if current_user.is_authenticated():
        vote=usersdb.get_file_vote(file_id,current_user,g.lang)
    if vote is None:
        vote={"k":0}
    # comment submission form
    form = CommentForm(request.form)
    if request.method=='POST' and current_user.is_authenticated() and (current_user.type is None or current_user.type==0) and form.validate():
        usersdb.set_file_comment(file_id,current_user,g.lang,form.t.data)
        form.t.data=""
        flash("comment_published_succesfully")
        # refresh the per-language comment count stored on the file
        filesdb.update_file({"_id":file_id,"cs":usersdb.get_file_comments_sum(file_id),"s":file_data["file"]["s"]},direct_connection=True)
    # collect (index, author, comment, votes) for each comment of the file
    comments=[]
    if "cs" in file_data["file"]:
        comments=[(i,usersdb.find_userid(comment["_id"][0:24]),comment,comment_votes(file_id,comment)) for i,comment in enumerate(usersdb.get_file_comments(file_id,g.lang),1)]
    return render_template('files/download.html',file=file_data,args=args,vote=vote,files_related=files_related,comments=comments,form=form)
def choose_filename(f,text_cache=None):
    '''
    Chooses the best filename for the file among its candidate names.

    f -- {"file": ..., "view": ...} dict; mutated in place (fills the
         f['view'] name fields, 'fnid' and per-name 'tht' scores).
    text_cache -- optional search-text tuple; text_cache[0] is the raw text
         (or directly an fn id) and text_cache[2] the form used for
         highlighting. -- assumed from usage here; confirm against callers.

    Returns True if the searched text was found in the chosen name, or None
    when neither a name nor a usable source url exists.
    '''
    srcs = f['file']['src']
    fns = f['file']['fn']
    chosen = None
    max_count = -1
    current_weight = -1
    if text_cache and text_cache[0] in fns:
        # text is actually an fn ID: pick it directly
        chosen = text_cache[0]
    else:
        for hexuri,src in srcs.items():
            if 'bl' in src and src['bl']!=0:
                # blocked source: ignore its names
                continue
            for crc,srcfn in src['fn'].items():
                if crc not in fns:
                    # sources that declare a name which is not in the file
                    continue
                # nameless entries add no weight
                m = srcfn['m'] if len(fns[crc]['n'])>0 else 0
                if 'c' in fns[crc]:
                    fns[crc]['c']+=m
                else:
                    fns[crc]['c']=m
                text_weight = 0
                if text_cache:
                    fn_parts = slugify(fns[crc]['n']).strip().split(" ")
                    if len(fn_parts)>0:
                        text_words = slugify(text_cache[0]).split(" ")
                        # scores the number and order of word matches:
                        # each distinct query word found counts 100, plus the
                        # length of the longest run of words in query order
                        last_pos = -1
                        max_length = length = 0
                        occurrences = [0]*len(text_words)
                        for part in fn_parts:
                            pos = text_words.index(part) if part in text_words else -1
                            if pos != -1 and (last_pos==-1 or pos==last_pos+1):
                                # consecutive (or first) match: extend the run
                                length += 1
                            else:
                                if length > max_length:
                                    max_length = length
                                length = 0
                            if pos != -1:
                                occurrences[pos]=1
                            last_pos = pos
                        if length > max_length:
                            max_length = length
                        text_weight = sum(occurrences)*100 + max_length
                f['file']['fn'][crc]['tht'] = text_weight
                # prefer higher text weight; break ties with the name count
                better = fns[crc]['c']>max_count
                if text_weight > current_weight or (better and text_weight==current_weight):
                    current_weight = text_weight
                    chosen = crc
                    max_count = fns[crc]['c']
    f['view']['url'] = mid2url(hex2mid(f['file']['_id']))
    f['view']['fnid'] = chosen
    if chosen:
        filename = fns[chosen]['n']
        ext = fns[chosen]['x']
    else: #uses filename from src
        filename = ""
        for hexuri,src in srcs.items():
            if src['url'].find("/")!=-1:
                filename = src['url']
        if filename=="":
            # nothing usable at all
            return
        filename = filename[filename.rfind("/")+1:]
        ext = filename[filename.rfind(".")+1:]
        filename = filename[0:filename.rfind(".")]
    #TODO if no filename arrives, build one from metadata (e.g. series - title episode)
    filename = extension_filename(filename,ext)
    f['view']['fn'] = filename.replace("?", "")
    # filename escaped to build the download urls
    f['view']['qfn'] = qfn = u(filename).encode("UTF-8")
    f['view']['pfn'] = urllib.quote(qfn).replace(" ", "%20") # P2P filename
    nfilename = seoize_text(filename, " ",True, 0)
    f['view']['nfn'] = nfilename
    # add the filename words as page keywords
    g.keywords.update(set(keyword for keyword in nfilename.split(" ") if len(keyword)>1))
    # filename with the words matching the search highlighted
    if text_cache:
        f['view']['fnh'], f['view']['fnhs'] = highlight(text_cache[2],filename,True)
    else:
        f['view']['fnh'] = filename # this is only for download, which never has text
    return current_weight>0 # whether the searched text was found
def choose_filename(f,text=False):
    '''
    Chooses the best filename for the file among all its source names.

    f -- {"file": ..., "view": ...} dict; mutated in place (fills the
         f['view'] name fields and per-name 'tht' scores).
    text -- optional search text, used to prefer names matching the query.

    Returns None early when neither a name nor a usable source url exists.
    '''
    srcs = f['file']['src']
    fns = f['file']['fn']
    chosen=""
    maxCount = 0
    hasText = 0
    for hexuri,src in srcs.items():
        if 'bl' in src and src['bl']!=0:
            # blocked source: ignore its names
            continue
        srcfns = src['fn']
        for crc,srcfn in srcfns.items():
            fnn = fns[crc]['n']
            if len(fnn.strip())==0:
                # empty names don't count
                continue
            thisHasText=0
            if 'c' in fns[crc]:
                fns[crc]['c']+=srcfn['m']
            else:
                fns[crc]['c']=srcfn['m']
            if text:
                if fnn.upper().find(text.upper())!=-1:
                    thisHasText = 2000
                else:
                    # BUG FIX: the pattern was a PHP/JS-style "/.../i" literal;
                    # Python matches the slashes and trailing "i" literally, so
                    # it never matched. Use re.IGNORECASE instead.
                    matches = 0
                    for word in [re.escape(w) for w in text.split(" ")]:
                        matches += len(re.findall(r"((?:\b|_)%s(?:\b|_))"%word, fnn, re.IGNORECASE))
                    if matches>0:
                        thisHasText = 1000 + matches
            f['file']['fn'][crc]['tht'] = thisHasText
            # prefer higher text score; break ties with the name count
            better = fns[crc]['c']>maxCount
            if thisHasText > hasText or (better and thisHasText==hasText):
                hasText = thisHasText
                chosen = crc
                maxCount = fns[crc]['c']
    f['view']['url'] = mid2url(hex2mid(f['file']['_id']))
    if chosen!="":
        filename = fns[chosen]['n']
        ext = fns[chosen]['x']
    else:
        # no usable name: derive one from a source url.
        # BUG FIX: this loop iterated the leftover srcfns/src variables from
        # the scoring loop (and srcfns could be unbound if srcs was empty);
        # it must scan the sources themselves.
        srcurl = ""
        for hexuri,src in srcs.items():
            if src['url'].find("/")!=-1:
                srcurl = src['url']
        if srcurl=="":
            return
        srcurl = srcurl[srcurl.rfind("/")+1:]
        ext = srcurl[srcurl.rfind(".")+1:]
        filename = srcurl[0:srcurl.rfind(".")]
    if ext not in current_app.config["EXTENSIONS"].keys():
        # unknown extension: keep it as part of the name
        filename += ext
        ext=""
        nfilename = filename
    else:
        # clean filename: strip the trailing ".ext" if present
        end = filename.upper().rfind("."+ext.upper())
        if end == -1:
            nfilename = filename
        else:
            nfilename = filename.strip()[0:end]
    f['view']['fn'] = filename
    f['view']['efn'] = filename.replace(" ", "%20")
    f['view']['fnx'] = ext
    # filename with the words matching the search highlighted
    if not text:
        f['view']['fnh']=filename
    else:
        # split on every non-alphanumeric character present in the filename
        separators = u"".join({i for i in filename if not unicodedata.category(i)[0] in ("N","L")})
        separated_text = tuple(multipartition(text.lower(), separators))
        f['view']['fnh'] = u"".join(
            (u"<strong>%s</strong>" % filename_part) if filename_part.lower() in separated_text and not filename_part in separators else filename_part
            for filename_part in multipartition(filename, separators)
            )
    # prettify the filename
    if nfilename.find(" ")==-1:
        nfilename = nfilename.replace(".", " ")
    nfilename = nfilename.replace("_", " ")
    f['view']['nfn'] = nfilename
def file_embed_link(data, size="m"):
    ''' Builds the external embed-iframe link for the given file. '''
    link_args = {
        "_external": True,
        "embed_size": size,
        "fileid": mid2url(data["file"]["_id"]),
        "nameid": data["view"]["fnid"],
    }
    return url_for("api.api_embed", **link_args)
def download_file(file_id,file_name=None):
    '''
    Returns the render data for the file download page: the html, the
    playable view (if any), the file data and the error state.

    file_id -- external (url-encoded) file id; may be a legacy numeric id.
    file_name -- seo filename from the url.

    Returns a dict with keys "html", "play" (absent on errors -> None),
    "file_data" and "error" (only present on the error path).
    '''
    error=(None,"") # stores the id and text of an error
    file_data=None
    if file_id is not None: # if an id arrives, check that it is valid
        if is_valid_url_fileid(file_id):
            try: # try to convert the id coming from the url into an internal one
                file_id=url2mid(file_id)
            except (bson.objectid.InvalidId, TypeError) as e:
                try: # check whether it is a legacy numeric id
                    possible_file_id = filesdb.get_newid(file_id)
                    if possible_file_id is None:
                        logging.warn("Identificadores numericos antiguos sin resolver: %s."%e, extra={"fileid":file_id})
                        error=(404,"link_not_exist")
                    else:
                        logging.warn("Identificadores numericos antiguos encontrados: %s."%e, extra={"fileid":file_id})
                        # permanent redirect to the current id
                        return {"html": redirect(url_for(".download", file_id=mid2url(possible_file_id), file_name=file_name), 301),"error":(301,"")}
                except BaseException as e:
                    logging.exception(e)
                    error=(503,"")
                file_id=None
        else:
            abort(404)
    if file_id:
        try:
            file_data=get_file_metadata(file_id, file_name)
        except DatabaseError:
            error=(503,"")
        except FileNotExist:
            error=(404,"link_not_exist")
        except (FileRemoved, FileFoofindRemoved, FileNoSources):
            error=(410,"error_link_removed")
        except FileUnknownBlock:
            error=(404,"")
        if error[0] is None and not file_data:
            # no error and no data: the file exists but could not be fetched
            error=(503,"")
    if file_id is None or error[0] is not None:
        html=""
        if error[0] is not None: # if there is an error, render it
            message_msgid="error_%s_message" % error[0]
            message_msgstr=_(message_msgid)
            # fall back to the 500 message when no specific translation exists
            g.title="%s %s" % (error[0], message_msgstr if message_msgstr!=message_msgid else _("error_500_message"))
            html=render_template('error.html',error=error,full_screen=True)
        return {"html": html,"play":None,"file_data":file_data,"error":error}
    else:
        save_visited([file_data])
        # page title and description
        title = u(file_data['view']['fn'])
        g.title = u"%s \"%s\" - %s" % (
            _(file_data['view']['action']).capitalize(),
            title[:100],
            g.title)
        g.page_description = u"%s %s"%(_(file_data['view']['action']).capitalize(), seoize_text(title," ",True))
        # if the user is logged in, check their vote for the active language
        # and whether the file is one of their favorites
        vote=None
        favorite = False
        if current_user.is_authenticated():
            vote=usersdb.get_file_vote(file_id,current_user,g.lang)
            favorite=any(file_id==favorite["id"] for favorite in usersdb.get_fav_files(current_user))
        # comment submission form
        form = CommentForm(request.form)
        if request.method=='POST' and current_user.is_authenticated() and (current_user.type is None or current_user.type==0) and form.validate():
            usersdb.set_file_comment(file_id,current_user,g.lang,form.t.data)
            form.t.data=""
            flash("comment_published_succesfully")
            # refresh the per-language comment count stored on the file
            filesdb.update_file({"_id":file_id,"cs":usersdb.get_file_comments_sum(file_id),"s":file_data["file"]["s"]},direct_connection=True)
        # collect (index, author, comment, votes) for each comment of the file
        comments=[]
        if "cs" in file_data["file"]:
            comments=[(i,usersdb.find_userid(comment["_id"].split("_")[0]),comment,comment_votes(file_id,comment)) for i,comment in enumerate(usersdb.get_file_comments(file_id,g.lang),1)]
        # on the download page, try to obtain search words if there are none
        if g.args.get("q", None) is None:
            query = download_search(file_data, file_name, "foofind")
            if query:
                g.args["q"] = query.replace(":","")
        return {
            "html":render_template('files/download.html',file=file_data,vote={"k":0} if vote is None else vote,favorite=favorite,form=form,comments=comments),
            "play":file_data["view"]["play"] if "play" in file_data["view"] else "",
            "file_data":file_data,
            }
def generate(server, part, afilter, batch_size, output):
    '''
    Generates sitemap files for one partition of one server's files.

    server -- server to fetch files from.
    part -- partition number; used for the default output dir and suffix.
    afilter -- filter passed to the FilesFetcher.
    batch_size -- fetch batch size.
    output -- output directory; defaults to ./gen/<part>/ next to this file.

    Aborts (re-raises) after more than 100 per-file errors.
    '''
    if not output:
        output = dirname(abspath(__file__)) + "/gen/" + str(part) + "/"
    ff = FilesFetcher(server, afilter, batch_size)
    ff.start()
    suffix = "."+str(part)
    count = error_count = 0
    logging.info("Comienza generación de sitemap en servidor %s."%server)
    for afile in ff:
        try:
            count += 1
            # skip blocked files
            if int(float(afile.get("bl", 0)))!=0:
                continue
            # require at least one valid (allowed type, unblocked) source
            for src in afile["src"].itervalues():
                if "t" in src and src["t"] in {3, 7, 79, 80, 81, 82, 83, 90} and int(float(src.get("bl",0)))==0:
                    main_src = src
                    break
            else:
                continue
            filename = None
            # pick some interesting filename
            for fn in afile.get("fn",{}).itervalues():
                filename = fn["n"]
                # NOTE(review): when a name is skipped, filename keeps that
                # skipped value; if the metadata fallback below also finds
                # nothing, the skipped name may still be used — confirm intent.
                if filename=="download" or IS_BTIH.match(filename) or filename.startswith("[TorrentDownloads"):
                    continue
                extension = fn.get("x",None)
                if extension and not filename.endswith("."+extension):
                    filename += "." + extension
                break
            else:
                # no name accepted: fall back to metadata titles
                md = afile.get("md",{})
                for possible_name in ("torrent:name", "torrent:title", "video:title", "video:name"):
                    if possible_name in md:
                        filename = u(md[possible_name])
                        break
            if not filename:
                # last resort: last path segment of the chosen source url
                filename = u(main_src["url"].rsplit("/",1)[-1])
            if filename:
                first_seen = afile["fs"]
                # writers are sharded by first-seen date (see get_writer)
                get_writer(first_seen, count, output, suffix).write("<url><lastmod>%s</lastmod><loc>%%s%s-%s</loc></url>\n"%(first_seen.strftime("%Y-%m-%dT%H:%M:%SZ"), seoize_text(filename, "-", True), mid2url(afile["_id"])))
        except BaseException as e:
            error_count += 1
            if error_count>100:
                # more than 100 errors stops the indexing with an error
                raise e
        if count%10000==0:
            logging.info("Progreso de generación de sitemap del servidor %s."%(server), extra={"count":count, "error_count":error_count})
    close_writers()
    sort_files(output)
    logging.info("Finaliza la generación de sitemap en servidor %s."%server)