Exemplo n.º 1
0
def open_directory_data_insert():
    """
    Insert a document grabbed from an open directory and store its thumbnail.

    Database used => pdfs.pdf

    Fields: {added, keyword, title, url, snippet, thumb_updated, fpath}

    On POST:
    1. insert the form fields (title, snippet, url; keyword is set to title)
    2. download the first-page thumbnail from the Google Docs viewer
    3. save it as <cwd>/assets/YYYY/MM/DD/<oid>-<slug>.png
    4. update the inserted document with fpath and thumb_updated

    Returns "sukses" after a POST. Any other method falls through and
    returns None.
    """
    if request.method == "POST":
        # Local import (Python 2 stdlib): needed to escape the document URL.
        from urllib import quote_plus

        added = datetime.datetime.now()
        title = request.form["title"]
        snippet = request.form["snippet"]
        url = request.form["url"]
        keyword = title

        # insert the five values above into the database
        pdfdb = c["pdfs"]
        oid = pdfdb.pdf.insert({
                "added": added,
                "title": title,
                "snippet": snippet,
                "url": url,
                "keyword": keyword,
            })

        # thumbnails are stored per day: <cwd>/assets/year/month/day/
        app_folder = os.getcwd()
        date_folder = datetime.datetime.now().strftime("%Y/%m/%d")
        full_folder_path = os.path.join(app_folder, "assets", date_folder)  # ~/path_to_app/assets/2014/11/09/fname.jpg
        if not os.path.exists(full_folder_path):
            os.makedirs(full_folder_path)

        img_filename = "%s-%s.png" % (oid, slugify(title))
        full_file_path = os.path.join(full_folder_path, img_filename)

        # The document URL is a query-string value, so it has to be escaped;
        # otherwise any "&", "#" or space inside it corrupts the viewer request.
        raw_url = url.encode("utf-8") if isinstance(url, unicode) else url
        thumb_url = "http://docs.google.com/viewer?url=%s&a=bi&pagenumber=1&w=100" % quote_plus(raw_url)
        imgstr = urllib2.urlopen(thumb_url).read()

        # Image data is binary: write it in "wb" mode so no newline
        # translation can corrupt the file.
        with open(full_file_path, "wb") as f:
            f.write(imgstr)

        # update database => 1. fpath: location of the image 2. thumb_updated: last update
        # the index page is meant to show only documents that have thumb_updated
        fpath = os.path.join(date_folder, img_filename)
        pdfdb.pdf.update({"_id": ObjectId(oid)}, {"$set": {"fpath": fpath, "thumb_updated": datetime.datetime.now()}})

        return "sukses"
Exemplo n.º 2
0
def admin_single_url_inject():
    """
    Admin form for injecting a single PDF url.

    input: url path to pdf file
    process: download thumbnail, add title, snippet and url, also keyword
             (keyword is currently set to the title)
    post: input into database (pdfs.pdf)

    On POST returns "sukses <url> <title> <snippet>"; for any other method
    renders admin/admin_inject_single_url.html.

    Open question from the original author: is pdfbox needed here?
    """
    if request.method == "POST":
        # Local import (Python 2 stdlib): needed to escape the document URL.
        from urllib import quote_plus

        added = datetime.datetime.now()
        title = request.form["title"]
        snippet = request.form["snippet"]
        url = request.form["url"]
        keyword = title

        # insert the five values above into the database
        pdfdb = c["pdfs"]
        oid = pdfdb.pdf.insert({
                "added": added,
                "title": title,
                "snippet": snippet,
                "url": url,
                "keyword": keyword,
            })

        # thumbnails are stored per day: <cwd>/assets/year/month/day/
        app_folder = os.getcwd()
        date_folder = datetime.datetime.now().strftime("%Y/%m/%d")
        full_folder_path = os.path.join(app_folder, "assets", date_folder)  # ~/path_to_app/assets/2014/11/09/fname.jpg
        if not os.path.exists(full_folder_path):
            os.makedirs(full_folder_path)

        img_filename = "%s-%s.png" % (oid, slugify(title))
        full_file_path = os.path.join(full_folder_path, img_filename)

        # The document URL is a query-string value, so it has to be escaped;
        # otherwise any "&", "#" or space inside it corrupts the viewer request.
        raw_url = url.encode("utf-8") if isinstance(url, unicode) else url
        thumb_url = "http://docs.google.com/viewer?url=%s&a=bi&pagenumber=1&w=100" % quote_plus(raw_url)
        imgstr = urllib2.urlopen(thumb_url).read()

        # Image data is binary: write it in "wb" mode so no newline
        # translation can corrupt the file.
        with open(full_file_path, "wb") as f:
            f.write(imgstr)

        # update database => 1. fpath: location of the image 2. thumb_updated: last update
        # the index page is meant to show only documents that have thumb_updated
        fpath = os.path.join(date_folder, img_filename)
        pdfdb.pdf.update({"_id": ObjectId(oid)}, {"$set": {"fpath": fpath, "thumb_updated": datetime.datetime.now()}})

        return "sukses %s %s %s" % (url, title, snippet)
    return render_template("admin/admin_inject_single_url.html")
Exemplo n.º 3
0
def users_public_view(username):
    """Render the public page of ``username`` with their favorited documents.

    The template receives:
      data      -- user record looked up by the *session's* username
                   (NOTE(review): not the profile owner; presumably
                   intentional for the page chrome -- confirm)
      favorites -- list of (oid, has_thumbnail, image_path, title_slug)
                   tuples, one per unique favorited document that still
                   exists in pdfs.pdf
      username  -- the requested username
    """
    dbuser = c["entities"]
    data = dbuser.users.find_one({"username": session.get("username")})

    try:
        favorites = dbuser.users.find_one({"username": username})["favorited"]
        favorites = list(set(favorites))  # unique only
    except (TypeError, KeyError):
        # TypeError: unknown user (find_one returned None) or unhashable ids;
        # KeyError: the user has no "favorited" field yet.
        favorites = []

    pdfdb = c["pdfs"]
    entries = []
    for oid in favorites:
        doc = pdfdb.pdf.find_one({"_id": ObjectId(oid)})
        if doc is None:
            # The favorited document was removed; skip it instead of crashing.
            continue
        # "fpath" is only present once a thumbnail has been stored.
        has_thumb = "fpath" in doc
        image_path = doc["fpath"] if has_thumb else ""
        entries.append((oid, has_thumb, image_path, slugify(doc["title"])))

    return render_template("users/users_public_view.html", data=data, favorites=entries, username=username)
Exemplo n.º 4
0
def thumbnailer(term=None):
    """Admin tool for attaching thumbnails to documents in pdfs.pdf.

    POST: reads ``oid``, ``url`` and ``title`` from the form, downloads
    ``url``, stores it as <cwd>/assets/YYYY/MM/DD/<oid>-<slug>.png and sets
    ``added``, ``fpath`` and ``thumb_updated`` on the document. Returns
    "sukses <oid> <url>".

    Otherwise renders admin/admin_thumbnailer.html with documents that have
    no ``thumb_updated`` field: a text search on ``term`` when it is given,
    else a random window of up to 10 documents.
    """
    docdb = c["pdfs"]

    if request.method == "POST":
        oid = request.form["oid"]
        url = request.form["url"]
        title = request.form["title"]

        # thumbnails are stored per day: <cwd>/assets/year/month/day/
        app_folder = os.getcwd()
        date_folder = datetime.datetime.now().strftime("%Y/%m/%d")
        full_folder_path = os.path.join(app_folder, "assets", date_folder)  # ~/path_to_app/assets/2014/11/09/fname.jpg
        if not os.path.exists(full_folder_path):
            os.makedirs(full_folder_path)

        img_filename = "%s-%s.png" % (oid, slugify(title))
        full_file_path = os.path.join(full_folder_path, img_filename)

        # download the thumbnail
        imgstr = urllib2.urlopen(url).read()

        # Image data is binary: "wb" avoids newline translation corrupting it.
        with open(full_file_path, "wb") as f:
            f.write(imgstr)

        # update database => 1. fpath: location of the image 2. thumb_updated: last update
        # the index page is meant to show only documents that have thumb_updated
        fpath = os.path.join(date_folder, img_filename)
        now = datetime.datetime.now()
        docdb.pdf.update({"_id": ObjectId(oid)}, {"$set": {"added": now, "fpath": fpath, "thumb_updated": now}})
        return "sukses %s %s" % (oid, url)

    if term is not None:
        # Full-text search, then keep only documents without a thumbnail yet.
        found = docdb.command("text", "pdf", search=term, limit=15)
        data = [d["obj"] for d in found["results"] if "thumb_updated" not in d["obj"]]
    else:
        missing_thumb = {"thumb_updated": {"$exists": False}}
        # Randomize the window. The offset is based on the number of
        # *matching* documents and clamped at 0, so small collections no
        # longer raise ValueError and the skip cannot overshoot the results.
        candidates = docdb.pdf.find(missing_thumb).count()
        skip_number = random.randint(0, max(candidates - 10, 0))
        data = docdb.pdf.find(missing_thumb).skip(skip_number).limit(10)

    return render_template("admin/admin_thumbnailer.html", data=data)
Exemplo n.º 5
0
def recent_feed():
    """Return an Atom feed response built from up to 100 rows of col_term.

    Each entry uses the row key as its title and links to
    http://www.hotoid.com/berita/<slug of the key>.
    """
    # http://werkzeug.pocoo.org/docs/contrib/atom/
    # mandatory per entry: id (taken from the link) and updated
    from itertools import islice  # local import: take 100 rows lazily

    feed = AtomFeed('Recent Articles',
                    feed_url=request.url, url=request.url_root)
    # Stop after 100 rows instead of materialising the whole range first.
    for row in islice(col_term.get_range(), 100):
        term = row[0]
        feed.add(
            term,
            content_type='text',
            url='http://www.hotoid.com/berita/' + slugify(term),
            updated=datetime.datetime.now(),
        )

    return feed.get_response()
Exemplo n.º 6
0
def recent_feed():
  """Return an Atom feed response with the 100 newest db.freewaredata documents.

  Each entry carries the document title, its description as content when
  the document has one, and a link of the form
  http://www.blankons.com/<slug>/<_id>.
  """
  # http://werkzeug.pocoo.org/docs/contrib/atom/
  feed = AtomFeed('Recent Files',
                  feed_url=request.url, url=request.url_root)
  data = db.freewaredata.find().sort("_id", -1).limit(100)
  for d in data:
    title = d['title']
    entry_options = dict(
      content_type='text',
      id=d['_id'],
      url='http://www.blankons.com/'+slugify(onlychars(title))+'/'+unicode(d['_id']),
      updated=datetime.datetime.now(),
    )
    # Some documents have no description; test for it explicitly instead of
    # retrying the whole call inside a bare except.
    if 'description' in d:
      feed.add(title, d['description'], **entry_options)
    else:
      feed.add(title, **entry_options)
  return feed.get_response()
Exemplo n.º 7
0
def slug(s):
    """
    Template filter that converts a string into its slug form by
    delegating to slugify.

    usage: {{ string|slug }}
    """
    slugged = slugify(s)
    return slugged
Exemplo n.º 8
0
def redirect_search():
  """Permanently redirect a ?q=<text> search request to /<slug of text>.

  A request without a ``q`` parameter is treated as an empty query instead
  of handing None to slugify.
  """
  q = request.args.get('q', '')
  return redirect("/" + slugify(q), 301)
Exemplo n.º 9
0
def slug(s):
  """Return the slug form of ``s`` by delegating to slugify."""
  slugged = slugify(s)
  return slugged
Exemplo n.º 10
0
def berita(topik):
    """Render berita2.html with web-search results about ``topik`` from six news sites.

    The slugified topic is recorded in col_term, then for every site the
    search response is taken from col_topic when it is stored there and
    fetched (and stored) otherwise. The template receives the parsed
    responses as data1..data6 in the order kompas, detik, vivanews, kaskus,
    merdeka, republika, plus the slugified ``topik``.
    """
    topik = slugify(topik)

    col_term.insert(unicode(topik.replace('-', ' ')), {'added': datetime.datetime.now(), 'hits': 1})

    query = topik.replace('-', '+')

    week = 86400 * 7
    # (column name in col_topic, site searched, ttl in seconds)
    sources = (
        ('kompas', 'kompas.com', week),
        ('detik', 'detik.com', week),
        ('vivanews', 'viva.co.id', week),
        ('kaskus', 'kaskus.co.id', 86400),  # kaskus results are kept for one day only
        ('merdeka', 'merdeka.com', week),
        ('republika', 'republika.co.id', week),
    )
    results = [_berita_site_results(topik, query, column, site, ttl)
               for column, site, ttl in sources]
    data1, data2, data3, data4, data5, data6 = results

    return render_template("berita2.html", data1=data1, data2=data2, data3=data3, data4=data4, data5=data5, data6=data6, topik=topik)


def _berita_site_results(topik, query, column, site, ttl):
    """Return the parsed search response for one site, stored under ``column`` of row ``topik``."""
    try:
        return json.loads(col_topic.get(topik)[column])
    except Exception:
        # Missing row/column or unreadable stored value: fetch a fresh copy.
        # The store's "not found" exception type is not visible here, hence
        # the broad (but no longer bare) except.
        pass

    # NOTE(review): the API key and search-engine id are hard-coded in this
    # URL; they belong in configuration rather than in source control.
    url = ('http://ajax.googleapis.com/ajax/services/search/web?v=1.0&key=AIzaSyDgVEQ1PB5N4Q0YizPpyLdafL6FmgjdN1w&cx=002126600575969604992:lhhlw8muw00&gl=en&rsz=8&start=0&q='
           + query + '+site:' + site)
    response = urllib2.urlopen(url).read()
    col_topic.insert(topik, {column: response}, ttl=ttl)
    # Parse the response directly instead of reading it back from the store.
    return json.loads(response)
Exemplo n.º 11
0
def cari():
    """Permanently redirect a ?q=<text> search request to /berita/<slug of text>.

    A request without a ``q`` parameter is treated as an empty query instead
    of handing None to slugify.
    """
    q = request.args.get('q', '')
    return redirect("/berita/" + slugify(q), 301)
Exemplo n.º 12
0
def music():
  """Permanently redirect a ?q=<text> search request to /<slug of text>/.

  A request without a ``q`` parameter is treated as an empty query instead
  of handing None to slugify.
  """
  q = request.args.get('q', '')
  return redirect("/"+slugify(q)+"/", 301)
Exemplo n.º 13
0
def suggested_topics(tag):
    """
    Render the topic page for ``tag``: a tags page built from search terms
    (collected from onkeywords.com, adwords, ubersuggest and the like).

    Flow:
    1. dashes in ``tag`` become spaces; unknown terms (not present in
       pdfterms.term) are redirected to "/" with a 302
    2. documents, related terms and tag suggestions are fetched with text
       searches
    3. visitors whose referrer contains google.com get their search term
       appended to setem.log and are redirected to /topic/<slug>
    4. everyone else gets topics.html

    Changes from the earlier version: debug prints of the request headers are
    gone, a failing log write no longer prevents the redirect, and an empty
    users collection yields ``last_user=None`` instead of an IndexError.
    """
    pdfdb = c["pdfs"]
    tagsdb = c["pdfterms"]
    terms = c["terms"]

    tag = tag.replace("-", " ")

    # prevent keyword injection: only terms that are already stored are served
    if not tagsdb.term.find_one({"term": tag}):
        return redirect("/", 302)

    data = pdfdb.command("text", "pdf", search=tag, limit=10)
    results_count = data["stats"]["nscanned"]

    # TODO (original author): documents with a thumbnail should come first
    data = [d["obj"] for d in data["results"]]

    # building related data
    related_data = terms.command("text", "term", search=tag, limit=10)
    related_data = [d["obj"] for d in related_data["results"]]

    ## on-page seo
    words = tag.split(" ")
    # 1. related tags for meta desc
    meta_desc = ", ".join([d["term"] for d in related_data][:5])
    # 2. meta keywords: words of the tag, short words dropped
    meta_key = ", ".join([t for t in words if len(t) > 3])
    # 3. meta key tags: same filter, first five words only
    meta_key_tags = [t for t in words[:5] if len(t) > 3]
    # 4. fake category?
    meta_key_cat = words[0]

    # get tags suggestion to enrich index and strengthen onpage seo
    tags = tagsdb.command("text", "term", search=tag)
    tags = [d["obj"] for d in tags["results"]]
    random.shuffle(tags)
    tags = tags[:5]

    # Passed to the template unchanged; visitors coming from google are
    # redirected below, so it is False whenever the page is rendered.
    refresher = False

    # The referrer may be absent (None); treat that as "not from google".
    referer = request.referrer or ""
    if 'google.com' in referer:
        _suggested_topics_log_search_term(referer)
        # Redirect to the real tag page; that page carries javascript that
        # prevents the back button.
        return redirect("/topic/" + slugify(tag))

    # users count
    users_count = dbentity.users.find().count()
    # Fetch only the last user (reverse natural order) instead of loading
    # every user document into memory.
    newest = list(dbentity.users.find().sort("$natural", -1).limit(1))
    last_user = newest[0] if newest else None

    return render_template("topics.html", data=data, tag=tag, results_count=results_count,
                           related_data=related_data, tags=tags, meta_desc=meta_desc,
                           meta_key=meta_key, meta_key_tags=meta_key_tags, meta_key_cat=meta_key_cat,
                           refresher=refresher, users_count=users_count, last_user=last_user)


def _suggested_topics_log_search_term(referer):
    """Append the search term found in ``referer`` to <cwd>/setem.log (best effort)."""
    from urllib import unquote_plus

    found = re.search(r"&q=(.*?)&", referer)
    # terms of five characters or fewer are treated as empty and not logged
    if not found or len(found.group(1)) <= 5:
        return
    try:
        with open(os.path.join(os.getcwd(), "setem.log"), "a") as f:
            f.write(unquote_plus(found.group(1)) + "\n")
    except (IOError, OSError):
        # Logging is auxiliary; a write failure must not break the request.
        pass
Exemplo n.º 14
0
def music():
  """Record the ?q=<text> query in db.terms2 and permanently redirect to /<slug of text>/.

  A request without a ``q`` parameter used to fail on ``None.replace``; it is
  now treated as an empty query. Empty queries are not recorded.
  """
  q = request.args.get('q', '')
  if q:
    db.terms2.insert({'query': q.replace('-', ' ')})
  return redirect("/"+slugify(q)+"/", 301)