Exemplo n.º 1
0
def Indexlist(database):
  """Return the result of ``RSSparse.resolvetoList`` for *database*.

  This is a thin pass-through: the argument is forwarded unchanged and the
  helper's return value is handed back to the caller as-is.
  """
  return RSSparse.resolvetoList(database)
Exemplo n.º 2
0
# Per-feed options that main() does not use itself.  They are still read for
# every feed so that an incomplete section raises up front, exactly as the
# original code did when it bound each of them to a local variable.
# NOTE(review): presumably OPFgen.opfMetadata reads these from the config
# file on its own -- confirm.
_FEED_METADATA_OPTIONS = (
  "publisher", "source", "rights", "subject", "description", "contributor",
  "type2", "format2", "identifier", "language", "relation", "coverage",
)


def _new_work_dir(temp_dir):
  """Return a timestamp-named path below *temp_dir* that does not exist yet.

  The name is the current Unix time in seconds; it is bumped by one until it
  is unused so that two books built within the same second do not share a
  directory (``shutil.copytree`` refuses an existing destination).
  """
  stamp = int(time.time())
  work_dir = os.path.join(temp_dir, str(stamp))
  while os.path.exists(work_dir):
    stamp += 1
    work_dir = os.path.join(temp_dir, str(stamp))
  return work_dir


def _store_new_articles(collection, entries, title, pageparse_keyword, image_dir):
  """Fetch, format and insert every entry whose link is not in *collection*.

  Entries that cannot be fetched are inserted with ``errorno=1``, entries
  whose page cannot be formatted with ``errorno=2``, and successfully
  processed ones with ``errorno=0`` plus their ``html`` and ``image`` fields.
  """
  is_nfpeople = "nfpeople" in title
  for entry in entries:
    link = entry["link"]
    if RSSparse.isqueryDB(collection, link):
      continue

    html_content = RSSparse.fetchHtml(link)
    if not html_content:
      RSSparse.insertDB(collection, entry, errorno=1)
      continue

    page = PAGEparse.pageFormat(html_content, pageparse_keyword)
    if not page:
      RSSparse.insertDB(collection, entry, errorno=2)
      continue
    if is_nfpeople:
      page = PAGEparse.pageFormatNFpeople(page)

    # downloadIMG is run with the image directory as the working directory.
    os.chdir(image_dir)
    downloaded = PAGEparse.downloadIMG(page, title)

    body = PAGEparse.addBodytag(downloaded["entire"])
    doc = {}
    doc["html"] = PAGEparse.htmlHeader() + body
    doc["image"] = downloaded["image"]
    # The feed entry's own fields are merged last, as in the original.
    doc.update(entry)
    RSSparse.insertDB(collection, doc, errorno=0)


def _export_articles(collection, articles, oebps_dir, image_dir):
  """Write each article as ``<n>.html`` in the cwd and copy its images.

  Files are numbered from 1.  Images are copied from *image_dir* into
  ``<oebps_dir>/images``.  After an article is written, its database record
  is updated through ``RSSparse.updateDB``.
  """
  image_pathto = os.path.join(oebps_dir, "images")
  makeDir(image_pathto)
  for number, article in enumerate(articles, 1):
    out_filename = str(number) + ".html"
    PAGEparse.writeHtml(out_filename, article["html"].encode("utf-8"))
    for image_name in article["image"]:
      shutil.copy(os.path.join(image_dir, image_name), image_pathto)
    # update database's is_operate value.
    RSSparse.updateDB(collection, article['link'])


def _write_package_files(articles, item, config_file, bookid, title2, creator):
  """Write ``content.opf``, ``0.html`` and ``toc.ncx`` into the cwd."""
  # OPF generation
  opf_entire = (
    OPFgen.opfHeader(bookid)
    + OPFgen.opfMetadata(item, config_file)
    + OPFgen.opfMainfest(articles)
    + OPFgen.opfSpine(articles)
    + OPFgen.opfGuide()
    + OPFgen.opfFooter()
  )
  writeFile("content.opf", "w", opf_entire)

  # INDEX html file generation
  html_header = OPFgen.htmlHeader()
  html_body = OPFgen.addBodytag(OPFgen.htmlBody(articles))
  writeFile("0.html", "w", html_header + html_body)

  # TOC.ncx generation
  ncx_header = OPFgen.ncxHeader()
  ncx_head = OPFgen.ncxHead(bookid)
  ncx_doctitle = OPFgen.ncxDocTitle(title2)
  ncx_docauthor = OPFgen.ncxDocAuthor(creator)
  ncx_navmap = OPFgen.ncxNavMap(OPFgen.ncxEntireNavPoint(articles))
  ncx_body = OPFgen.ncxBody(ncx_head + ncx_doctitle + ncx_docauthor + ncx_navmap)
  writeFile("toc.ncx", "w", ncx_header + ncx_body)


def _copy_epub_skeleton(res_dir, work_dir):
  """Copy ``mimetype``, ``stylesheet.css`` and ``META-INF`` into *work_dir*."""
  shutil.copy(os.path.join(res_dir, "mimetype"), work_dir)
  shutil.copy(os.path.join(res_dir, "stylesheet.css"),
              os.path.join(work_dir, "OEBPS"))
  shutil.copytree(os.path.join(res_dir, "META-INF"),
                  os.path.join(work_dir, "META-INF"))


def main():
  """Harvest every configured RSS feed and publish its weekly e-book.

  Settings are read from ``config.cfg`` in the current working directory.
  For every section whose name contains ``RSS``:

  1. the feed page is fetched and each entry not yet in the feed's
     collection is downloaded, formatted and inserted;
  2. if today's ``%w`` weekday number equals the feed's ``handle weekday``
     option and ``RSSparse.queryDB`` returns articles, an OEBPS tree is
     written to a fresh directory below the configured temp directory and
     handed to ``genEpub`` and ``genMobi``;
  3. if ``mail enable`` is ``yes``, the ``.mobi`` file is passed to
     ``mailSend``.

  The function changes the process working directory several times, so all
  configured directories are made absolute as soon as they are read.
  """
  # read the config file.
  config = ConfigParser()
  config.read("config.cfg")

  mail_enable = config.get("SYSTEM", "mail enable")
  mail_from   = config.get("SYSTEM", "mail from")
  mail_to     = config.get("SYSTEM", "mail to")
  # Absolute paths: the os.chdir() calls below would otherwise make relative
  # settings resolve against whichever directory happens to be current.
  base_dir    = os.path.abspath(config.get("SYSTEM", "base directory"))
  temp_dir    = os.path.abspath(config.get("SYSTEM", "temp directory"))
  image_dir   = os.path.abspath(config.get("SYSTEM", "image directory"))
  publ_dir    = os.path.abspath(config.get("SYSTEM", "publish directory"))
  res_dir     = os.path.abspath(config.get("SYSTEM", "resource directory"))
  config_file = os.path.join(base_dir, "config.cfg")

  makeDir(publ_dir)
  makeDir(image_dir)

  rss_list = [section for section in config.sections() if "RSS" in section]
  for item in rss_list:
    title   = config.get(item, "title")
    creator = config.get(item, "creator")
    for option in _FEED_METADATA_OPTIONS:
      config.get(item, option)

    url               = config.get(item, "rss_url")
    findall_key       = config.get(item, "findall key")
    find_key          = config.get(item, "find key").split(",")
    pageparse_keyword = config.get(item, "pageparse keyword")
    handle_weekday    = config.get(item, "handle weekday")

    now    = time.localtime()
    bookid = randomString(12)
    title2 = title + "-" + time.strftime("%Y%W", now)

    # generate the url of this week's nfpeople
    # NOTE(review): the +8 offset between week number and issue number is
    # taken over unchanged from the original; its origin is not visible here.
    if "nfpeople" in title:
      issue = str(int(time.strftime("%W", now)) + 8)
      url += "Magazine-detail-item-" + issue + ".html"

    collection = "rss_" + title
    # RSS parse and write into database.
    # NOTE(review): a falsy `content` is passed on unchecked, as before --
    # confirm the fetchList helpers cope with a failed download.
    content = RSSparse.fetchHtml(url)
    if "nfpeople" in url:
      list_today = RSSparse.fetchListNFpeople(content)
    else:
      list_today = RSSparse.fetchList(content, findall_key, find_key)
    _store_new_articles(collection, list_today, title, pageparse_keyword, image_dir)

    # Books are only built on the feed's configured weekday ("0" = Sunday).
    if time.strftime("%w", time.localtime()) != handle_weekday:
      continue

    # if nothing is returned for this collection, skip it.
    articles = RSSparse.queryDB(collection)
    if not articles:
      continue

    # create a fresh directory for this book's OEBPS temp files.  A separate
    # name is used so that the configured temp directory is not overwritten
    # and later feeds are not nested inside earlier feeds' directories.
    work_dir = _new_work_dir(temp_dir)
    oebps_dir = os.path.join(work_dir, "OEBPS")
    makeDir(oebps_dir)
    # The article and package files below are written with relative names.
    os.chdir(oebps_dir)

    _export_articles(collection, articles, oebps_dir, image_dir)
    _write_package_files(articles, item, config_file, bookid, title2, creator)

    # copy structure files.
    _copy_epub_skeleton(res_dir, work_dir)

    # generate the epub book.
    genEpub(work_dir, title2, publ_dir)

    # generate the mobi book using the system tool kindlegen
    genMobi(oebps_dir, title2, publ_dir)

    # mail mobi book as attachment to specified mail address.
    if mail_enable == "yes":
      os.chdir(publ_dir)
      attachment = title2 + ".mobi"
      mailSend(mail_from, mail_to, attachment)