Example #1
File: models.py Project: kymo/SHM
def create_product_to_sell(self, product_params):
    """
    Create a product and save it into the DB.
    """
    try:
        # resize the uploaded image into large and small versions
        big_img, small_img = change_img_size(product_params["image"])
        product_to_sell = ProductToSell(
            owner=product_params["owner"],
            product_name=product_params["productname"],
            broad_type=product_params["broadtype"],
            sub_type=product_params["subtype"],
            belong_campus=product_params["belongcampus"],
            trade_type=product_params["tradetype"],
            purity=product_params["purity"],
            big_img=big_img,
            small_img=small_img,
            price=product_params["price"],
            trade_title=product_params["tradetitle"],
            trade_detail=product_params["tradedetail"],
            release_time=product_params["releasetime"],
        )
        product_to_sell.save()
        # the nid is derived from the auto-generated primary key,
        # so the record must be saved once before it can be set
        nid = create_nid(product_to_sell.id)
        product_to_sell.nid = nid
        product_to_sell.save()
        return product_to_sell
    except Exception as e:
        print e
        return None
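A minimal usage sketch (assumptions: the method lives on a custom model manager, here invoked via ProductToSell.objects; the parameter values, some_account and uploaded_image are hypothetical, but the dict keys match the ones read above):

import datetime

params = {
    "owner": some_account,            # hypothetical Account instance
    "productname": "Used bicycle",
    "broadtype": "1",
    "subtype": "2",
    "belongcampus": "1",
    "tradetype": "1",
    "purity": "90%",
    "image": uploaded_image,          # hypothetical uploaded image file
    "price": "120",
    "tradetitle": "Cheap campus bike",
    "tradedetail": "Barely used.",
    "releasetime": datetime.datetime.now(),
}
product = ProductToSell.objects.create_product_to_sell(params)
if product is None:
    print 'creation failed'  # the helper swallows exceptions and returns None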
Example #2
def test_crawler(fiction_url):
    fiction_infor = get_book_infor(
        'http://qidian.com',
        'qidian',
        fiction_url,
    )
    if not fiction_infor:
        return
    print fiction_infor
    # save the fiction into the database
    # hard-coded test data: title, author, site-local id and category
    fiction_title = '武炼巅峰'
    author_name = '莫默'
    ids = ['2494758']
    types = '1'
    try:
        web_site = FictionWebSite.objects.get(title='qidian')
    except FictionWebSite.DoesNotExist:
        web_site = FictionWebSite(
            title='qidian',
            url='http://qidian.com',
        )
        web_site.save()
    try:
        # a fiction with the same title may already be stored;
        # uniqueness is determined by title and author
        fic = Fiction.objects.get(fiction_title=fiction_title,
                                  author=author_name)
        DG.trace("get it")
        print fic.source_site.title
        print web_site.title
        # if it comes from the same site, stop processing
        if fic.source_site.title == web_site.title:
            return
    except Exception as e:
        print e
        fic = Fiction(
            fiction_title=fiction_title,
            fiction_avatar_url=fiction_infor['avatar'],
            fiction_intro=fiction_infor['intro'],
            fiction_id=ids[0],
            fiction_style=types,
            total_word=fiction_infor['total_word'],
            stock_time=10,
            com_word="",
            source_site=web_site,
            click_time=fiction_infor['click_time'],
            rec_time=fiction_infor['rec_time'],
            author=author_name,
            author_url="",
        )
        fic.save()
        fic.fiction_nid = create_nid(fic.id)
        fic.save()
        if isinstance(fiction_title, unicode):
            fiction_title = fiction_title.encode('utf-8')
        # the fiction is newly added, so build a search index for it
        t1 = threading.Thread(target=build_index_database,
                              args=(fiction_title, fic, '1'))
        t1.start()
        t1.join()
Example #3
def register(request):
    if request.user.is_authenticated():
        return HttpResponseRedirect('/')
    if request.method == 'GET':
        return RTR('register.html', {'error_msg': False},
                   context_instance=RequestContext(request))
    # get the validation code
    validate_code = request.POST.get('validate', '')
    if not_legal(validate_code) or \
            validate_code.lower() != request.session['validate'].lower():
        return RTR('register.html', {'error_msg': True},
                   context_instance=RequestContext(request))
    email = request.POST.get('email', '')
    password = request.POST.get('password', '')
    name = request.POST.get('name', '')
    remember = request.POST.get('remember', '')
    try:
        try:
            # reject duplicate user names
            Account.objects.get(name=name)
            return HttpResponse("Name already taken")
        except Account.DoesNotExist:
            pass
        #build shelf
        shelf = Shelf(fiction_number=0)
        shelf.save()

        user = Account(name=name,
                       password=hashlib.md5(password).hexdigest(),
                       is_active='0',
                       email=email,
                       shelf=shelf)
        user.save()
        user.nid = create_nid(user.id)
        user.save()
        user = authenticate(name=user.name, password=password)
        if user:
            login(request, user)
            response = HttpResponseRedirect('/' + user.nid)
            if remember == 'on':
                response.set_cookie('login', 'True', max_age=7 * 24 * 60 * 60)
            return response
        else:
            return HttpResponse('ok')
    except Exception:
        return HttpResponse("Server error, please try again later. "
                            "<a href='%s'>Back</a>" % request.META['HTTP_REFERER'])
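A minimal URLconf hook-up for this view, as a sketch: it assumes a Django 1.x-era project (consistent with the request.user.is_authenticated() call above), and the account.views module path is hypothetical.

from django.conf.urls import url
from account import views  # hypothetical module path for the register view

urlpatterns = [
    url(r'^register/$', views.register),
]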
Example #4
File: models.py Project: kymo/SHM
def create_user(self, user_params):
    """
    Create a user and save it into the DB.
    """
    try:
        user = Account(nick_name=user_params['nickname'],
                       real_name=user_params['realname'],
                       password=hashlib.md5(user_params['password']).hexdigest(),
                       email=user_params['email'],
                       qq=user_params['qq'],
                       person_intro=user_params['intro'],
                       phone=user_params['phone'])
        user.save()
        # the nid is derived from the auto-generated primary key,
        # so save once before setting it
        nid = create_nid(user.id)
        user.nid = nid
        user.save()
        return user
    except Exception as e:
        print e
        return None
Example #5
File: models.py Project: kymo/SHM
def create_product_to_buy(self, product_params):
    """
    Create a product and save it into the DB.
    """
    try:
        product_to_buy = ProductToBuy(
            owner=product_params["owner"],
            product_name=product_params["productname"],
            broad_type=product_params["broadtype"],
            trade_type=product_params["tradetype"],
            sub_type=product_params["subtype"],
            release_time=product_params["releasetime"],
        )
        product_to_buy.save()
        # the nid is derived from the auto-generated primary key,
        # so save once before setting it
        nid = create_nid(product_to_buy.id)
        product_to_buy.nid = nid
        product_to_buy.save()
        return product_to_buy
    except Exception as e:
        print e
        return None
Example #6
File: crawler.py Project: kymo/fiction
def crawler_types(
    types,
    web_site,
    url,
):
    # index is the listing page number
    pattern = ALL_PATTERN[web_site.title]

    for index in range(1, 100):
        new_url = build_url_page(url, web_site.title, index, types)
        html_page = urllib2.urlopen(new_url)
        html_content = html_page.read()
        html_content = gzip_content(html_content)
        #get content
        content = BeautifulSoup(html_content)
        out = content.findAll(pattern['all_content_tag'],
                              pattern['all_content_dict'])
        contents = ''.join([str(item) for item in out])
        chapter = BeautifulSoup(contents)
        fictions = chapter.findAll(pattern['all_fiction_tag'],
                                   pattern['all_fiction_dict'])
        for item in fictions:
            #get fiction title
            contents = str(item)
            try:
                fiction_title = re.findall(pattern['all_fiction_title'],
                                           contents)
                fiction_title = ''.join(
                    [str(_item) for _item in fiction_title])
                #get fiction url
                fiction_url = re.findall(pattern['all_fiction_url'], contents)
                fiction_url = ''.join([str(_item) for _item in fiction_url[0]])
                #get fiction type
                fiction_type = re.findall(pattern['all_fiction_type'],
                                          contents)
                fiction_type = ''.join([str(_item) for _item in fiction_type])
                # get the fiction id on this site
                author_name = re.findall(pattern['all_author_name'], contents)
                author_name = ''.join([str(_item) for _item in author_name])
                ids = re.findall(pattern['ids_pattern'], fiction_url)
                if not fiction_title or not fiction_url or not fiction_type or not author_name:
                    continue
                # all required fields were extracted
            except Exception:
                continue
            print 'Started crawling fiction %s' % fiction_title
            try:
                # skip fictions whose URL has already been collected
                # into the database (dedup is by the URL hash)
                hash_url = HashUrl.objects.get(urls=fiction_url)
                continue
            except HashUrl.DoesNotExist:
                hash_url = HashUrl(urls=fiction_url)
                hash_url.save()

            fiction_infor = get_book_infor(
                web_site.url,
                web_site.title,
                fiction_url,
            )
            if not fiction_infor:
                continue
            # save the fiction into the database
            fic = Fiction(
                fiction_title=fiction_title,
                fiction_avatar_url=fiction_infor['avatar'],
                fiction_intro=fiction_infor['intro'],
                fiction_id=ids[0],
                fiction_style=types,
                total_word=fiction_infor['total_word'],
                stock_time=10,
                com_word="",
                source_site=web_site,
                click_time=fiction_infor['click_time'],
                rec_time=fiction_infor['rec_time'],
                author=author_name,
                author_url="",
            )
            fic.save()
            fic.fiction_nid = create_nid(fic.id)
            fic.save()
            print 'Saved basic information of fiction %s to the database' % fiction_title
            #save tags
            for tags in fiction_infor['tags']:
                try:
                    tag = Tag.objects.get(tag=tags)
                except Tag.DoesNotExist:
                    tag = Tag(tag=tags)
                    tag.save()
                fic.tag.add(tag)
                fic.save()
            ms = MemberShip(fiction=fic, website=web_site)
            ms.save()
            print 'Fetching chapters of fiction %s' % fiction_title
            chapter_url = build_url_fiction(ids[0], web_site.title)
            #chapter_infor = chapter_func[web_site.title](chapter_url)
            if isinstance(fiction_title, unicode):
                fiction_title = fiction_title.encode('utf-8')
            t1 = threading.Thread(target=build_index_database,
                                  args=(fiction_title, fic, '1'))
            t2 = threading.Thread(target=chapter_func[web_site.title],
                                  args=(chapter_url, fic, web_site))
            t1.start()
            t2.start()
            t1.join()
            t2.join()
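A sketch of how crawler_types might be driven, reusing the get-or-create pattern and the values from Example #2 (the category code '1' and the listing base URL are assumptions):

try:
    web_site = FictionWebSite.objects.get(title='qidian')
except FictionWebSite.DoesNotExist:
    web_site = FictionWebSite(title='qidian', url='http://qidian.com')
    web_site.save()
# crawl all listing pages of category '1' on the site
crawler_types('1', web_site, 'http://qidian.com')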
Example #7
    def run(self):
        """thread method"""

        #get all the fresh information
        _headers = {
            "User-Agent":
            "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9.1) Gecko/20090624 Firefox/3.5",
            "Accept": "text/plain"
        }
        print self.newest_url
        request = urllib2.Request(self.newest_url, headers=_headers)
        html_page = urllib2.urlopen(request).read()
        try:
            # the response body may be gzip-compressed; try to decompress it
            import gzip
            import StringIO
            data = StringIO.StringIO(html_page)
            gzipper = gzip.GzipFile(fileobj=data)
            html_page = gzipper.read()
        except IOError:
            # not gzipped; keep the raw body
            pass
        html_page = BeautifulSoup(str(html_page))
        content = html_page.findAll(self.content_tag, self.content_dict)
        contents = ''.join([str(item) for item in content])
        chapter_infor = BeautifulSoup(contents)
        content = chapter_infor.findAll(self.chapter_tag, self.chapter_dict)
        indexs = 1
        print content
        for item in content:
            indexs += 1
            contents = str(item)
            types = ''.join(re.findall(self.types_pattern, contents))
            title = ''.join(re.findall(self.title_pattern, contents))
            chapter = ''.join(re.findall(self.chapter_pattern, contents))
            author = ''.join(re.findall(self.author_pattern, contents))
            fiction_url = ''.join(
                re.findall(self.fiction_url_pattern, contents))
            chapter_url = ''.join(
                re.findall(self.chapter_url_pattern, contents))
            if not types or not title or \
                not chapter or not author or not fiction_url or not chapter_url:
                continue
            newest_chapter_url = chapter_url
            host = self.host
            print author
            # strip a trailing slash from the host and absolutize relative URLs
            if host.endswith('/'):
                host = host[:-1]
            if chapter_url.startswith('/'):
                chapter_url = host + chapter_url
            if fiction_url.startswith('/'):
                fiction_url = host + fiction_url
            try:
                web_site = FictionWebSite.objects.get(url=self.host)
            except FictionWebSite.DoesNotExist:
                web_site = FictionWebSite(title=self.thread_name,
                                          url=self.host)
                web_site.save()
            try:
                hash_url = HashUrl.objects.get(urls=fiction_url)
                already_exists = True
                fic = Fiction.objects.get(fiction_title=title, author=author)
            except (HashUrl.DoesNotExist, Fiction.DoesNotExist):
                already_exists = False
            print 'here'
            if not already_exists:
                try:
                    hash_url = HashUrl(urls=fiction_url)
                    hash_url.save()
                except:
                    continue
                # the crawled fiction is new: fetch its book information
                book_infor = get_book_infor(self.host, self.thread_name,
                                            fiction_url, True)
                ids = re.findall(ALL_PATTERN[web_site.title]['ids_pattern'],
                                 fiction_url)
                types = STYLE[self.thread_name].get(book_infor['types'], '4')
                try:
                    fic = Fiction(
                        fiction_title=title,
                        fiction_avatar_url=book_infor['avatar'],
                        fiction_intro=book_infor['intro'],
                        fiction_id=ids[0],
                        fiction_style=types,
                        total_word=book_infor['total_word'],
                        com_word="",
                        source_site=web_site,
                        click_time=book_infor['click_time'],
                        rec_time=book_infor['rec_time'],
                        author=author,
                        stock_time=0,
                        author_url="",
                    )
                    fic.save()
                    fic.fiction_nid = create_nid(fic.id)
                    fic.save()
                    member = MemberShip(fiction=fic,
                                        website=web_site,
                                        fiction_url=fiction_url)
                    member.save()
                    del member
                except Exception as e:
                    print e
                    continue

                #search only by fiction title
                for item in mmseg.Algorithm(title):
                    try:
                        index = Index.objects.get(key=item.text)
                    except Index.DoesNotExist:
                        index = Index(key=item.text)
                        index.save()
                    IndexFictionRelationship.objects.create(
                        key=index,
                        fiction=fic,
                        position=','.join([str(item.start),
                                           str(item.end)]),
                        bit='2',  #chapter
                    )
                #get all chapters
                if 'read_url' in book_infor:
                    chapter_url = book_infor['read_url']
                else:
                    chapter_url = build_url_fiction(ids[0], web_site.title)
                get_chapters_thread = threading.Thread(
                    target=chapter_func[web_site.title],
                    args=(chapter_url, fic, web_site, '1'))
                get_chapters_thread.start()
                get_chapters_thread.join()
            #if the fiction has been inserted into the database before
            else:
                print "yes it is!"
                #get the max index of chapters
                try:
                    chapter_index = ChapterIndex.objects.get(
                        fiction=fic.id, web_site=web_site.title)
                except Exception as e1:
                    try:
                        chapter_index = ChapterIndex.objects.filter(
                            fiction=fic.id, web_site=web_site.title)[0]
                    except Exception as e:
                        print e
                    print e1
                    continue
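run() reads self.newest_url, self.host, self.thread_name and several tag/pattern attributes, so the enclosing class is presumably a threading.Thread subclass. A hypothetical launch sketch (the class name, constructor, and URL below are assumptions, not from the source):

import threading

class NewestCrawler(threading.Thread):
    # hypothetical wrapper; the real class would also set the tag/pattern
    # attributes (content_tag, content_dict, chapter_tag, ...) used by run()
    def __init__(self, newest_url, host, thread_name):
        threading.Thread.__init__(self)
        self.newest_url = newest_url
        self.host = host
        self.thread_name = thread_name

    # the run() method shown above goes here

crawler = NewestCrawler('http://qidian.com/rank/newest',  # hypothetical URL
                        'http://qidian.com', 'qidian')
crawler.start()
crawler.join()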
Example #8
def crawler_types(
    types,
    web_site,
    url,
):
    # index is the listing page number
    pattern = ALL_PATTERN[web_site.title]

    for index in range(1, 100):
        new_url = build_url_page(url, web_site.title, index, types)
        html_page = urllib2.urlopen(new_url)
        html_content = html_page.read()
        html_content = gzip_content(html_content)
        #get content
        content = BeautifulSoup(html_content)
        out = content.findAll(pattern['all_content_tag'],
                              pattern['all_content_dict'])
        if not out:
            break
        contents = ''.join([str(item) for item in out])
        chapter = BeautifulSoup(contents)
        fictions = chapter.findAll(pattern['all_fiction_tag'],
                                   pattern['all_fiction_dict'])
        for item in fictions:
            #get fiction title
            contents = str(item)
            try:
                fiction_title = re.findall(pattern['all_fiction_title'],
                                           contents)
                fiction_title = ''.join(
                    [str(_item) for _item in fiction_title])
                #get fiction url
                fiction_url = re.findall(pattern['all_fiction_url'], contents)
                fiction_url = ''.join([str(_item) for _item in fiction_url[0]])
                #get fiction type
                fiction_type = re.findall(pattern['all_fiction_type'],
                                          contents)
                fiction_type = ''.join([str(_item) for _item in fiction_type])
                # get the fiction id on this site
                author_name = re.findall(pattern['all_author_name'], contents)
                author_name = ''.join([str(_item) for _item in author_name])
                ids = re.findall(pattern['ids_pattern'], fiction_url)
                if not fiction_title or not fiction_url or not author_name:
                    continue
                # all required fields were extracted
            except Exception:
                continue
            try:
                hash_url = HashUrl.objects.get(urls=fiction_url)
                continue
            except HashUrl.DoesNotExist:
                hash_url = HashUrl(urls=fiction_url)
                hash_url.save()

            fiction_infor = get_book_infor(
                web_site.url,
                web_site.title,
                fiction_url,
            )
            if not fiction_infor:
                continue
            # save the fiction into the database
            try:
                # a fiction with the same title may already be stored;
                # uniqueness is determined by title and author
                fic = Fiction.objects.get(fiction_title=fiction_title,
                                          author=author_name)
                # if it comes from the same site, do not process it again
                DG.trace("get it")
                if fic.source_site.title == web_site.title:
                    continue

            except Fiction.DoesNotExist:
                fic = Fiction(
                    fiction_title=fiction_title,
                    fiction_avatar_url=fiction_infor['avatar'],
                    fiction_intro=fiction_infor['intro'],
                    fiction_id=ids[0],
                    fiction_style=types,
                    total_word=fiction_infor['total_word'],
                    stock_time=10,
                    com_word="",
                    source_site=web_site,
                    click_time=fiction_infor['click_time'],
                    rec_time=fiction_infor['rec_time'],
                    author=author_name,
                    author_url="",
                )
                fic.save()
                fic.fiction_nid = create_nid(fic.id)
                fic.save()
                if isinstance(fiction_title, unicode):
                    fiction_title = fiction_title.encode('utf-8')
                # the fiction is newly added, so build a search index for it
                #t1 = threading.Thread(target = build_index_database, args = (fiction_title, fic, '1'))
                #t1.start()
                #t1.join()
            #save tags
            for tags in fiction_infor['tags']:
                try:
                    tag = Tag.objects.get(tag=tags)
                except Tag.DoesNotExist:
                    tag = Tag(tag=tags)
                    tag.save()
                fic.tag.add(tag)
                fic.save()
            # create the fiction-website membership record
            ms = MemberShip(fiction=fic,
                            website=web_site,
                            fiction_url=fiction_url)
            ms.save()
            # fetch all chapters of this fiction
            chapter_url = build_url_fiction(ids[0], web_site.title)
            t2 = threading.Thread(target=chapter_func[web_site.title],
                                  args=(chapter_url, fic, web_site))
            t2.start()
            t2.join()
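Design note: the recurring try/except dedup on HashUrl in these examples can be expressed more compactly (and race-safely, assuming the urls column has a unique constraint) with Django's get_or_create. A sketch of the equivalent loop-body logic:

hash_url, created = HashUrl.objects.get_or_create(urls=fiction_url)
if not created:
    # this fiction URL has been crawled before; skip it
    continue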