Exemplo n.º 1
0
def real_mapper(queryset):
    this = Message.objects(task=queryset.task, payload=queryset.payload)
    STATE = True
    Model = getattr(models, queryset.task)
    this.update(set__inprocess=True)
    if queryset.task == 'Movie':
        for process in queryset.payload:
            ret = parse.get_movie_info(process)
            ret['movieid'] = process
            models.Movie(**ret).save()
        return
    Parse = getattr(parse, queryset.task + 'Parse')
    for process in queryset.payload:
        try:
            p = Parse(process)
            count = 1
            while 1:
                haspage = p()
                if haspage is None:
                    # 很可能404
                    break
                result, hasnext = haspage
                Model(**result).save()
                # 别名体系, 这样只需要全局记录一个人物就知道他们的全部别名
                for k, v in p._alias.items():
                    models.AliasName.objects.get_or_create(name=k)[0].update(
                        add_to_set__alias=v)
                if hasnext:
                    count += 1
                    url = p.original_url
                    p.set_url(url.replace('.html', '-{}.html'.format(count)))
                else:
                    #没有下一页就退出循环
                    break
        except:
            raise
            STATE = False
        else:
            models.IdFinished.objects(year=queryset.year).update(
                add_to_set__ids=[process])
    if STATE:
        this.update(set__state=2)
    else:
        this.update(set__state=3)
    this.update(set__inprocess=False)
Exemplo n.º 2
0
def real_mapper(queryset):
    this = Message.objects(task=queryset.task, payload=queryset.payload)
    STATE = True
    Model = getattr(models, queryset.task)
    this.update(set__inprocess=True)
    if queryset.task == 'Movie':
        for process in queryset.payload:
            ret = parse.get_movie_info(process)
            ret['movieid'] = process
            models.Movie(**ret).save()
        return
    Parse = getattr(parse, queryset.task + 'Parse')
    for process in queryset.payload:
        try:
            p = Parse(process)
            count = 1
            while 1:
                haspage = p()
                if haspage is None:
                    # 很可能404
                    break
                result, hasnext = haspage
                Model(**result).save()
                # 别名体系, 这样只需要全局记录一个人物就知道他们的全部别名
                for k, v in p._alias.items():
                    models.AliasName.objects.get_or_create(
                        name=k)[0].update(add_to_set__alias=v)
                if hasnext:
                    count += 1
                    url = p.original_url
                    p.set_url(url.replace('.html', '-{}.html'.format(count)))
                else:
                    #没有下一页就退出循环
                    break
        except:
            raise
            STATE = False
        else:
            models.IdFinished.objects(
                year=queryset.year            ).update(add_to_set__ids=[process])
    if STATE:
        this.update(set__state=2)
    else:
        this.update(set__state=3)
    this.update(set__inprocess=False)
Exemplo n.º 3
0
 def __init__(self, map_func, num_workers=None, **kwargs):
     self.map_func = map_func
     self.inputs = Message.objects(state__ne=2, inprocess__ne=True)
     self.pool = multiprocessing.Pool(num_workers, **kwargs)
Exemplo n.º 4
0
                for k, v in p._alias.items():
                    models.AliasName.objects.get_or_create(name=k)[0].update(
                        add_to_set__alias=v)
                if hasnext:
                    count += 1
                    url = p.original_url
                    p.set_url(url.replace('.html', '-{}.html'.format(count)))
                else:
                    #没有下一页就退出循环
                    break
        except:
            raise
            STATE = False
        else:
            models.IdFinished.objects(year=queryset.year).update(
                add_to_set__ids=[process])
    if STATE:
        this.update(set__state=2)
    else:
        this.update(set__state=3)
    this.update(set__inprocess=False)


all = Message.objects(state__ne=2)

for i in all:
    try:
        real_mapper(i)
    except:
        raise
Exemplo n.º 5
0
                # 别名体系, 这样只需要全局记录一个人物就知道他们的全部别名
                for k, v in p._alias.items():
                    models.AliasName.objects.get_or_create(
                        name=k)[0].update(add_to_set__alias=v)
                if hasnext:
                    count += 1
                    url = p.original_url
                    p.set_url(url.replace('.html', '-{}.html'.format(count)))
                else:
                    #没有下一页就退出循环
                    break
        except:
            raise
            STATE = False
        else:
            models.IdFinished.objects(
                year=queryset.year            ).update(add_to_set__ids=[process])
    if STATE:
        this.update(set__state=2)
    else:
        this.update(set__state=3)
    this.update(set__inprocess=False)

all = Message.objects(state__ne=2)

for i in all:
    try:
        real_mapper(i)
    except:
        raise