示例#1
0
class InfluencerImportForm(forms.ModelForm):
    """Model form for ``Influencer`` extended with blog and social inputs.

    The fields declared directly on the class are additions to the model
    fields listed in ``Meta.fields``. Nothing in this class persists those
    extra values, so the code consuming the form has to handle them itself.
    """
    # Blog name and URL are the only mandatory extra inputs; both carry the
    # 'req' CSS class (the URL input additionally gets 'blog_url').
    blog_name = forms.CharField(required=True,
                                widget=forms.TextInput(attrs={'class': 'req'}))
    blog_url = forms.CharField(
        required=True, widget=forms.TextInput(attrs={'class': 'req blog_url'}))
    # The choices are computed once, when this class body is executed.
    blog_platform = forms.ChoiceField(
        choices=Platform.blog_platforms_for_select())
    # Optional social inputs; twitter and instagram each have a second
    # "extra_" slot.
    twitter = forms.CharField(required=False)
    extra_twitter = forms.CharField(required=False)
    facebook = forms.CharField(required=False)
    pinterest = forms.CharField(required=False)
    bloglovin = forms.CharField(required=False)
    instagram = forms.CharField(required=False)
    extra_instagram = forms.CharField(required=False)
    # Optional free text, rendered as a three-row textarea.
    blog_aboutme = forms.CharField(required=False,
                                   widget=forms.Textarea(attrs={'rows': 3}))

    class Meta:
        model = Influencer
        # Model-backed fields exposed by the form.
        fields = [
            'name', 'email', 'demographics_gender', 'source',
            'demographics_location'
        ]
        # Render both fields as drop-downs: 'source' offers
        # Influencer.SOURCE_TYPES, gender offers a fixed male/female pair.
        widgets = {
            'source':
            forms.Select(choices=Influencer.SOURCE_TYPES),
            'demographics_gender':
            forms.Select(choices=(('male', 'male'), ('female', 'female')))
        }
示例#2
0
def migrate_pts_from_duplicates():
    """Re-attach popularity time series rows from duplicate platforms.

    Walks every platform (excluding those flagged ``url_not_found``) that
    belongs to an influencer with ``show_on_search=True``. For each one it
    asks ``Platform.find_duplicates`` for its duplicates and re-points their
    ``popularitytimeseries`` rows at the platform being processed.

    A row is skipped when the platform already has a row for the same
    ``snapshot_date``. That also covers rows migrated earlier in the same
    run, so two duplicates cannot both contribute a row for one date.

    Returns nothing; progress is reported through ``log``.
    """
    infs = Influencer.objects.filter(show_on_search=True)
    plats = Platform.objects.filter(influencer__in=infs).exclude(
        url_not_found=True)
    for plat in plats:
        log.info('plat: %r', plat)
        plat_ptss = list(
            plat.popularitytimeseries_set.order_by('snapshot_date'))
        log.info('plat_ptss: %s', plat_ptss)
        # Snapshot dates the platform already covers; extended below as rows
        # are migrated.
        plat_dates = {pts.snapshot_date for pts in plat_ptss}
        dups = Platform.find_duplicates(plat.influencer,
                                        plat.url,
                                        plat.platform_name,
                                        plat.id,
                                        exclude_url_not_found_true=False)
        if not dups:
            log.info('No dups')
            continue
        for dup in dups:
            log.info('dup: %r', dup)
            dup_ptss = list(
                dup.popularitytimeseries_set.order_by('snapshot_date'))
            log.info('dup_ptss: %s', dup_ptss)
            for pts in dup_ptss:
                if pts.snapshot_date in plat_dates:
                    log.info('Skipping existing pts %r', pts)
                    continue
                pts.platform = plat
                pts.save()
                # Record the date so a later duplicate holding a row for the
                # same snapshot_date is skipped instead of migrated as well.
                plat_dates.add(pts.snapshot_date)
                log.info('Migrated pts: %r', pts)
示例#3
0
def redetect_blog_platforms_for_spreadsheet_import():
    infs = Influencer.objects.filter(source='spreadsheet_import',
                                     blog_url__isnull=False)
    infs_count = infs.count()
    print 'Looking at %s influencers' % infs_count
    discovered = []
    not_discovered = []
    for i, inf in enumerate(infs):
        print 'Processing %s/%s' % (i + 1, infs_count)
        if not inf.platform_set.filter(
                platform_name__in=['Custom', 'Blogspot', 'Wordpress'
                                   ]).exists():
            print '!!! No blog platform for influencer blog_url %r influencer %r' % (
                inf.blog_url, inf)
            try:
                discovered_pl, corrected_url = fetcher.try_detect_platform_name(
                    inf.blog_url)
            except Exception as e:
                print 'Exception %r while try_detect_platform_name' % e
                continue
            if discovered_pl:
                blog_pl = Platform.find_duplicates(inf, inf.blog_url,
                                                   discovered_pl)
                if blog_pl and len(blog_pl) > 0:
                    blog_pl = blog_pl[0]
                else:
                    blog_pl = Platform()
                blog_pl.influencer = inf
                blog_pl.platform_name = discovered_pl
                blog_pl.url = inf.blog_url
                blog_pl.save()
                print '+++ Saved platform from blog data: %r' % blog_pl
                discovered.append(blog_pl)
                print '\n', len(discovered), discovered, '\n'
            else:
                print '--- No platform discovered'
                not_discovered.append(inf.blog_url)
                print '\n', len(not_discovered), not_discovered, '\n'
示例#4
0
def update_blogs_from_xpaths(csv,
                             start_i,
                             end_i,
                             max_posts=float("inf"),
                             max_pages=float("inf")):
    """
    Command-line entry point: load blog/xpath rows from a tab-delimited file
    and hand them to Platform.update_blogs_from_xpaths.

    @param csv path of the tab-delimited file, passed to h.read_csv_file
    @param start_i first index to process; converted with int()
    @param end_i last index to process; converted with int()
    @param max_posts forwarded unchanged (unlimited by default)
    @param max_pages forwarded unchanged (unlimited by default)
    @return whatever Platform.update_blogs_from_xpaths returns (per the
            original notes: number of blogs updated, or None if an error was hit)
    """
    # Column names for each row; the trailing '' names the last column.
    column_names = [
        'blog_name', 'blog_url', 'post_urls',
        'post_title', 'post_content', 'post_date',
        'post_comments', 'next_page', ''
    ]
    blogs = h.read_csv_file(csv, delimiter='\t', dict_keys=column_names)
    first_index = int(start_i)
    last_index = int(end_i)
    return Platform.update_blogs_from_xpaths(blogs,
                                             first_index,
                                             last_index,
                                             max_posts=max_posts,
                                             max_pages=max_pages)
示例#5
0
def update_or_create_new_platform(influencer, platform_name, platform_url):
    from debra.models import Platform
    dups = Platform.find_duplicates(influencer,
                                    platform_url,
                                    platform_name,
                                    exclude_url_not_found_true=False)
    if dups and len(dups) > 0:
        print "Found duplicates for %r " % platform_url
        d = dups[0]
        d = d.handle_duplicates()
        d.url_not_found = False
        d.validated = True
        d.url = platform_url
        d.save()
        print "Handled duplicates, final platform staying: %r " % d
        return d
    else:
        d = Platform.objects.create(influencer=influencer,
                                    url=platform_url,
                                    platform_name=platform_name)
        d.validated = True
        d.save()
        print "Created a new platform: %r " % d
        return d
示例#6
0
def create_influencers_platforms_from_csv(filename,
                                          from_row='1',
                                          to_row='999999'):
    """Import influencers and their platforms from a spreadsheet CSV export.

    Works with https://docs.google.com/spreadsheet/ccc?key=0Ai2GPRwzn6lmdEMzWVR0aldXYXJodGplZlVGRVMyQ1E&usp=sharing . To download CSV, add output=csv to the link: https://docs.google.com/spreadsheet/ccc?key=0Ai2GPRwzn6lmdEMzWVR0aldXYXJodGplZlVGRVMyQ1E&usp=sharing&output=csv

    NOTE: the function is currently disabled. The ``assert False`` in the
    loop body aborts on the first row that passes the skip checks, so no
    influencer or platform is saved until the *_url handling is updated and
    the assertion removed.

    filename -- path handed to spreadsheet_reader
    from_row -- first row (1-based, inclusive) to process; converted with int()
    to_row   -- last row (inclusive) to process; converted with int()
    """
    reader = spreadsheet_reader(filename)
    count = 0
    from_row = int(from_row)
    to_row = int(to_row)
    for row in reader:
        # Printed before the increment, so this is the number of rows seen
        # before the current one.
        print "\n\nCount: %d" % count
        count += 1
        if count < from_row:
            print 'Skipping row %d' % count
            continue
        # Rows past the window are still read and skipped one at a time;
        # there is no early exit from the loop.
        if count > to_row:
            print 'Skipping row %d' % count
            continue
        if row['email'] == 'email':
            # First title row
            continue
        if not (row['url'] or '').strip():
            # Empty row
            continue
        print 'Processing row %r' % row
        # Reuse the first influencer already known for this blog url, if any.
        duplicate_infs = Influencer.find_duplicates(blog_url=row['url'])
        if len(duplicate_infs) > 0:
            inf = duplicate_infs[0]
            inf.handle_duplicates()
            print 'Using already saved influencer: %r' % inf
        else:
            inf = Influencer()
        # Update info from the spreadsheet columns.
        inf.source = 'spreadsheet_import'
        inf.name = row['blogger_name']
        inf.blog_url = row['url']
        inf.email = row['email']
        inf.demographics_location = row['location']
        inf.demographics_gender = row['gender']
        # Deliberate kill switch: everything below is unreachable until this
        # is removed (asserts are only skipped when Python runs with -O).
        assert False, 'This script requires code update to *_url fields processing'
        if row['Facebook']:
            inf.fb_url = row['Facebook']
        if row['Pinterest']:
            inf.pin_url = row['Pinterest']
        if row['Twitter']:
            inf.tw_url = row['Twitter']
        if row['Instagram']:
            inf.insta_url = row['Instagram']
        if row['Bloglovin']:
            inf.bloglovin_url = row['Bloglovin']
        inf.save()
        print 'Saved new influencer: %r' % inf

        # Try to save blog as platform
        if row['url']:
            blog_pl = Platform.objects.filter(url=row['url'])
            if blog_pl.exists():
                print "Blog already exists for url %s [%s]" % (row['url'],
                                                               blog_pl)
            else:
                # No platform with this exact url yet: detect the blog
                # platform name and save it for this influencer.
                discovered_pl, corrected_url = fetcher.try_detect_platform_name(
                    row['url'])
                if discovered_pl:
                    blog_pl = Platform.find_duplicates(inf, row['url'],
                                                       discovered_pl)
                    if blog_pl and len(blog_pl) > 0:
                        blog_pl = blog_pl[0]
                        blog_pl = blog_pl.handle_duplicates()
                    else:
                        blog_pl = Platform()
                    blog_pl.influencer = inf
                    blog_pl.platform_name = discovered_pl
                    blog_pl.url = row['url']
                    blog_pl.blogname = row['blog_name']
                    blog_pl.save()
                    print 'Saved platform from blog data: %r' % blog_pl
                else:
                    print 'No platform discovered for blog url %r' % row['url']

        # One platform per social column that has a value; the column header
        # doubles as the platform name.
        for platform_name in ('Facebook', 'Twitter', 'Pinterest', 'Bloglovin',
                              'Instagram'):
            if not row[platform_name]:
                print 'No url for platform %r' % platform_name
                continue
            pl = Platform.find_duplicates(inf, row[platform_name],
                                          platform_name)
            if pl and len(pl) > 0:
                pl = pl[0]
                pl = pl.handle_duplicates()
            else:
                pl = Platform()
            pl.influencer = inf
            pl.platform_name = platform_name
            pl.url = row[platform_name]
            pl.save()
            print 'Saved new platform %r' % pl
示例#7
0
 def test_is_social_platform(self, url, is_social):
     """Platform.is_social_platform(url) must be exactly the expected flag."""
     verdict = Platform.is_social_platform(url)
     assert verdict is is_social