class InfluencerImportForm(forms.ModelForm):
    blog_name = forms.CharField(required=True,
                                widget=forms.TextInput(attrs={'class': 'req'}))
    blog_url = forms.CharField(
        required=True,
        widget=forms.TextInput(attrs={'class': 'req blog_url'}))
    blog_platform = forms.ChoiceField(
        choices=Platform.blog_platforms_for_select())
    twitter = forms.CharField(required=False)
    extra_twitter = forms.CharField(required=False)
    facebook = forms.CharField(required=False)
    pinterest = forms.CharField(required=False)
    bloglovin = forms.CharField(required=False)
    instagram = forms.CharField(required=False)
    extra_instagram = forms.CharField(required=False)
    blog_aboutme = forms.CharField(required=False,
                                   widget=forms.Textarea(attrs={'rows': 3}))

    class Meta:
        model = Influencer
        fields = [
            'name', 'email', 'demographics_gender', 'source',
            'demographics_location'
        ]
        widgets = {
            'source': forms.Select(choices=Influencer.SOURCE_TYPES),
            'demographics_gender': forms.Select(
                choices=(('male', 'male'), ('female', 'female')))
        }
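
# A minimal usage sketch for the form above, assuming standard Django
# form handling. The view function name is hypothetical (not from the
# original source); save() comes from ModelForm and persists only the
# fields listed in Meta.fields, while the extra CharFields (twitter,
# facebook, etc.) remain in form.cleaned_data for the caller to process.
def _example_import_view(request):
    form = InfluencerImportForm(request.POST or None)
    if request.method == 'POST' and form.is_valid():
        inf = form.save()
        print 'Imported influencer: %r' % inf
        print 'Social fields to process: %r' % form.cleaned_data
    return form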
def migrate_pts_from_duplicates():
    infs = Influencer.objects.filter(show_on_search=True)
    plats = Platform.objects.filter(influencer__in=infs).exclude(
        url_not_found=True)
    for plat in plats:
        log.info('plat: %r', plat)
        plat_ptss = list(
            plat.popularitytimeseries_set.order_by('snapshot_date'))
        log.info('plat_ptss: %s', plat_ptss)
        plat_dates = {pts.snapshot_date for pts in plat_ptss}
        dups = Platform.find_duplicates(plat.influencer, plat.url,
                                        plat.platform_name, plat.id,
                                        exclude_url_not_found_true=False)
        if not dups:
            log.info('No dups')
            continue
        for dup in dups:
            log.info('dup: %r', dup)
            dup_ptss = list(
                dup.popularitytimeseries_set.order_by('snapshot_date'))
            log.info('dup_ptss: %s', dup_ptss)
            for pts in dup_ptss:
                if pts.snapshot_date in plat_dates:
                    log.info('Skipping existing pts %r', pts)
                    continue
                pts.platform = plat
                pts.save()
                log.info('Migrated pts: %r', pts)
def redetect_blog_platforms_for_spreadsheet_import():
    infs = Influencer.objects.filter(source='spreadsheet_import',
                                     blog_url__isnull=False)
    infs_count = infs.count()
    print 'Looking at %s influencers' % infs_count
    discovered = []
    not_discovered = []
    for i, inf in enumerate(infs):
        print 'Processing %s/%s' % (i + 1, infs_count)
        if not inf.platform_set.filter(
                platform_name__in=['Custom', 'Blogspot', 'Wordpress']
        ).exists():
            print '!!! No blog platform for influencer blog_url %r influencer %r' % (
                inf.blog_url, inf)
            try:
                discovered_pl, corrected_url = fetcher.try_detect_platform_name(
                    inf.blog_url)
            except Exception as e:
                print 'Exception %r while try_detect_platform_name' % e
                continue
            if discovered_pl:
                blog_pl = Platform.find_duplicates(inf, inf.blog_url,
                                                   discovered_pl)
                if blog_pl and len(blog_pl) > 0:
                    blog_pl = blog_pl[0]
                else:
                    blog_pl = Platform()
                    blog_pl.influencer = inf
                    blog_pl.platform_name = discovered_pl
                    blog_pl.url = inf.blog_url
                blog_pl.save()
                print '+++ Saved platform from blog data: %r' % blog_pl
                discovered.append(blog_pl)
                print '\n', len(discovered), discovered, '\n'
            else:
                print '--- No platform discovered'
                not_discovered.append(inf.blog_url)
                print '\n', len(not_discovered), not_discovered, '\n'
def update_blogs_from_xpaths(csv, start_i, end_i, max_posts=float("inf"),
                             max_pages=float("inf")):
    """
    This command-line function reads a CSV file containing information about
    blogs and their relevant xpaths, and parses it into a list of
    dictionaries to be fed to the Platform.update_blogs_from_xpaths function.

    @return the result of Platform.update_blogs_from_xpaths (the number of
    blogs updated if it completed, None if an error was hit)
    """
    # The trailing '' key is kept as in the original (presumably it absorbs
    # an empty trailing column in the tab-separated file).
    blogs = h.read_csv_file(csv, delimiter='\t', dict_keys=[
        'blog_name', 'blog_url', 'post_urls', 'post_title', 'post_content',
        'post_date', 'post_comments', 'next_page', ''
    ])
    return Platform.update_blogs_from_xpaths(blogs, int(start_i), int(end_i),
                                             max_posts=max_posts,
                                             max_pages=max_pages)
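
# A minimal invocation sketch for the function above. The file path and row
# bounds are hypothetical; start_i/end_i arrive as strings because the
# function is meant to be invoked from the command line.
def _example_update_blogs():
    return update_blogs_from_xpaths('blog_xpaths.tsv', '0', '50',
                                    max_posts=100)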
def update_or_create_new_platform(influencer, platform_name, platform_url):
    from debra.models import Platform
    dups = Platform.find_duplicates(influencer, platform_url, platform_name,
                                    exclude_url_not_found_true=False)
    if dups and len(dups) > 0:
        print "Found duplicates for %r" % platform_url
        d = dups[0]
        d = d.handle_duplicates()
        d.url_not_found = False
        d.validated = True
        d.url = platform_url
        d.save()
        print "Handled duplicates, final platform staying: %r" % d
        return d
    else:
        d = Platform.objects.create(influencer=influencer, url=platform_url,
                                    platform_name=platform_name)
        d.validated = True
        d.save()
        print "Created a new platform: %r" % d
        return d
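
# A minimal usage sketch for the helper above. The influencer id and the
# Twitter URL are hypothetical. Whether a duplicate is consolidated or a
# fresh row is created, the returned platform is validated and points at
# platform_url.
def _example_update_or_create():
    inf = Influencer.objects.get(id=12345)  # hypothetical id
    return update_or_create_new_platform(inf, 'Twitter',
                                         'https://twitter.com/example')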
def create_influencers_platforms_from_csv(filename, from_row='1',
                                          to_row='999999'):
    """Works with https://docs.google.com/spreadsheet/ccc?key=0Ai2GPRwzn6lmdEMzWVR0aldXYXJodGplZlVGRVMyQ1E&usp=sharing .

    To download a CSV, add output=csv to the link:
    https://docs.google.com/spreadsheet/ccc?key=0Ai2GPRwzn6lmdEMzWVR0aldXYXJodGplZlVGRVMyQ1E&usp=sharing&output=csv
    """
    reader = spreadsheet_reader(filename)
    count = 0
    from_row = int(from_row)
    to_row = int(to_row)
    for row in reader:
        print "\n\nCount: %d" % count
        count += 1
        if count < from_row:
            print 'Skipping row %d' % count
            continue
        if count > to_row:
            print 'Skipping row %d' % count
            continue
        if row['email'] == 'email':
            # First title row
            continue
        if not (row['url'] or '').strip():
            # Empty row
            continue
        print 'Processing row %r' % row
        duplicate_infs = Influencer.find_duplicates(blog_url=row['url'])
        if len(duplicate_infs) > 0:
            inf = duplicate_infs[0]
            inf.handle_duplicates()
            print 'Using already saved influencer: %r' % inf
        else:
            inf = Influencer()

        # Update info
        inf.source = 'spreadsheet_import'
        inf.name = row['blogger_name']
        inf.blog_url = row['url']
        inf.email = row['email']
        inf.demographics_location = row['location']
        inf.demographics_gender = row['gender']

        assert False, 'This script requires code update to *_url fields processing'
        if row['Facebook']:
            inf.fb_url = row['Facebook']
        if row['Pinterest']:
            inf.pin_url = row['Pinterest']
        if row['Twitter']:
            inf.tw_url = row['Twitter']
        if row['Instagram']:
            inf.insta_url = row['Instagram']
        if row['Bloglovin']:
            inf.bloglovin_url = row['Bloglovin']
        inf.save()
        print 'Saved new influencer: %r' % inf

        # Try to save blog as platform
        if row['url']:
            blog_pl = Platform.objects.filter(url=row['url'])
            if blog_pl.exists():
                print "Blog already exists for url %s [%s]" % (row['url'],
                                                               blog_pl)
            else:
                discovered_pl, corrected_url = fetcher.try_detect_platform_name(
                    row['url'])
                if discovered_pl:
                    blog_pl = Platform.find_duplicates(inf, row['url'],
                                                       discovered_pl)
                    if blog_pl and len(blog_pl) > 0:
                        blog_pl = blog_pl[0]
                        blog_pl = blog_pl.handle_duplicates()
                    else:
                        blog_pl = Platform()
                        blog_pl.influencer = inf
                        blog_pl.platform_name = discovered_pl
                        blog_pl.url = row['url']
                        blog_pl.blogname = row['blog_name']
                    blog_pl.save()
                    print 'Saved platform from blog data: %r' % blog_pl
                else:
                    print 'No platform discovered for blog url %r' % row['url']

        for platform_name in ('Facebook', 'Twitter', 'Pinterest', 'Bloglovin',
                              'Instagram'):
            if not row[platform_name]:
                print 'No url for platform %r' % platform_name
                continue
            pl = Platform.find_duplicates(inf, row[platform_name],
                                          platform_name)
            if pl and len(pl) > 0:
                pl = pl[0]
                pl = pl.handle_duplicates()
            else:
                pl = Platform()
                pl.influencer = inf
                pl.platform_name = platform_name
                pl.url = row[platform_name]
            pl.save()
            print 'Saved new platform %r' % pl
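
# A minimal invocation sketch for the importer above; the local CSV path is
# hypothetical. Note the assert False guard in the function body: the *_url
# field processing must be updated before this import will run to completion.
def _example_import_from_csv():
    create_influencers_platforms_from_csv('influencers.csv', '1', '100')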
def test_is_social_platform(self, url, is_social):
    assert Platform.is_social_platform(url) is is_social
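
# A sketch of how this test is presumably driven: the url/is_social
# arguments suggest pytest parametrization. The sample URLs and expected
# values below are illustrative assumptions, not the project's actual
# test data.
import pytest

@pytest.mark.parametrize('url,is_social', [
    ('https://twitter.com/example', True),
    ('https://www.pinterest.com/example/', True),
    ('http://example-blog.blogspot.com', False),
])
def test_is_social_platform_sketch(url, is_social):
    assert Platform.is_social_platform(url) is is_social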