# Example no. 1
def crawl_view(request):
    """Render the crawl page and dispatch the requested crawl action.

    On POST, validates the submitted form and runs whichever action the
    pressed button names in ``form.data``: blog-site scraping
    (``crawl_si_sites``), market-intelligence indexing (``crawl_mi``),
    product crawling/indexing (``crawl_pi``/``index_pi``), or feedly
    retrieval (``crawl_feedly``).  On GET, renders the form with defaults.

    :param request: Django HttpRequest.
    :returns: HttpResponse rendering ``app/crawl.html``.
    """
    if request.method == 'POST':
        form = crawl_form(request.POST)
        # called from crawh.html
        # NOTE: was called twice before (once with the result discarded);
        # a single call both validates and populates cleaned_data.
        if form.is_valid():
            from_date = form.cleaned_data['from_date']
            nrpages = form.cleaned_data['nrpages_field']
            site_choices = form.cleaned_data['site_choices_field']
            scrape_choices = form.cleaned_data['scrape_choices_field']
            rss_field = form.cleaned_data['rss_field']
            product_field = form.cleaned_data['product_field']
            username = form.cleaned_data['username']
            password = form.cleaned_data['password']
            if from_date is None:
                # Default crawl window starts January 1st of last year.
                today = datetime.now()
                from_date = datetime(today.year - 1, 1, 1, 0, 0, 0)
            if 'crawl_si_sites' in form.data:
                for site_choice in site_choices:
                    if site_choice == 'apf':
                        crawl.crawl_apf(scrape_choices, nrpages)
                    elif site_choice == 'cosmetics':
                        crawl.crawl_cosmetic(scrape_choices, nrpages)
                    else:
                        crawl.si_site(site_choice, nrpages)
            elif 'crawl_mi' in form.data:
                if not market.index_posts(from_date, username, password):
                    form.add_form_error("Could not index category posts")
            if 'crawl_pi' in form.data:
                if product_field == '':
                    form.add_form_error("Specify a product")
                else:
                    # NOTE(review): `index_choices` is not defined anywhere in
                    # this view, so this branch raises NameError when reached.
                    # It is presumably meant to be a form field — confirm with
                    # the form definition and fix at the source.
                    if not product.crawl_product(index_choices, product_field):
                        form.add_form_error("Could not save product data")
            if 'index_pi' in form.data:
                if product_field == '':
                    form.add_form_error("Specify a product")
                else:
                    # NOTE(review): same undefined `index_choices` as above.
                    if not product.index_product(index_choices, product_field):
                        form.add_form_error("Could not retrieve product data")
            if 'crawl_feedly' in form.data:
                if not crawl.crawl_feedly(from_date, rss_field):
                    form.add_form_error("Could not retrieve feedly data, expired")
            if 'return_survey' in form.data:
                pass
            return render(request, 'app/crawl.html', {'form': form, 'es_hosts' : FMI.settings.ES_HOSTS, 'scrape_li' : models.scrape_li } )
    else:
        form = crawl_form(initial={'scrape_choices_field':['product', 'blog'], 'excel_choices_field':['recreate']})

    return render(request, 'app/crawl.html', {'form': form, 'es_hosts' : FMI.settings.ES_HOSTS,
                  'message':'IFF - Insight Platform', 'year':datetime.now().year})
# Example no. 2
def crawl_view(request):
    """Render the crawl page and dispatch the requested crawl action.

    On POST, validates the submitted form and runs whichever action the
    pressed button names in ``form.data``: blog-site scraping
    (``crawl_si_sites``), feedly/market-intelligence crawling
    (``crawl_feedly``/``crawl_mi``), or per-site product-intelligence
    crawling/retrieval (``crawl_pi``/``retrieve_pi``).  On GET, renders the
    form with defaults; a ``crawl_pi`` query parameter limits the visible
    template sections to product intelligence only.

    :param request: Django HttpRequest.
    :returns: HttpResponse rendering ``app/crawl.html``.
    """
    sections = {}
    if request.method == 'POST':
        form = crawl_form(request.POST)
        # called from crawh.html
        # NOTE: was called twice before (once with the result discarded);
        # a single call both validates and populates cleaned_data.
        if form.is_valid():
            from_dt = form.cleaned_data['from_dt']
            nrpages = form.cleaned_data['nrpages_field']
            site_choices = form.cleaned_data['site_choices_field']
            scrape_choices = form.cleaned_data['scrape_choices_field']
            rss_field = form.cleaned_data['rss_field']
            brand_name = form.cleaned_data['brand_name_field']
            brand_variant = form.cleaned_data['brand_variant_field']
            perfume_code = form.cleaned_data['perfume_code_field'].lower()
            username = form.cleaned_data['username']
            password = form.cleaned_data['password']
            if from_dt is None:
                # Default crawl window starts January 1st of last year.
                today = datetime.now()
                from_dt = datetime(today.year - 1, 1, 1).date()
            # Crawl BLOG Sites
            if 'crawl_si_sites' in form.data:
                for site_choice in site_choices:
                    if site_choice == 'apf':
                        crawl.crawl_apf(from_dt, scrape_choices, nrpages)
                    elif site_choice == 'cosmetics':
                        crawl.crawl_cosmetic(from_dt, scrape_choices, nrpages)
                    else:
                        crawl.crawl_si_site(from_dt, site_choice, nrpages)
            # Crawl Market Intelligence Sites
            if 'crawl_feedly' in form.data:
                if not crawl.crawl_feedly(from_dt, rss_field):
                    form.add_form_error("Could not retrieve feedly data, expired")
            elif 'crawl_mi' in form.data:
                if not market.index_posts(from_dt, username, password):
                    form.add_form_error("Could not index category posts")
            # Crawl Product Intelligence Sites
            if 'crawl_pi' in form.data and 'pi_site_choices_field' in request.POST:
                pi_site_choices = request.POST['pi_site_choices_field']
                if perfume_code == '':
                    form.add_form_error("Specify a product code")
                else:
                    # Initialize so an unrecognized site choice reports a form
                    # error instead of raising UnboundLocalError below.
                    success = False
                    if pi_site_choices == 'fragrantica':
                        success = product.crawl_fragrantica(from_dt, brand_name, brand_variant, perfume_code)
                    elif pi_site_choices == 'amazon':
                        success = product.crawl_amazon(from_dt, brand_name, brand_variant, perfume_code)
                    elif pi_site_choices == 'basenotes':
                        success = product.crawl_basenotes(from_dt, brand_name, brand_variant, perfume_code)
                    if not success:
                        form.add_form_error("Could not save product data")
            if 'retrieve_pi' in form.data and 'pi_site_choices_field' in request.POST:
                pi_site_choices = request.POST['pi_site_choices_field']
                if perfume_code == '':
                    form.add_form_error("Specify a product code")
                else:
                    # Same guard as the crawl_pi branch above.
                    success = False
                    if pi_site_choices == 'fragrantica':
                        success = product.retrieve_fragrantica(perfume_code)
                    elif pi_site_choices == 'amazon':
                        success = product.retrieve_amazon(perfume_code)
                    elif pi_site_choices == 'basenotes':
                        success = product.retrieve_basenotes(perfume_code)
                    if not success:
                        form.add_form_error("Could not save product data")
            if 'return_survey' in form.data:
                pass
            return render(
                request,
                'app/crawl.html',
                {'site' : FMI.settings.site, 'form': form, 'es_hosts' : FMI.settings.ES_HOSTS, 'sections' : sections, 'scrape_li' : models.scrape_li }
            )
    else:
        form = crawl_form(initial={'scrape_choices_field':['product', 'blog'], 'excel_choices_field':['recreate']})
        # Section visibility flags consumed by the template: 1 = show, 0 = hide.
        if 'crawl_pi' in request.GET:
            sections['crawl_mi'] = 0
            sections['crawl_feedly'] = 0
            sections['crawl_si_sites'] = 0
            sections['crawl_pi'] = 1
        else:
            sections['crawl_mi'] = 1
            sections['crawl_feedly'] = 1
            sections['crawl_si_sites'] = 1
            sections['crawl_pi'] = 1

    return render(
        request,
        'app/crawl.html',
        {'site' : FMI.settings.site, 'form': form, 'es_hosts' : FMI.settings.ES_HOSTS, 'sections' : sections, 'year':datetime.now().year})
# Example no. 3
def crawl_view(request):
    """Render the crawl page and dispatch the requested crawl action.

    On POST, validates the submitted form and runs whichever action the
    pressed button names in ``form.data``: survey crawling/mapping
    (``crawl_survey``/``map_survey``), blog-site scraping
    (``crawl_si_sites``), market-intelligence indexing (``crawl_mi``),
    excel import (``crawl_excel``), product crawling/indexing
    (``crawl_pi``/``index_pi``), feedly retrieval (``crawl_feedly``), or
    scent-emotion import (``crawl_scentemotion``).  On GET, renders the
    form with defaults.

    :param request: Django HttpRequest.
    :returns: HttpResponse rendering ``app/crawl.html`` or
        ``app/crawlresults.html`` (after ``map_survey``).
    """
    if request.method == 'POST':
        form = crawl_form(request.POST)
        # Single validation call (was called twice); it populates cleaned_data.
        valid = form.is_valid()
        # Use .get(): when validation failed, cleaned_data only holds the
        # fields that did validate, so direct indexing could raise KeyError.
        ci_filename = form.cleaned_data.get('ci_filename_field')
        cimap_filename = form.cleaned_data.get('cimap_filename_field')
        # called form crawlresults.html
        if 'crawl_survey' in form.data:
            crawl.crawl_survey(ci_filename, cimap_filename)
        # called from crawh.html
        if valid:
            from_date = form.cleaned_data['from_date']
            nrpages = form.cleaned_data['nrpages_field']
            site_choices = form.cleaned_data['site_choices_field']
            scrape_choices = form.cleaned_data['scrape_choices_field']
            rss_field = form.cleaned_data['rss_field']
            product_field = form.cleaned_data['product_field']
            username = form.cleaned_data['username']
            password = form.cleaned_data['password']
            cft_filename = form.cleaned_data['cft_filename_field']
            excel_choices = form.cleaned_data['excel_choices_field']
            excel_filename = form.cleaned_data['excel_filename_field']
            ci_filename = form.cleaned_data['ci_filename_field']
            cimap_filename = form.cleaned_data['cimap_filename_field']
            if from_date is None:
                # Default crawl window starts January 1st of last year.
                today = datetime.now()
                from_date = datetime(today.year - 1, 1, 1, 0, 0, 0)
            if 'crawl_si_sites' in form.data:
                for site_choice in site_choices:
                    if site_choice == 'apf':
                        crawl.crawl_apf(scrape_choices, nrpages)
                    elif site_choice == 'cosmetics':
                        crawl.crawl_cosmetic(scrape_choices, nrpages)
                    else:
                        crawl.si_site(site_choice, nrpages)
            elif 'crawl_mi' in form.data:
                if not market.index_posts(from_date, username, password):
                    form.add_form_error("Could not index category posts")
            if 'crawl_excel' in form.data:
                if not crawl.crawl_excel(excel_filename, excel_choices):
                    form.add_form_error(
                        "Could not retrieve or index excel file")
            if 'crawl_pi' in form.data:
                if product_field == '':
                    form.add_form_error("Specify a product")
                else:
                    # NOTE(review): `index_choices` is not defined anywhere in
                    # this view, so this branch raises NameError when reached.
                    # Presumably meant to be a form field — confirm and fix.
                    if not product.crawl_product(index_choices, product_field):
                        form.add_form_error("Could not save product data")
            if 'index_pi' in form.data:
                if product_field == '':
                    form.add_form_error("Specify a product")
                else:
                    # NOTE(review): same undefined `index_choices` as above.
                    if not product.index_product(index_choices, product_field):
                        form.add_form_error("Could not retrieve product data")
            if 'crawl_feedly' in form.data:
                if not crawl.crawl_feedly(from_date, rss_field):
                    form.add_form_error(
                        "Could not retrieve feedly data, expired")
            if 'crawl_scentemotion' in form.data:
                crawl.crawl_scentemotion(cft_filename)
            # Flatten all known survey answers for the mapping page.
            answers = []
            for question in survey.qa.keys():
                answers.extend(list(survey.qa[question].keys()))
            if 'map_survey' in form.data:
                col_map = crawl.map_survey(ci_filename, cimap_filename)
                answers = sorted(answers)
                context = {
                    'form': form,
                    'col_map': col_map,
                    'answers': answers,
                    'qa': survey.qa,
                }
                return render(request, 'app/crawlresults.html', context)
            if 'return_survey' in form.data:
                pass
            return render(
                request, 'app/crawl.html', {
                    'form': form,
                    'es_hosts': FMI.settings.ES_HOSTS,
                    'scrape_li': models.scrape_li
                })
    else:
        form = crawl_form(
            initial={
                'scrape_choices_field': ['product', 'blog'],
                'excel_choices_field': ['recreate']
            })

    # Merged into a single context dict: the old call passed a second context
    # via context_instance=RequestContext(...), which was removed in
    # Django 1.10 and would drop 'message'/'year' (or raise) on modern Django.
    return render(request,
                  'app/crawl.html', {
                      'form': form,
                      'es_hosts': FMI.settings.ES_HOSTS,
                      'message': 'IFF - Insight Platform',
                      'year': datetime.now().year,
                  })