示例#1
0
    def test_create_ads(self):
        """Create one ad and check the Ad <-> Tag relation from both sides.

        Uses ``assertEqual``; the ``assertEquals`` alias is deprecated and
        was removed from ``unittest`` in Python 3.12.
        """
        tag1 = Tag.objects.create(name="HTML 5", slug="html5")
        tag2 = Tag.objects.create(name="jQuery", slug="jquery")
        tag3 = Tag.objects.create(name="Python", slug="python")

        # No ads exist before the test creates one.
        self.assertEqual(0, Ad.objects.count())

        ad1 = Ad()
        ad1.company_name = "ACME International"
        ad1.company_url = "http://acme.com/"
        ad1.title = "Ninja Top Developer"
        ad1.description = """
        We are growing, we need more developers.
        Apply if you are cool and sexy.
        """
        ad1.apply_email = "*****@*****.**"

        # Validate the model before persisting it.
        ad1.full_clean()
        ad1.save()

        self.assertEqual(1, Ad.objects.count())

        # Attach a tag through the ad side of the relation ...
        ad1.tags.add(tag1)

        self.assertEqual(1, tag1.ads.count())

        # ... and another one through the tag side.
        tag2.ads.add(ad1)

        self.assertEqual(2, ad1.tags.count())

        # The tag that was never attached stays empty.
        self.assertEqual(0, tag3.ads.count())
示例#2
0
	def setUp(self):
		"""Persist one newspaper and one ad (not linked to each other)."""
		paper = Newspaper(name='Test Newspaper 1')
		paper.save()

		fixture_ad = Ad()
		fixture_ad.name = 'Name 1'
		fixture_ad.content = 'Content 1'
		fixture_ad.save()
示例#3
0
 def post(self, request, pk):
     """Save an ``Ad`` built from the requesting user and the ad with id ``pk``.

     Responds with 404 when no ad has that id. An ``IntegrityError`` raised
     by the save is ignored, and an empty ``HttpResponse`` is returned.
     """
     print("Add PK", pk)
     target = get_object_or_404(Ad, id=pk)
     entry = Ad(user=request.user, ad=target)
     try:
         entry.save()
     except IntegrityError:
         # Per the original author: happens on a duplicate key; safe to skip.
         pass
     return HttpResponse()
示例#4
0
	def setUp(self):
		"""Persist two ads used as fixtures by the tests."""
		fixtures = (
			('Test Ad 1', 'Content 1'),
			('Test Ad 2', 'Content 2'),
		)
		for ad_name, ad_content in fixtures:
			fixture_ad = Ad()
			fixture_ad.name = ad_name
			fixture_ad.content = ad_content
			fixture_ad.save()
示例#5
0
def _existing_attachments(model, pks):
    """Return the ``model`` rows for ``pks``, skipping ids that do not resolve."""
    found = []
    for pk in pks:
        try:
            found.append(model.objects.get(pk=pk))
        except (model.DoesNotExist, ValueError):
            # ``get`` never returns None: a stale or malformed id raises.
            # Ignore it so the form can still be re-rendered.
            continue
    return found


def post_ad(request):
    """Show the "post an ad" form and create the ad on a valid POST.

    On a valid POST:
      * authenticated user: the ad is bound to the user, gets the town stored
        in the session (``TOWN_SELECTED``, default 1) when it has no district,
        is saved, run through ``WordAnalyser.block_object`` and saved again
        unless the result is ``False``;
      * anonymous user: the ad is bound to the user with the submitted e-mail
        (an inactive user is created when none exists) and stored through
        ``save_as_disabled``.
    Afterwards similar ads are searched, uploaded files are attached, and the
    client is redirected to the activation page (disabled ad) or to the ad.

    On an invalid POST the already uploaded attachments named in
    ``images[]`` / ``video[]`` are looked up again so the template can show
    them; ids that no longer resolve are skipped.

    :param request: the current ``HttpRequest``
    :return: a redirect after a successful save, otherwise the rendered form
    """
    files = {'images': [], 'video': []}

    if request.method == "POST":

        from_class = Ad.get_form_class(request.POST.get('category'), request.POST.get('sub_category'),
                                       request.POST.get('offering') == "True")
        form = from_class(request.POST, anonym=(not request.user.is_authenticated()))
        if form.is_valid():
            obj = form.save(commit=False)  # returns unsaved instance
            if request.user.is_authenticated():
                obj.user = request.user
                if not obj.district:
                    obj.town = Town.objects.get(pk=request.session.get('TOWN_SELECTED', 1))
                obj.save()
                obj.blocked = WordAnalyser.block_object(obj, Ad.fields_for_analyse)
                if obj.blocked is not False:
                    obj.save()
            else:
                email = form.cleaned_data['email']
                user_model = get_user_model()
                try:
                    obj.user = user_model.objects.get(email=email)
                except user_model.DoesNotExist:
                    obj.user = user_model.objects.create_user(email, None, is_active=False)
                # Deliberately not in a ``finally``: if creating the user
                # failed there is no owner to save the ad for, and saving
                # anyway could mask the original error.
                obj.save_as_disabled(email)

            tasks.find_similar(obj.id)
            Ad.save_files(request.POST, obj)
            if obj.disabled:
                return redirect('activate_ad', ad_id=obj.id)
            else:
                messages.success(request, _('Your add was successfully added.'))
                return redirect(obj.get_absolute_url())
        else:
            files['images'].extend(
                _existing_attachments(ImageAttachment, request.POST.getlist('images[]')))
            files['video'].extend(
                _existing_attachments(VideoAttachment, request.POST.getlist('video[]')))
    else:
        from_class = Ad.get_form_class(offering=True)
        form = from_class(anonym=(not request.user.is_authenticated()))

    return render(request, 'ads/post_ad.html', {'form': form,
                                                'files': files,
                                                'images_limit': settings.UPLOAD_IMAGES_LIMIT - len(files['images']),
                                                'video_limit': settings.UPLOAD_VIDEO_LIMIT - len(files['video'])})
示例#6
0
def events_all(request):
    """List published events, 10 per page, filtered by city and category.

    Filters come from the repeated ``location`` and ``category`` GET
    parameters. The city choices offered to the template are taken from all
    published events, before any filter is applied. A non-integer ``page``
    falls back to the first page and an out-of-range one to the last page.
    """
    ads = Ad.get_active_ads()
    published = Event.objects.filter(status="published")
    events_list = published.order_by("-start_date", "-start_time")

    # Built from the unfiltered queryset on purpose.
    cities = events_list.values_list('city', flat=True).distinct().order_by('city')
    categories = EventCategory.objects.all().order_by('title')

    filtered_cities = request.GET.getlist('location')
    filtered_categories = request.GET.getlist('category')
    if filtered_cities:
        events_list = events_list.filter(city__in=filtered_cities)
    if filtered_categories:
        events_list = events_list.filter(categories__title__in=filtered_categories)

    paginator = Paginator(events_list, 10)
    requested_page = request.GET.get("page", 1)
    try:
        events = paginator.page(requested_page)
    except PageNotAnInteger:
        events = paginator.page(1)
    except EmptyPage:
        events = paginator.page(paginator.num_pages)

    return render(request, "events/index.html", {
        "events": events,
        "cities": cities,
        "categories": categories,
        "filtered_cities": filtered_cities,
        "filtered_categories": filtered_categories,
        "ads": ads,
    })
示例#7
0
 def post(self, request):
     """
     Process the submitted form that creates an ad.
     :param request: HttpRequest object
     :return: HttpResponse with the rendered form page
     """
     new_ad = Ad()
     new_ad.owner = request.user
     form = AdForm(request.POST, request.FILES, instance=new_ad)
     if form.is_valid():
         # Persist the ad.
         form.save()
         # Replace the bound form with a blank one.
         form = AdForm()
         # Tell the user it worked.
         messages.success(request, 'Anuncio creado correctamente')
     return render(request, 'ads/form.html', {'form': form})
示例#8
0
    def setUp(self):
        """Persist two newspapers and one ad published in both of them."""
        papers = []
        for paper_name in ('Test Newspaper 1', 'Test Newspaper 2'):
            paper = Newspaper(name=paper_name)
            paper.save()
            papers.append(paper)

        fixture_ad = Ad()
        fixture_ad.name = 'Name 1'
        fixture_ad.content = 'Content 1'
        # The ad must be saved once before the relation can be populated.
        fixture_ad.save()
        fixture_ad.newspapers.add(*papers)
        fixture_ad.save()
示例#9
0
def listing_single(request, slug, pk):
    """Render one listing, or the "private" page for someone else's draft.

    Responds with 404 when no listing matches both ``slug`` and ``pk``.
    """
    ads = Ad.get_active_ads()
    listing = get_object_or_404(Listing, slug=slug, id=pk)
    user_liked = listing.is_liked_by(request.user)

    # Drafts are only visible to the user who created them.
    is_foreign_draft = listing.status == 'draft' and request.user != listing.created_by
    if is_foreign_draft:
        return render(request, "private.html")

    report_post_form = ReportPostForm()
    categories = []
    for cat in listing.categories.all():
        categories.append({
            "title": cat.title,
            "category_group": cat.category_group.title,
        })

    context = {
        "listing": listing,
        "categories": categories,
        "report_post_form": report_post_form,
        "ads": ads,
        "user_liked": user_liked,
    }
    return render(request, "listings/single.html", context)
示例#10
0
def events_single(request, slug, pk):
    """Render one event, or the "private" page for someone else's draft.

    Responds with 404 when no event matches both ``slug`` and ``pk``.
    """
    ads = Ad.get_active_ads()
    event = get_object_or_404(Event, slug=slug, id=pk)

    # Drafts are only visible to the user who created them.
    is_foreign_draft = event.status == 'draft' and request.user != event.created_by
    if is_foreign_draft:
        return render(request, "private.html")

    return render(request, "events/single.html", {
        "event": event,
        "report_post_form": ReportPostForm(),
        "ads": ads,
    })
示例#11
0
def home(request):
    """Render the home page.

    The template receives the active ads, the 15 most recently created
    published listings, the upcoming events and the latest forum activity.
    """
    ads = Ad.get_active_ads()
    published = Listing.objects.filter(status="published")
    newest_listings = published.order_by("-created_at")[:15]

    return render(request, "pages/home.html", {
        "ads": ads,
        "listings": newest_listings,
        "upcoming_events": Event.get_upcoming_events(),
        "latest_forum_activity": ForumThread.get_latest(),
    })
示例#12
0
    def setUp(self):
        """Persist one newspaper and one ad (not linked to each other)."""
        paper = Newspaper(name='Test Newspaper 1')
        paper.save()

        fixture_ad = Ad()
        fixture_ad.name = 'Name 1'
        fixture_ad.content = 'Content 1'
        fixture_ad.save()
示例#13
0
def ajax_get_fields(request, cat_id, sub_cat_id):
    """Render the extra form fields for a category / sub-category pair.

    ``sub_cat_id`` equal to the string ``'0'`` means "no sub-category". The
    form is an offering form unless the ``looking`` GET parameter is set.
    """
    has_sub_category = sub_cat_id != '0'

    # The looked-up object is not passed to the template; the lookups are
    # kept so that an unknown id still raises as before.
    if has_sub_category:
        cat = SubCategory.objects.get(pk=sub_cat_id)
        if cat.form_add is None:
            cat = cat.category
    else:
        cat = Category.objects.get(pk=cat_id)

    is_looking = request.GET.get('looking', False)
    from_class = Ad.get_form_class(cat_id,
                                   sub_cat_id if has_sub_category else None,
                                   offering=not is_looking)

    initial = {'category': cat_id, 'sub_category': sub_cat_id}
    form = from_class(initial=initial)
    return render(request, 'ads/additional_fields.html', {'form': form})
示例#14
0
	def setUp(self):
		"""Persist two newspapers and one ad published in both of them."""
		papers = []
		for paper_name in ('Test Newspaper 1', 'Test Newspaper 2'):
			paper = Newspaper(name=paper_name)
			paper.save()
			papers.append(paper)

		fixture_ad = Ad()
		fixture_ad.name = 'Name 1'
		fixture_ad.content = 'Content 1'
		# The ad must be saved once before the relation can be populated.
		fixture_ad.save()
		fixture_ad.newspapers.add(*papers)
		fixture_ad.save()
示例#15
0
def edit_ad(request, ad_id=None):
    """Edit an ad owned by the current user.

    Responds with 404 when the ad does not exist or belongs to someone else.
    A POST drops the cached detail entry for the ad, validates the form and,
    on success, re-runs the word analyser, saves, searches for similar ads,
    stores the uploaded files and redirects to the ad. Otherwise the form is
    rendered together with the ad's current attachments.
    """
    instance = get_object_or_404(Ad, id=ad_id, user_id=request.user.id)

    if request.method != "POST":
        sub_category = instance.sub_category if hasattr(instance, 'sub_category') else None
        from_class = Ad.get_form_class(instance.category, sub_category, instance.offering)
        form = from_class(instance=instance)
    else:
        cache.delete('detail_%s' % ad_id)

        # A non-empty value is turned into a bool; a missing value falls
        # back to what the stored ad says.
        offering = request.POST.get('offering')
        if offering:
            offering = offering == 'True'

        from_class = Ad.get_form_class(
            request.POST.get('category'),
            request.POST.get('sub_category'),
            instance.offering if offering is None else offering,
        )
        form = from_class(request.POST, instance=instance)

        if form.is_valid():
            obj = form.save(commit=False)
            obj.blocked = WordAnalyser.block_object(obj, Ad.fields_for_analyse)
            obj.save()
            tasks.find_similar(obj.id)
            Ad.save_files(request.POST, obj)
            messages.success(request, _('Your add was successfully updated.'))
            return redirect(obj.get_absolute_url())

    images = instance.imageattachment_set.all()
    videos = instance.videoattachment_set.all()
    files = {'images': images, 'video': videos}

    context = {
        'form': form,
        'files': files,
        'images_limit': settings.UPLOAD_IMAGES_LIMIT - len(files['images']),
        'video_limit': settings.UPLOAD_VIDEO_LIMIT - len(files['video']),
    }
    return render(request, 'ads/post_ad.html', context)
示例#16
0
def listings_all(request, category=None):
    """List published listings, 10 per page, with optional filters.

    Filters come from the repeated GET parameters ``location``, ``category``
    (only the first value is used, as a category-group title) and
    ``subcategory``. The city choices offered to the template are taken from
    all published listings, before any filter is applied. A non-integer
    ``page`` falls back to the first page, an out-of-range one to the last.
    The ``category`` argument is accepted but not used.
    """
    ads = Ad.get_active_ads()
    listings = Listing.objects.all().order_by("-created_at").filter(status='published')

    # Built from the unfiltered queryset on purpose.
    cities = listings.values_list('city', flat=True).distinct().order_by('city')
    categories = CategoryGroup.objects.all().order_by('title')
    subcategories = None

    filtered_cities = request.GET.getlist('location')
    if filtered_cities:
        listings = listings.filter(city__in=filtered_cities)

    filtered_categories = request.GET.getlist('category')
    if filtered_categories:
        group_members = Category.objects.filter(
            category_group__title=filtered_categories[0]).order_by('title')
        subcategories = []
        for member in group_members:
            subcategories.append(member.title)
        listings = listings.distinct().filter(categories__title__in=subcategories)

    filtered_subcategories = request.GET.getlist('subcategory')
    if filtered_subcategories:
        listings = listings.filter(categories__title__in=filtered_subcategories)

    paginator = Paginator(listings, 10)
    requested_page = request.GET.get("page", 1)
    try:
        listings = paginator.page(requested_page)
    except PageNotAnInteger:
        listings = paginator.page(1)
    except EmptyPage:
        listings = paginator.page(paginator.num_pages)

    return render(request, "listings/index.html", {
        "listings": listings,
        "cities": cities,
        "categories": categories,
        "subcategories": subcategories,
        "category_group": "Listings",
        "filtered_cities": filtered_cities,
        "filtered_categories": filtered_categories,
        "filtered_subcategories": filtered_subcategories,
        "ads": ads,
    })
示例#17
0
def autocomplete(request):
    """Return up to five search suggestions as ``{"results": [...]}`` JSON.

    Each suggestion is the part of a result title around the query ``q``
    (up to one word before and one after it); titles that do not contain
    the query are returned whole.
    """
    import re

    top_results = Ad.search(request.GET, True)[:5]
    titles = [hit.title for hit in top_results]

    query = request.GET.get('q', '')
    around_word = r'[\w]*[\s\W]*[\w]*'
    regexp = re.compile(around_word + re.escape(query) + around_word, re.IGNORECASE | re.UNICODE)

    res = []
    for title in titles:
        found = regexp.search(title)
        res.append(title if found is None else found.group(0))

    # Return a JSON object rather than a bare list: a top-level list could
    # expose the endpoint to an XSS attack.
    payload = json.dumps({'results': res})
    return HttpResponse(payload, content_type='application/json')
示例#18
0
文件: tests.py 项目: bobfp/acme_ads
    def setUp(self):
        """Start a Firefox browser and persist three ads as fixtures."""
        self.browser = webdriver.Firefox()
        # Let element lookups wait up to 3 seconds before failing.
        self.browser.implicitly_wait(3)

        fixtures = (
            ('Test Ad 1', 'Content 1'),
            ('Test Ad 2', 'Content 2'),
            ('Test Ad 3', 'Content 3'),
        )
        for ad_name, ad_content in fixtures:
            fixture_ad = Ad()
            fixture_ad.name = ad_name
            fixture_ad.content = ad_content
            fixture_ad.save()
示例#19
0
文件: tests.py 项目: bobfp/acme_ads
    def setUp(self):
        """Persist three newspapers and three ads, then start Firefox.

        The ads get the explicit ids 1, 2 and 3; only the second ad is
        published, in the first two newspapers.
        """
        newspaper1 = Newspaper()
        newspaper1.name = 'Test Newspaper 1'
        newspaper1.save()

        newspaper2 = Newspaper()
        newspaper2.name = 'Test Newspaper 2'
        newspaper2.save()

        newspaper3 = Newspaper()
        newspaper3.name = 'Test Newspaper 3'
        newspaper3.save()

        ad1 = Ad()
        ad1.name = 'Test Ad 1'
        ad1.id = 1
        ad1.content = 'Content 1'
        ad1.save()

        ad2 = Ad()
        ad2.name = 'Test Ad 2'
        ad2.id = 2
        ad2.content = 'Content 2'
        # The ad must be saved once before the relation can be populated.
        ad2.save()
        ad2.newspapers.add(newspaper1, newspaper2)
        ad2.save()

        ad3 = Ad()
        ad3.name = 'Test Ad 3'
        # The id belongs on ad3; assigning it to ad2 here changed the id of
        # the already saved second ad in memory and left ad3 without one.
        ad3.id = 3
        ad3.content = 'Content 3'
        ad3.save()

        self.browser = webdriver.Firefox()
        # Let element lookups wait up to 3 seconds before failing.
        self.browser.implicitly_wait(3)
示例#20
0
    def get_ads(cls):
        """Import recent ads from the estate-in-kharkov.com real-estate base.

        Every table is paged through 200 rows at a time for the 'green' base
        section, restricted to the range yesterday..today. Items whose
        ``kod`` already exists as ``Ad.gkey`` are skipped; the others are
        mapped onto ``Ad`` fields, saved and handed to the ``find_similar``
        and, for known domains, ``import_attachments`` tasks.

        A response without ``items`` is retried after a growing pause; the
        retry budget (5) is shared by the whole run. Once it is used up the
        assertion fails and the import stops.

        A per-table summary, preceded by the traceback when the run failed,
        is always passed to ``send_log``. Exceptions are re-raised unchanged.
        """
        errors = 5
        # Remote field name -> Ad field name.
        match = {'cena': 'price', 'datap': 'pub_date', 'dom_etag': 'floor_max', 'etag': 'floor',
                 'kol_komn': 'rooms_count', 'pl1': 'area_living', 'plk': 'area_kitchen', 'plosh': 'area',
                 'pl_land': 'area_land', 'textob': 'desc'}

        now = datetime.now().strftime("%Y-%m-%d")
        yesterday = (date.today() - timedelta(1)).strftime("%Y-%m-%d")

        data = {
            're_base_name': 'kvart',
            're_base_section': 'green',
            'rem': 0,
            'subq_start': 0,
            'subq_lines': 200,
            'order_by[data]': 'desc',
            'order_by[datap]': 'desc',
            'data_start': yesterday,
            'data_end': now,
        }
        statistic = {}
        log = ""
        try:
            for table in ['arenda', 'kvart', 'komn', 'domm', 'negil', 'client']:  # all tables
                data['re_base_name'] = table
                print('Load from ' + table)
                statistic[table] = {}
                for private_type in ['green']:  # ['green', 'red']:  # all bases
                    statistic[table][private_type] = 0
                    print('table type ' + private_type)
                    data['re_base_section'] = private_type
                    current = 0
                    total = 200
                    while current < total:   # all pages
                        print('select 200 starts from ' + str(current) + ', total is ' + str(total))
                        data['subq_start'] = current
                        str_data = urllib.urlencode(data)
                        content = cls.get('http://estate-in-kharkov.com/ps/re_base/ajax/real-estate-database.php', str_data)
                        current += 200
                        if 're_base_query_count' in content:
                            total = int(content['re_base_query_count'])

                        if 'items' not in content and errors:
                            print("hasn't content , errors left %d" % errors)
                            errors -= 1
                            print(content)
                            # Back off longer after every failure, then
                            # request the same page again.
                            time.sleep((6-errors)*10)
                            current -= 200
                            continue
                        assert 'items' in content, 'unexpected content: ' + str(content)

                        if content['items'] is False:
                            break
                        for item in content['items'].values():
                            aid = item['kod']
                            try:
                                existed = Ad.objects.get(gkey=aid)
                                print(str(existed) + ' already imported!')
                                continue
                            except Ad.DoesNotExist:
                                pass

                            ad_item = {'gkey': aid}
                            for field in item:
                                if item[field] and field in match:
                                    ad_item[match[field]] = item[field]

                            ad = cls.parse_categories(ad_item, item, table)
                            if 'sub_category' in ad:
                                print(ad['sub_category'])
                            if 'rooms_count' in ad:
                                print('rooms: ' + str(ad['rooms_count']))

                            if item['textob']:
                                ad['title'] = truncatesmart(item['textob'], 45)
                                if not ad['title']:
                                    ad['title'] = item['textob'][:45]

                            ad['offering'] = table != 'client'
                            ad['private'] = private_type == 'green'

                            # Join only the phones that are present, so an
                            # empty tel1 no longer yields a leading ", ".
                            phones = []
                            for i in range(1, 5):
                                name = 'tel'+str(i)
                                if name in item and item[name].strip():
                                    phones.append(item[name].strip())
                            ad['phone'] = ', '.join(phones)

                            if item['metro']:
                                try:
                                    metro = Metro.objects.get(pk=item['metro'])
                                    # 'desc' is only set when the item had
                                    # text; do not fail when it is missing.
                                    ad['desc'] = ad.get('desc', '') + ' ' + metro.name
                                except Metro.DoesNotExist:
                                    pass

                            if item['ulica']:
                                try:
                                    ad['address'] = Street.objects.get(pk=item['ulica'])
                                except Street.DoesNotExist:
                                    pass

                            if item['raj']:
                                try:
                                    ad['district'] = District.objects.get(pk=item['raj'])
                                except District.DoesNotExist:
                                    pass

                            if item['nasp']:
                                # Remote town id '293' is stored locally as 1.
                                item['nasp'] = 1 if item['nasp'] == '293' else item['nasp']
                                try:
                                    ad['town'] = Town.objects.get(pk=item['nasp'])
                                except Town.DoesNotExist:
                                    # Unknown town: skip the whole item.
                                    continue

                            # 'pub_date' is only set when the item had a
                            # non-empty 'datap'; do not fail when it is missing.
                            if ad.get('pub_date'):
                                pub_date = dateutil.parser.parse(ad['pub_date'])
                                if str(pub_date.time()) == "00:00:00":
                                    # Date without a time: use the current time of day.
                                    pub_date = datetime.combine(pub_date.date(), datetime.now().time())

                                # NOTE(review): replace(tzinfo=...) may give a
                                # wrong UTC offset with pytz zones - confirm.
                                pub_date_utc = pub_date.replace(tzinfo=timezone.get_current_timezone())
                                ad['pub_date'] = ad['order_date'] = pub_date_utc

                            # NOTE(review): str.find() returns -1 (truthy) when
                            # the marker is absent and 0 (falsy) when it is at
                            # the start, so this is not a "contains" test.
                            # Left unchanged; confirm the intended rule.
                            if item['fotosite'] and item['fotosite'].find('&have_images'):
                                ad['url'] = item['fotosite'].replace('&have_images', '')

                            ad_obj = Ad(**ad)
                            ad_obj.save()
                            statistic[table][private_type] += 1

                            find_similar.delay(ad_obj.id)
                            if ad_obj.url:
                                parsed_uri = urlparse(ad_obj.url)
                                if parsed_uri.netloc in Crawler.DOMAINS:
                                    import_attachments.delay(ad_obj.id)

                            print(ad_obj)
                            time.sleep(1)
        except:
            # Record the failure for the log, then let it propagate.
            log += "Error: " + traceback.format_exc() + "\n"
            raise
        finally:
            # One line per table: "<table>: <green count> / <red count>".
            for name, stats in statistic.items():
                log += name + ': '
                if 'green' in stats:
                    log += str(stats['green'])
                log += " / "
                if 'red' in stats:
                    log += str(stats['red'])
                log += "\n"
            send_log(log)
示例#21
0
def get_listing_items(request, page=None, cat=None):  # not a view itself; shared code for the two listing views
    """Run the ad search for a request and return one prepared result page.

    The result is cached for an hour under an MD5 of the query parameters
    and the page number.

    :param request: the current request; its GET data drives the search
    :param page: page number to fetch (the first page when falsy)
    :param cat: optional category object; overrides the ``category`` parameter
    :return: tuple ``(page, form, counts, params)`` where ``counts`` holds the
        ``all`` / ``private`` / ``business`` totals and ``params`` is the
        url-encoded query string
    :raises Http404: when the requested page is empty
    """
    limit = 12  # ads per page
    data = request.GET.copy()
    if cat:
        data['category'] = cat.id

    # Encode every value as UTF-8 bytes before url-encoding (Python 2).
    str_data = {}
    for k, v in data.iteritems():
        str_data[k] = unicode(v).encode('utf-8')
    params = urllib.urlencode(str_data, True)

    cache_key = 'search_' + params + '_p' + str(page or 1)
    cache_key = hashlib.md5(cache_key).hexdigest()
    cache_context = cache.get(cache_key)
    if cache_context is None:
        if cat is None and 'category' in request.GET and request.GET['category']:
            cat = Category.objects.get(pk=request.GET['category'])

        form_class = Ad.get_search_form(request, cat)
        form = form_class(data, label_suffix="")
        objects = form.search()

        # The private/business filter is applied here rather than in the
        # form because the unfiltered total is needed for the statistics.
        all_ads = None
        private = None
        if 'private' in data:
            all_ads = objects.count()
            objects = objects.filter(private=int(data['private'] == "private"))
            private = "private" if data['private'] == "private" else "business"

        paginator = Paginator(objects, limit)
        try:
            page = paginator.page(page or 1)
        except EmptyPage:
            raise Http404

        # Derive the three totals from whichever counts are already known.
        if private is None:
            all_ads = paginator.count
            private_c = objects.filter(private=1).count()
            business_c = all_ads - private_c
        elif private == 'private':
            private_c = paginator.count
            business_c = all_ads - private_c
        else:
            business_c = paginator.count
            private_c = all_ads - business_c
        counts = {'all': all_ads,
                  'private': private_c,
                  'business': business_c}

        # Prepare the page: load related objects in bulk instead of per ad.
        ids = [int(obj.pk) for obj in page.object_list]
        # Haystack results may be stale: keep only ads still in the DB and
        # swap the search results for the model instances.
        if page.object_list and isinstance(page.object_list[0], SearchResult):
            ads = {obj.id: obj for obj in Ad.objects.filter(id__in=ids)[:limit]}
            new_object_list = []
            for obj in page.object_list:
                if int(obj.pk) in ads:
                    new_object_list.append(ads[int(obj.pk)])
            page.object_list = new_object_list

        images = ImageAttachment.objects.all().filter(ad_id__in=ids)
        # One logo per ad: a later image of the same ad overwrites an earlier one.
        logos = {ad_id: None for ad_id in ids}
        for img in images:
            logos[img.ad_id] = img
        dist_ids = [obj.district_id for obj in page.object_list]
        districts = {dist.id: dist for dist in District.objects.all().filter(id__in=set(dist_ids))}

        if cat:
            categories = {cat.id: cat}
        else:
            cat_ids = [obj.category_id for obj in page.object_list]
            categories = {cat.id: cat for cat in Category.objects.all().filter(id__in=set(cat_ids))}

        sub_cat_ids = []
        for obj in page.object_list:
            if obj.sub_category_id:
                sub_cat_ids.append(obj.sub_category_id)
        sub_categories = {cat.id: cat for cat in SubCategory.objects.all().filter(id__in=set(sub_cat_ids))}

        # Attach the pre-fetched related objects to every ad on the page.
        new = []
        for obj in page.object_list:
            obj.category = categories[obj.category_id]
            if obj.sub_category_id:
                obj.sub_category = sub_categories[obj.sub_category_id]
            if obj.district_id:
                obj.district = districts[obj.district_id]
            obj.logo = logos[obj.id]
            new.append(obj)
        page.object_list = new

        # NOTE(review): presumably replaces the paginator's full result set
        # with this page's ads so the cached value stays small - confirm.
        page.paginator.object_list = page.object_list
        cache.set(cache_key, {'page': page, 'counts': counts, 'form': form}, 3600)
    else:
        counts = cache_context['counts']
        form = cache_context['form']
        page = cache_context['page']

    return page, form, counts, params
示例#22
0
	def setUp(self):
		"""Persist three newspapers and three ads, then start Firefox.

		The ads get the explicit ids 1, 2 and 3; only the second ad is
		published, in the first two newspapers.
		"""
		newspaper1 = Newspaper()
		newspaper1.name = 'Test Newspaper 1'
		newspaper1.save()

		newspaper2 = Newspaper()
		newspaper2.name = 'Test Newspaper 2'
		newspaper2.save()

		newspaper3 = Newspaper()
		newspaper3.name = 'Test Newspaper 3'
		newspaper3.save()

		ad1 = Ad()
		ad1.name = 'Test Ad 1'
		ad1.id = 1
		ad1.content = 'Content 1'
		ad1.save()

		ad2 = Ad()
		ad2.name = 'Test Ad 2'
		ad2.id = 2
		ad2.content = 'Content 2'
		# The ad must be saved once before the relation can be populated.
		ad2.save()
		ad2.newspapers.add(newspaper1, newspaper2)
		ad2.save()

		ad3 = Ad()
		ad3.name = 'Test Ad 3'
		# The id belongs on ad3; assigning it to ad2 here changed the id of
		# the already saved second ad in memory and left ad3 without one.
		ad3.id = 3
		ad3.content = 'Content 3'
		ad3.save()

		self.browser = webdriver.Firefox()
		# Let element lookups wait up to 3 seconds before failing.
		self.browser.implicitly_wait(3)
示例#23
0
    def setUp(self):
        """Persist two ads used as fixtures by the tests."""
        fixtures = (
            ('Test Ad 1', 'Content 1'),
            ('Test Ad 2', 'Content 2'),
        )
        for ad_name, ad_content in fixtures:
            fixture_ad = Ad()
            fixture_ad.name = ad_name
            fixture_ad.content = ad_content
            fixture_ad.save()
示例#24
0
	def setUp(self):
		"""Start a Firefox browser and persist three ads as fixtures."""
		self.browser = webdriver.Firefox()
		# Let element lookups wait up to 3 seconds before failing.
		self.browser.implicitly_wait(3)

		fixtures = (
			('Test Ad 1', 'Content 1'),
			('Test Ad 2', 'Content 2'),
			('Test Ad 3', 'Content 3'),
		)
		for ad_name, ad_content in fixtures:
			fixture_ad = Ad()
			fixture_ad.name = ad_name
			fixture_ad.content = ad_content
			fixture_ad.save()
示例#25
0
    def setUp(self):
        """Persist three newspapers and two ads with overlapping newspapers.

        The first ad runs in newspapers 1 and 2, the second in 2 and 3.
        """
        newspaper1 = Newspaper(name='Test Newspaper 1')
        newspaper1.save()

        newspaper2 = Newspaper(name='Test Newspaper 2')
        newspaper2.save()

        newspaper3 = Newspaper(name='Test Newspaper 3')
        newspaper3.save()

        first_ad = Ad()
        first_ad.name = 'Name 1'
        first_ad.content = 'Content 1'
        # An ad must be saved once before the relation can be populated.
        first_ad.save()
        first_ad.newspapers.add(newspaper1, newspaper2)
        first_ad.save()

        second_ad = Ad()
        second_ad.name = 'Name 2'
        second_ad.content = 'Content 2'
        second_ad.save()
        second_ad.newspapers.add(newspaper2, newspaper3)
        # Save the ad that was just modified (this line saved first_ad again).
        second_ad.save()
示例#26
0
def extract_data(ad_urls,uuid):
    """Scrape each ad page in ``ad_urls`` into a saved ``Ad``.

    For every URL the title, the attribute table (date, price, address,
    bathrooms, furnished, pet friendly) and the map coordinates are read.
    A page that fails at any step is reported on stdout and skipped.
    After every URL, successful or not, the ``found`` counter of the
    progress entry cached under ``uuid`` is incremented.

    :param ad_urls: iterable of ad page URLs
    :param uuid: cache key of the ``{'total': ..., 'found': ...}`` progress dict
    :return: list of the ``Ad`` objects that were saved
    """
    result = []
    # Label shown in the page's attribute table -> internal attribute name.
    attribute_dict = { 'Date Listed' : 'date' ,
                       'Price' : 'price' ,
                       'Address' : 'address',
                       'Bathrooms (#)' : 'bathrooms' ,
                       'Furnished' : 'furnished' ,
                       'Pet Friendly' : 'pet_friendly'
                     }

    for ad_url in ad_urls :
        try:
            items = dict() # keyword arguments for the Ad constructor
            items['url'] = ad_url
            # fetch and parse the ad page
            ad_soup = bs(opener.open(ad_url).read())
            map_link = ''

            # title, with the ad-section markers stripped
            title = ad_soup.find('h1',id='preview-local-title').getText().replace('google_ad_section_start','').replace('google_ad_section_end','')
            items['title'] = title

            # attribute table: a label cell is followed by its value cell
            for tr in ad_soup.find('table' , id='attributeTable').findAll('tr') :
                for td in tr.findAll('td') :
                    key =  td.getText()
                    if key not in attribute_dict :
                        continue
                    attribute = attribute_dict[key]
                    value = td.findNext('td').getText()

                    if attribute == 'date' :
                        items['pub_date'] =  datetime.datetime.strptime(value,"%d-%b-%y")

                    elif attribute == 'bathrooms' :
                        # Raw string with an escaped dot: "1.5 bathrooms" -> 1.5
                        items['bathrooms'] = float(re.match(r'(\d+\.?\d?) bathroom.*',value).group(1))

                    elif attribute == 'price' :
                        # ``in`` also catches the word at index 0, which
                        # ``find(...) > 0`` missed.
                        if 'contact' in value :
                            items['rent'] = 0
                        else :
                            # Drop the leading currency sign and thousands separators.
                            items['rent'] = int(float(value[1:].replace(',','')))

                    elif attribute == 'address' :
                        items['address'] = value.replace('View map','')

                    else : # 'furnished' or 'pet_friendly'
                        items[attribute] = value != 'No'

            # map coordinates, taken from the static map image URL
            map_url = 'http://montreal.kijiji.ca' + ad_soup.find('a', attrs = { 'class' : 'viewmap-link' } ).get('href')
            map_soup = bs(opener.open(map_url).read())
            for noscript in map_soup.findAll('noscript') :
                if noscript.find('img') :
                    map_link =  noscript.find('img').get('src')
            coords = urllib2.urlparse.parse_qs(urllib2.urlparse.urlparse(map_link).query)
            lat_lng = coords['center'][0].split(',')
            items['lat'] = float(lat_lng[0])
            items['lng'] = float(lat_lng[1])

            print(items)
            ad = Ad(**items)
            ad.save()
            result.append(ad)

        except Exception as e:
            # Best effort: report the failure and go on with the next URL.
            print("[FAILED] %s" % ad_url)
            print(e)

        # Progress is counted per processed URL, whether it succeeded or not.
        cache_obj = cache.get(uuid)
        if cache_obj is not None :
            # The entry can be missing (e.g. expired); that must not abort
            # the scrape and lose the ads collected so far.
            new_obj = { 'total' : cache_obj['total'] , 'found' : cache_obj['found'] + 1 }
            cache.set(uuid,new_obj)

    return result
示例#27
0
	def setUp(self):
		"""Persist three newspapers and two ads with overlapping newspapers.

		The first ad runs in newspapers 1 and 2, the second in 2 and 3.
		"""
		newspaper1 = Newspaper(name='Test Newspaper 1')
		newspaper1.save()

		newspaper2 = Newspaper(name='Test Newspaper 2')
		newspaper2.save()

		newspaper3 = Newspaper(name='Test Newspaper 3')
		newspaper3.save()

		first_ad = Ad()
		first_ad.name = 'Name 1'
		first_ad.content = 'Content 1'
		# An ad must be saved once before the relation can be populated.
		first_ad.save()
		first_ad.newspapers.add(newspaper1, newspaper2)
		first_ad.save()

		second_ad = Ad()
		second_ad.name = 'Name 2'
		second_ad.content = 'Content 2'
		second_ad.save()
		second_ad.newspapers.add(newspaper2, newspaper3)
		# Save the ad that was just modified (this line saved first_ad again).
		second_ad.save()
示例#28
0
def import_ads(file_id):
    """Create ads from the parsed lines of an uploaded import file.

    The file's settings (a ``PersistentDict``) provide ``lines`` (the parsed
    rows), ``matching`` (ad field -> column position(s), or a value
    containing ``'Constant'``) and optionally ``constants`` (ad field ->
    fixed value). Ads imported earlier from the same file are removed first.
    A row only becomes an ad when every field in ``MatchFieldsForm.required``
    was filled. Progress is reported through the current celery task after
    each row.

    :param file_id: primary key of the ``ImportFile`` to process
    :return: list of ids of the created ads (also stored as ``params['ids']``)
    """
    imported_file = ImportFile.objects.get(id=file_id)
    clear_prev_ads(imported_file)
    with PersistentDict(imported_file.settings.path, 'w', format='json') as params:
        ids = []
        count = len(params['lines'])
        for i, line in enumerate(params['lines'], 1):
            ad = {}
            for field in params['matching']:
                matched = params['matching'][field]
                if matched != 'Constant' and 'Constant' not in matched:
                    # Value comes from one or several columns of the row.
                    poss = matched if isinstance(matched, list) else [matched]
                    value = ""
                    for pos in poss:
                        pos = int(pos)
                        # ``<=``: the last valid index is len(line) - 1.
                        if len(line) <= pos or not line[pos].strip():
                            continue
                        value += ", " + line[pos] if value else line[pos]

                    if not value:
                        continue
                    if field in CompareFieldsForm.compared_fields:
                        # Value is the pk of a user-defined comparison row.
                        curr_comp_field = CompareFieldsForm.compared_fields[field]
                        compared_obj = curr_comp_field['model'].objects.get(pk=value)
                        if field == 'category':
                            if compared_obj.sub_category is not None:
                                ad['category'] = compared_obj.sub_category.category
                                ad['sub_category'] = compared_obj.sub_category
                            else:
                                ad['category'] = compared_obj.category
                        elif field == 'district':
                            if compared_obj.district:
                                ad['district'] = compared_obj.district
                                ad['town'] = compared_obj.district.town
                            else:
                                ad['town'] = compared_obj.town
                        else:
                            ad[field] = compared_obj.value
                    elif field == 'pub_date':
                        pub_date = dateutil.parser.parse(value)
                        if str(pub_date.time()) == "00:00:00":
                            # Date without a time: use the current time of day.
                            pub_date = datetime.combine(pub_date.date(), datetime.now().time())
                        ad[field] = ad['order_date'] = pub_date
                    else:
                        ad[field] = Ad.filter_field(value, field)
                elif 'constants' in params and field in params['constants']:  # constant
                    value = Ad.filter_field(str(params['constants'][field]), field)
                    if field in CompareFieldsForm.compared_fields:
                        if field == 'category':
                            # "<category pk>" or "<category pk>-<sub-category pk>"
                            cat_values = value.split('-')
                            if len(cat_values) > 1:
                                sub_cat = SubCategory.objects.get(pk=cat_values[1])
                                ad['category'] = sub_cat.category
                                ad['sub_category'] = sub_cat
                            else:
                                cat = Category.objects.get(pk=cat_values[0])
                                ad['category'] = cat
                        elif 'foreign' in CompareFieldsForm.compared_fields[field]:
                            ad[field] = CompareFieldsForm.compared_fields[field]['foreign'].objects.get(pk=value)
                        else:
                            for key, val in CompareFieldsForm.compared_fields[field]['choices']:
                                if str(key) == value:
                                    ad[field] = val
                    else:
                        ad[field] = value
            # Create the ad only when no required field is missing.
            empty = set(MatchFieldsForm.required) - set(ad.keys())
            if not empty:
                ad_obj = Ad(**ad)
                ad_obj.imported = imported_file
                ad_obj.save()
                find_similar.delay(ad_obj.id)
                if ad_obj.url:
                    parsed_uri = urlparse(ad_obj.url)
                    if parsed_uri.netloc in Crawler.DOMAINS:
                        import_attachments.delay(ad_obj.id)
                ids.append(ad_obj.id)
            celery.current_task.update_state(state='PROGRESS', meta={'current': i, 'total': count})
        params['ids'] = ids
        return ids