Пример #1
0
def get_json(topic):
    r = requests.get('http://itunes.apple.com/search?term=' + topic +
                     '&entity=software&limit=300')
    j = simplejson.loads(r.content)
    for i in j['results']:
        if not Apps.objects.filter(name=get_name(i)):
            if (i['primaryGenreName'] in ['Education', 'Reference']) or (
                ('Education' in i['genres']) and
                (i['primaryGenreName'] in acceptable_topics)):
                try:
                    m1 = Apps(name=get_name(i),
                              description=get_description(i),
                              creator=get_creator(i),
                              subject=i['primaryGenreName'],
                              price=get_price(i),
                              rating=get_rating(i),
                              artwork=get_artwork(i),
                              link=get_link(i))
                    # date_added=datetime.datetime.now()) <-- check if needed
                    m1.save()
                    print get_name(i) + " was added to the database! ", topic
                except:
                    print get_name(i), get_link(i)
                    raise
            else:
                print get_name(i) + " is not an Education app! " + str(
                    i['genres'])
Пример #2
0
def add_to_db(d):
	for entry in d:
		if not Apps.objects.filter(name=entry):
			m1 = Apps(name=entry, platform=d[entry]['platform'], 
				creator=d[entry]['creator'], subject=d[entry]['subject'], 
				price=d[entry]['price'], rating=d[entry]['rating'],
				artwork=d[entry]['artwork'], link=d[entry]['link'])
			m1.save()
		else:
			print "The app '%s' is already in the database!" % entry
Пример #3
0
def add_update(link, val):
    # future: find downloads, age, comment
    soup = get_soup(link)

    name = get_name(soup)
    creator = get_creator(soup)
    platform = get_platform(soup)
    subject = get_subject(soup)
    if subject == 'NotEducation':  # change to "creator == 'NotEducation', set in classifier"
        print "The app '%s' is not Education-related and was not added to the database!" % name
    else:
        price = get_price(soup)
        rating = get_rating(soup)
        artwork = get_artwork(soup)

        if val == 0:
            m1 = Apps(name=name,
                      platform=platform,
                      creator=creator,
                      subject=subject,
                      price=price,
                      rating=rating,
                      artwork=artwork,
                      link=link,
                      date_added=datetime.datetime.now())
            m1.save()
            print "The app '%s' was successfully added to the database!" % name
        elif val == 1:
            m1 = Apps.objects.get(link=link)
            m1.update(name=name,
                      platform=platform,
                      creator=creator,
                      subject=subject,
                      price=price,
                      rating=rating,
                      artwork=artwork,
                      link=link)
            print "The app '%s' was successfully updated!" % name
        else:
            print "What the hell are you doing with your code!?"
Пример #4
0
def page_to_db(link):
    """Scrape the app page at ``link`` and save it as a new ``Apps`` record.

    Every field is extracted independently.  When a field cannot be read a
    message is printed (except for a rating missing from both known
    locations) and a fallback is stored instead: ``link`` for the name, ''
    for creator, platform, subject and artwork, ``None`` for price and
    rating.  The record is saved with ``crawl_binary=0`` and
    ``date_added=datetime.datetime.now()``.

    Errors raised by the HTTP request, the parser or ``save()`` are not
    handled here and propagate to the caller.
    """
    # future: downloads, age, comment

    html = requests.get(link).text
    soup = BeautifulSoup(html)

    # get_name
    try:
        title = re.compile('<title>.+</title>').findall(str(soup))[0]
        # Slice out the text between "<title>" and "</title>".
        begin, end = title.index('>') + 1, title.index('<', 1)
        full_name = title[begin:end]
        if 'App Store - ' in full_name:
            # Keep what follows "Store - " (8 characters long).
            name = full_name[full_name.index('Store - ') + 8:]
        elif ' for iP' in full_name:
            name = full_name[:full_name.index(' for iP')]
        else:
            name = full_name
    except (AttributeError, IndexError, ValueError, TypeError):
        # IndexError: no <title>...</title> on a single line was found.
        name = link
        print "Error retrieving name from %s." % link

    # get_creator
    try:
        lockup = soup.find('div', {'class': 'lockup product application'})
        creator_text = str(
            lockup.find('li', {
                'class': 'copyright'
            }).previous_sibling.get_text())
        # Drops the first 8 characters -- presumably a fixed label in front
        # of the creator name; TODO confirm against a live page.
        creator = creator_text[8:]
    except (AttributeError, ValueError, TypeError):
        creator = ''
        print "Error retrieving creator for %s." % name

    # get_platform
    try:
        tag_text = re.compile('>[^<]*<')
        pills = soup.find_all('a', {'metrics-loc': 'Pill_'})
        # Take the text right after each tag's first '>' and join the
        # pieces with '/'.
        platform = '/'.join(
            tag_text.findall(str(pill))[0].lstrip('>').rstrip('<')
            for pill in pills)
    except (AttributeError, IndexError, ValueError, TypeError):
        # IndexError: a matched tag rendered without any '>...<' text part.
        platform = ''
        print "Error retrieving platform for %s." % name

    # get_subject
    try:
        lockup = soup.find('div', {'class': 'lockup product application'})
        subject = str(
            lockup.find('span', {
                'class': 'label'
            }).next_sibling.get_text())
    except (AttributeError, ValueError, TypeError):
        subject = ''
        print "Error retrieving subject for %s." % name

    # get_price
    try:
        price_text = soup.find('div', {'class': 'price'}).get_text()
        try:
            price = float(price_text[1:])  # get rid of $ in front
        except ValueError:
            # Compare case-insensitively: ("Free" or "free") is just "Free",
            # so a lowercase label would otherwise never match.
            if price_text.strip().lower() == 'free':
                price = 0.0
            else:
                price = None
                print "Error retrieving float-type price for %s." % name
    except (AttributeError, TypeError):
        price = None
        print "Error retrieving price for %s." % name

    # get_rating
    rating_pattern = re.compile(r'[0-5]\.?[0-9]*')
    try:
        rating_text = soup.find('div', {'class': 'rating'}).get('aria-label')
        rating = float(rating_pattern.findall(rating_text)[0])
    except AttributeError:
        # No <div class="rating">; fall back to the "app-rating" block.
        try:
            rating_text = soup.find('div', {
                'class': 'app-rating'
            }).a.get_text()
            rating = float(rating_pattern.findall(rating_text)[0])
        except (AttributeError, IndexError, TypeError, ValueError):
            # Neither location yielded a rating; stored as None, no message.
            rating = None
    except (IndexError, ValueError, TypeError):
        rating = None
        print "Error retrieving rating for %s." % name

    # get_artwork
    try:
        artwork = str(soup.find('img', {'class': 'artwork'}).get('src'))
    except (AttributeError, ValueError, TypeError):
        artwork = ''
        print "Error retrieving artwork for %s." % name

    record = Apps(name=name,
                  platform=platform,
                  creator=creator,
                  subject=subject,
                  price=price,
                  rating=rating,
                  artwork=artwork,
                  link=link,
                  crawl_binary=0,
                  date_added=datetime.datetime.now())
    record.save()

    print "The app '%s' was successfully added to the database!" % name