示例#1
0
def run(request):
    """Scrobble recently played xiami tracks to last.fm for every user.

    For each user row returned by the database, scrape that user's
    recently played songs from xiami; if anything new was played,
    submit it to last.fm and record the time of the newest play so the
    same tracks are not scrobbled again on the next run.

    Returns an HttpResponse so the view can be polled by a scheduler.
    """
    # Read the user list from the database.
    users = database.get_user()

    for user in users:
        # Scrape this user's recently played songs from xiami.
        titles, artists, track_times, record_time = scrobble.xiami(user)
        if titles:
            # FIX: print as a function call -- the Python-2-only print
            # statement breaks on Python 3; this form works on both.
            print('titles: %s, artists: %s ' % (titles, artists))
            scrobble.lastfm(titles, artists, track_times, user)

            # Persist the newest play time for this user.
            database.modify_user(user[0], record_time)
    return HttpResponse('running!')
示例#2
0
def xiami(user):
    """Scrape a user's recently played tracks from xiami.com.

    Args:
        user: a database row; user[0] is the xiami user id and user[2]
            is the last recorded play time as a '%Y-%m-%d %H:%M:%S'
            string.

    Returns:
        A tuple (titles, artists, track_times, record_time) where
        track_times are unix timestamps of the plays and record_time is
        the formatted wall-clock time of the newest play, or
        (None, None, None, None) when there is nothing new to scrobble.
    """
    # Browser-like headers so xiami serves the regular HTML page.
    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:27.0) Gecko/20100101 Firefox/27.0',
               'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
               'Accept-Language': 'zh-cn,zh;q=0.8,en-us;q=0.5,en;q=0.3',
               'Accept-Encoding': 'gzip, deflate',
               'DNT': '1',
               'Connection': 'keep-alive'}
    xiami_url = 'http://www.xiami.com/space/charts-recent/u/%s' % (user[0])
    r = requests.get(xiami_url, headers=headers)
    soup = BeautifulSoup(r.content, 'html5lib')
    last_time = datetime.strptime(user[2], '%Y-%m-%d %H:%M:%S')
    # FIX: timedelta.seconds ignores the .days component, so any gap
    # longer than one day wrapped around; total_seconds() measures the
    # whole interval.  Floor division keeps the original integer
    # semantics on Python 3 as well.
    minutes = int((datetime.now() - last_time).total_seconds() // 60)
    # Keep only entries rendered as "N minutes ago" (分钟前), extracting N.
    # (Raw strings for regexes: '\d' is an invalid escape otherwise.)
    track_times = soup.findAll('td', class_='track_time')
    track_times = [re.search(r'\d+', track_time.text).group()
                   for track_time in track_times
                   if re.search(u'分钟前', track_time.text)]
    # The newest entry may instead read "seconds ago" (秒前) or
    # "just now" (刚刚) -- i.e. a track is playing right now.
    second_html = soup.find('td', class_='track_time')
    if second_html:
        second_exist = re.search(u'秒前|刚刚', second_html.text)
    else:
        second_exist = False
    if track_times or second_exist:
        # Plays under ten minutes old: used below to treat the user as
        # still active even when nothing new qualifies for scrobbling.
        exists_times = [int(track_time) for track_time in track_times
                        if int(track_time) < 10]
        # Only plays newer than the last recorded scrobble time count.
        track_times = [int(track_time) for track_time in track_times
                       if int(track_time) < minutes]
        record_time = None
        if track_times:
            # Wall-clock time of the newest qualifying play.
            record_time = datetime.now() - timedelta(minutes=track_times[0])
            record_time = record_time.strftime('%Y-%m-%d %H:%M:%S')

        # Convert "minutes ago" offsets into absolute unix timestamps.
        track_times = [int(time.time() - track_time * 60)
                       for track_time in track_times]
        if second_exist:
            # A track playing right now scrobbles at the current time.
            record_time = datetime.now()
            record_time = record_time.strftime('%Y-%m-%d %H:%M:%S')
            track_times.insert(0, int(time.time()))

        track_number = len(track_times)
        if record_time:
            # Pull the newest track_number rows and split each into the
            # song cell (first <a>) and the artist links (the rest).
            track_htmls = soup.findAll('tr', id=re.compile(r'track_\d+'),
                                       limit=track_number)
            upper_htmls = [track_html.find('td', class_='song_name')
                           for track_html in track_htmls]
            artists_html = [artist_html.findAll('a')[1:]
                            for artist_html in upper_htmls]
            artists = []
            for artist in artists_html:
                # Skip links to personal spaces (i.xiami.com) and join
                # the remaining artist names with '&'.
                # FIX: escape the dots so the pattern matches only the
                # literal host name.
                all_artists = [one_artist.text for one_artist in artist
                               if not re.search(r'http://i\.xiami\.com',
                                                one_artist['href'])]
                all_artist = '&'.join(all_artists)
                artists.append(all_artist)
            title_htmls = soup.findAll('a', href=re.compile(r'/song/\d+'),
                                       limit=track_number)
            titles = [title['title'] for title in title_htmls]
            return (titles, artists, track_times, record_time)
        elif exists_times:
            # Nothing new to scrobble, but something played within ten
            # minutes: keep the stored time so the user stays active.
            database.modify_user(user[0], user[2])
            return (None, None, None, None)
        else:
            database.not_listening(user[0])
            return (None, None, None, None)
    else:
        # No recent plays at all.
        database.not_listening(user[0])
        return (None, None, None, None)
示例#3
0
def xiami(user):
    """Scrape a user's recent plays from xiami and return scrobble data.

    user is a database row: user[0] is the xiami user id and user[2]
    the last recorded play time as a '%Y-%m-%d %H:%M:%S' string.

    Returns (titles, artists, track_times, record_time) -- track_times
    as unix timestamps -- or (None, None, None, None) when there is
    nothing new to scrobble.
    """
    # Browser-like headers so xiami serves the regular HTML page.
    headers = {
        'User-Agent':
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:27.0) Gecko/20100101 Firefox/27.0',
        'Accept':
        'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'zh-cn,zh;q=0.8,en-us;q=0.5,en;q=0.3',
        'Accept-Encoding': 'gzip, deflate',
        'DNT': '1',
        'Connection': 'keep-alive'
    }
    # proxies = {'http': 'http://122.226.122.201:8080'}
    xiami_url = 'http://www.xiami.com/space/charts-recent/u/%s' % (user[0])
    r = requests.get(xiami_url, headers=headers)
    soup = BeautifulSoup(r.content, 'html5lib')
    last_time = datetime.strptime(user[2], '%Y-%m-%d %H:%M:%S')
    # Minutes elapsed since the last recorded play.
    # NOTE(review): timedelta.seconds ignores the .days component, so a
    # gap longer than one day wraps around -- total_seconds() looks like
    # what was intended; confirm before changing.
    minutes = (datetime.now() - last_time).seconds / 60
    # Keep only entries rendered as "N minutes ago" (分钟前), extracting N.
    track_times = soup.findAll('td', class_='track_time')
    track_times = [
        re.search(u'\d+', track_time.text).group()
        for track_time in track_times if re.search(u'分钟前', track_time.text)
    ]
    # The newest entry may instead read "seconds ago" (秒前) or
    # "just now" (刚刚) -- i.e. a track is playing right now.
    second_html = soup.find('td', class_='track_time')
    if second_html:
        second_exist = re.search(u'秒前|刚刚', second_html.text)
    else:
        second_exist = False
    if track_times or second_exist:
        # Plays under ten minutes old; used below to keep the user
        # marked as active even when nothing qualifies for scrobbling.
        exists_times = [
            int(track_time) for track_time in track_times
            if int(track_time) < 10
        ]
        # Only plays newer than the last recorded scrobble time count.
        track_times = [
            int(track_time) for track_time in track_times
            if int(track_time) < minutes
        ]
        # (translated) When the page shows just-listened music and the
        # time is under ten minutes, keep times set to 0.
        record_time = None
        if track_times:
            # Wall-clock time of the newest qualifying play.
            record_time = datetime.now() - timedelta(minutes=track_times[0])
            record_time = record_time.strftime('%Y-%m-%d %H:%M:%S')

        # Convert "minutes ago" offsets into absolute unix timestamps.
        track_times = [
            int(time.time() - track_time * 60) for track_time in track_times
        ]
        if second_exist:
            # A track playing right now scrobbles at the current time.
            record_time = datetime.now()
            record_time = record_time.strftime('%Y-%m-%d %H:%M:%S')
            track_times.insert(0, int(time.time()))

        track_number = len(track_times)
        if record_time:
            # Pull the newest track_number rows from the page.
            track_htmls = soup.findAll('tr',
                                       id=re.compile('track_\d+'),
                                       limit=track_number)
            upper_htmls = [
                track_html.find('td', class_='song_name')
                for track_html in track_htmls
            ]
            # First <a> in each song cell is the song link; the rest
            # are the artist links.
            artists_html = [
                artist_html.findAll('a')[1:] for artist_html in upper_htmls
            ]
            artists = []
            for artist in artists_html:
                # Skip links to personal spaces (i.xiami.com) and join
                # the remaining artist names with '&'.
                all_artists = [
                    one_artist.text for one_artist in artist
                    if not re.search('http://i.xiami.com', one_artist['href'])
                ]
                all_artist = '&'.join(all_artists)
                artists.append(all_artist)
            title_htmls = soup.findAll('a',
                                       href=re.compile('/song/\d+'),
                                       limit=track_number)
            titles = [title['title'] for title in title_htmls]
            return (titles, artists, track_times, record_time)
        elif exists_times:
            # Nothing new to scrobble, but something played within ten
            # minutes: re-store the old time so the user stays active.
            database.modify_user(user[0], user[2])
            return (None, None, None, None)
        else:
            database.not_listening(user[0])
            return (None, None, None, None)
    else:
        # No recent plays at all.
        database.not_listening(user[0])
        return (None, None, None, None)