# Collect one [keyword name, revenue] pair for every keyword of every movie.
# NOTE: despite its name, year_revenue holds keyword/revenue pairs here.
for movie in movies:
    revenue = int(movie['revenue'])
    tlist = json.loads(movie['keywords'])
    for keyword in tlist:
        year_revenue.append([keyword['name'], revenue])

print(year_revenue)

print('----#--------#--------#----')
# Sum the revenue per keyword. groupby only merges adjacent rows, so the
# pairs are sorted first to bring equal keywords together.
my_list2 = []
keyword_revenue = [['keyword', 'revenue']]
justwords = []
pairs_by_keyword = groupby(sorted(year_revenue), key=lambda pair: pair[0])
for keyword, pairs in pairs_by_keyword:
    total_revenue = sum(pair[1] for pair in pairs)
    # Only keywords whose summed revenue exceeds the cut-off are kept.
    if total_revenue > 5893668099:
        scaled_size = int(total_revenue / 500000000)
        my_list2.append({'word': keyword, 'size': scaled_size})
        keyword_revenue.append([keyword, total_revenue])
        justwords.append(keyword)
print(my_list2)
print(" ".join(justwords))
csv_write(keyword_revenue, 'i4keyword_revenue.csv')
'''
[{'word': '3d', 'size': 77}, {'word': 'aftercreditsstinger', 'size': 72}, {'word': 'airplane', 'size': 14}, {'word': 'alien', 'size': 29}, {'word': 'animation', 'size': 19}, {'word': 'based on comic book', 'size': 42}, {'word': 'based on novel', 'size': 55}, {'word': 'based on young adult novel', 'size': 17}, {'word': 'battle', 'size': 17}, {'word': 'best friend', 'size': 12}, {'word': 'biography', 'size': 12}, {'word': 'cia', 'size': 13}, {'word': 'conspiracy', 'size': 13}, {'word': 'daughter', 'size': 15}, {'word': 'dc comics', 'size': 14}, {'word': 'dinosaur', 'size': 13}, {'word': 'duringcreditsstinger', 'size': 113}, {'word': 'dying and death', 'size': 18}, {'word': 'dystopia', 'size': 42}, {'word': 'elves', 'size': 13}, {'word': 'escape', 'size': 15}, {'word': 'explosion', 'size': 15}, {'word': 'family', 'size': 12}, {'word': 'father son relationship', 'size': 14}, {'word': 'friendship', 'size': 22}, {'word': 'future', 'size': 17}, {'word': 'hero', 'size': 13}, {'word': 'imax', 'size': 27}, {'word': 'island', 'size': 13}, {'word': 'london england', 'size': 13}, {'word': 'los angeles', 'size': 13}, {'word': 'love', 'size': 16}, {'word': "love of one's life", 'size': 11}, {'word': 'magic', 'size': 29}, {'word': 'martial arts', 'size': 13}, {'word': 'marvel cinematic universe', 'size': 20}, {'word': 'marvel comic', 'size': 38}, {'word': 'mission', 'size': 16}, {'word': 'monster', 'size': 12}, {'word': 'murder', 'size': 17}, {'word': 'musical', 'size': 16}, {'word': 'new york', 'size': 13}, {'word': 'orcs', 'size': 12}, {'word': 'rescue', 'size': 13}, {'word': 'revenge', 'size': 24}, {'word': 'robot', 'size': 11}, {'word': 'saving the world', 'size': 23}, {'word': 'scientist', 'size': 12}, {'word': 'secret agent', 'size': 13}, {'word': 'secret identity', 'size': 18}, {'word': 'sequel', 'size': 51}, {'word': 'ship', 'size': 16}, {'word': 'soldier', 'size': 12}, {'word': 'space', 'size': 16}, {'word': 'space opera', 'size': 13}, {'word': 'spy', 'size': 15}, 
{'word': 'super powers', 'size': 15}, {'word': 'superhero', 'size': 52}, {'word': 'suspense', 'size': 15}, {'word': 'terrorist', 'size': 14}, {'word': 'time travel', 'size': 13}, {'word': 'undercover', 'size': 14}, {'word': 'violence', 'size': 29}, {'word': 'war', 'size': 13}, {'word': 'witch', 'size': 18}, {'word': 'woman director', 'size': 30}]

'''
import re
# Captures what follows an "@": every character up to the next whitespace
# or colon.
p = re.compile(r'@([^\s:]+)')

# Example:
# test_str = "@galaxy5univ I like you\nRT @BestOfGalaxies: Let's sit under the stars ...\n@jonghyun"
# print(p.findall(test_str))
# # => ['galaxy5univ', 'BestOfGalaxies', 'jonghyun']
#
# URL pattern kept for reference (not used below):
# p2 = re.compile(r'(?:http|ftp|https)://(?:[\w_-]+(?:(?:\.[\w_-]+)+))(?:[\w.,@?^=%&:/~+#-]*[\w@?^=%&/~+#-])?')

#---------- ------ ------------------------------ ------------------
# path_to_file = 'twitter_data/elonmusk_following.txt'
path_to_file = 'twitter_data/pualg_follwing.txt'

# Decode explicitly as UTF-8 so the result does not depend on the platform's
# default encoding, and read the file in one call instead of concatenating
# it line by line.
with open(path_to_file, 'r', encoding='UTF-8') as file_object:
    text = file_object.read()

list0 = p.findall(text)
print(len(list0))
# De-duplicate, then sort so the exported rows come out in the same order on
# every run (plain set iteration order varies between runs for strings).
list1 = sorted(set(list0))
print(len(list1))

# One single-column row per distinct match.
mydata = [[item] for item in list1]

# csv_write(mydata, 'twitter_data/elonmusk_following.csv')
csv_write(mydata, 'twitter_data/pualg_follwing.csv')
	# NOTE(review): the statement that encloses this indented body is not
	# visible here; presumably it runs once per `movie` in `movies` - confirm.
	# Decode the JSON text stored under 'production_countries'.
	tlist = json.loads(movie['production_countries'])
	for t in tlist:
		name = t['name']
		# A vote count of None is recorded as 0.
		if movie['vote_count'] is not None:
			vote_count = float(movie['vote_count'])
		else:
			vote_count = 0
		# One [name, vote_count] row per decoded entry; note that the list is
		# called year_revenue even though it receives name/vote pairs here.
		item = [name, vote_count]
		year_revenue.append(item)

print(year_revenue)

print('----#--------#--------#----')
# Header row first, then one [key, total] row per distinct first element of
# year_revenue whose summed second elements exceed 30. The rows are sorted
# before grouping because groupby only merges adjacent equal keys.
mylist = [["Country", "Value"]]
for country, rows in groupby(sorted(year_revenue), key=lambda row: row[0]):
    total = sum(row[1] for row in rows)
    if total > 30:
        mylist.append([country, total])

# Preview of the first ten rows (header included).
print(mylist[:10])
print('----#--------#--------#----')

csv_write(mylist, 'i2country_vote.csv')
示例#4
0
print('----#--------#--------#----')
# mylist starts with the header row; genres collects the keys that pass the
# filter below.
mylist = [["genres", "vote_average"]]
genres = []

# Sum the second element per distinct first element of year_revenue (sorted
# first so that groupby sees equal keys next to each other) and keep only
# the totals above 30.
for genre, rows in groupby(sorted(year_revenue), key=lambda row: row[0]):
    total = sum(row[1] for row in rows)
    if total > 30:
        mylist.append([genre, total])
        genres.append(genre)

from random import randint

# Disabled experiments: one random integer per kept genre, tried with upper
# bounds of 90, 1961, 2492 and 1202, e.g.
# x = [randint(0, 90) for p in range(0, len(genres))]
# print(x)

# Preview of the first ten rows (header included).
print(mylist[:10])
print('----#--------#--------#----')

csv_write(mylist, 'i2genres_vote.csv')
# Header row, followed by one row of numeric fields per movie.
mylist.append(["budget", "revenue", "popularity", "runtime", "vote_average", "vote_count"])
for movie in movies:
    # Budget and revenue are divided by 1,000,000 before being stored.
    budget = float(movie['budget']) / 1000000
    revenue = float(movie['revenue']) / 1000000
    popularity = float(movie['popularity'])
    # An empty runtime string and None vote fields fall back to 0.
    runtime = float(movie['runtime']) if movie['runtime'] != '' else 0
    vote_average = float(movie['vote_average']) if movie['vote_average'] is not None else 0
    vote_count = float(movie['vote_count']) if movie['vote_count'] is not None else 0
    mylist.append([budget, revenue, popularity, runtime, vote_average, vote_count])

# Preview of the first ten rows.
print(mylist[:10])
print('----#--------#--------#----')

csv_write(mylist, 'i1one_to_one.csv')
示例#6
0
# Scratch example retained from development.
# NOTE(review): the sample outputs below list values that do not occur in
# test_str, so they look stale - confirm or remove.
# test_str = "@galaxy5univ I like you\nRT @BestOfGalaxies: Let's sit under the stars ...\n@jonghyun"
# p2 = re.compile(r'(?:http|ftp|https)://(?:[\w_-]+(?:(?:\.[\w_-]+)+))(?:[\w.,@?^=%&:/~+#-]*[\w@?^=%&/~+#-])?')
# print(p2.findall(test_str))
# # => ['galaxy5univ', 'BestOfGalaxies', 'jonghyun__bot', 'yosizo', 'LDH_3_yui']
# # => ['https://yahoo.com', 'https://msn.news.com']


# ---------- ------ ------------------------------ ------------------
# path_to_file = 'twitter_data/elonmusk_following.txt'
path_to_file = "twitter_data/pualg_follwing.txt"
# path_to_file = "twitter_data/pualg_follwing_windows.txt"


# Read the whole file in one call instead of concatenating it line by line.
with open(path_to_file, 'r', encoding='UTF-8') as file_object:
    text = file_object.read()

# `p` is a compiled pattern defined outside this block.
list0 = p.findall(text)
print(len(list0))
# De-duplicate, then sort so the exported rows come out in the same order on
# every run (plain set iteration order varies between runs for strings).
list1 = sorted(set(list0))
print(len(list1))

# One single-column row per distinct match.
mydata = [[item] for item in list1]

# csv_write(mydata, 'twitter_data/elonmusk_following.csv')
csv_write(mydata, "twitter_data/pualg_follwing.csv")
# Record a [year, revenue] pair per movie; the year is the first four
# characters of the release date.
for movie in movies:
    year = movie['release_date'][:4]
    revenue = float(movie['revenue'])
    year_revenue.append([year, revenue])

print(year_revenue)

print('----#--------#--------#----')
# One [key, summed value] row per distinct first element. The pairs are
# sorted before grouping because groupby only merges adjacent equal keys.
my_list2 = [
    [release_year, sum(pair[1] for pair in pairs)]
    for release_year, pairs in groupby(sorted(year_revenue), key=lambda pair: pair[0])
]

print(my_list2)
csv_write(my_list2, 'year_revenue_sum.csv')


def my_mean(values):
    """Return the arithmetic mean of *values* as a float.

    The input is walked exactly once, so one-shot iterables such as
    generators are accepted. An empty input raises ZeroDivisionError.
    """
    total = 0.0
    count = 0
    # enumerate(..., start=1) leaves `count` equal to the number of items
    # seen; it stays 0 when the iterable yields nothing.
    for count, value in enumerate(values, start=1):
        total += value
    return total / count

print('----#--------#--------#----')
# Build [key, mean] rows: year_revenue is sorted so that rows with the same
# first element are adjacent, grouped on that first element, and each
# group's second elements are averaged with my_mean.
my_list3 = []
for i, g in groupby(sorted(year_revenue), key=lambda x: x[0]):
    # g is consumed once, by the generator expression handed to my_mean.
    my_list3.append([i, my_mean(v[1] for v in g)])