-
Notifications
You must be signed in to change notification settings - Fork 0
/
link1.py
80 lines (67 loc) · 2.05 KB
/
link1.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
#! /usr/bin/python
import bleach
import re
import httplib2
import socket
from bs4 import BeautifulSoup, SoupStrainer
import requests
import urllib2
import csv
import socialshares
# Scrape article metadata from every URL listed in link1.txt and append one
# row per article to index1.csv with the columns:
#   [day, month, year, author, title, facebook_shares, linkedin_shares, body_text]
#
# NOTE(review): this script targets Python 2 (it imports urllib2); on
# Python 3 the equivalent call is urllib.request.urlopen.
with open("link1.txt", "r") as url_file:
    for raw_line in url_file:
        # Keep only lines that look like complete article URLs: they start
        # with "h" (http/https) and end with "/".  Stripping the newline
        # first fixes the original `links[-2] == "/"` check, which broke on
        # a final line without a trailing newline.
        link = raw_line.rstrip("\n")
        if not (link.startswith("h") and link.endswith("/")):
            continue
        try:
            page = urllib2.urlopen(link)
            soup = BeautifulSoup(page, 'html.parser')

            # Page title; drop any non-ASCII characters so the CSV stays clean.
            title = soup.title.string
            title = title.encode('utf-8').decode('ascii', 'ignore')
            print(title)

            # Author: first two whitespace-separated tokens of the byline.
            # Indexing deliberately raises (and skips the article) when the
            # byline has fewer than two tokens, matching the original logic.
            byline = soup.find("a", attrs={"class": "aboutAuthor_name inline-block mr-5"})
            tokens = byline.text.split()
            author = str(tokens[0]) + " " + str(tokens[1])

            # Publication date: tokens 1-3 of the post-info line are
            # day / month / year (token 0 is presumably a label — TODO confirm
            # against a live page).
            post_info = soup.find("p", attrs={"class": "postInfo color-grey mt-5 fr"})
            info_tokens = post_info.text.split()
            day = str(info_tokens[1])
            month = str(info_tokens[2])
            year = str(info_tokens[3])

            # Article body: strip all HTML tags, then drop non-ASCII chars.
            body_div = soup.find("div", attrs={'class': 'ys_post_content text'})
            body_text = bleach.clean(body_div, tags=[], styles=[], strip=True)
            body_text = body_text.encode('utf-8').decode('ascii', 'ignore')

            # Social share counts for the article URL.
            counts = socialshares.fetch(link, ['facebook', 'linkedin'])
            facebook = counts['facebook']
            linkedin = counts['linkedin']

            with open('index1.csv', 'a') as csv_file:
                writer = csv.writer(csv_file)
                writer.writerow([day, month, year, author, title,
                                 facebook, linkedin, body_text])
        except Exception:
            # Best-effort scrape: skip any article whose page fails to load
            # or parse.  (Was a bare `except:`, which also swallowed
            # KeyboardInterrupt and SystemExit.)
            continue