-
Notifications
You must be signed in to change notification settings - Fork 8
/
writer.py
157 lines (135 loc) · 5.13 KB
/
writer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
"""
Writer is a program to find the new productive writers in the recent changes of a dokuwiki and save the links to their user pages in a text file.
==Commands==
===Help===
The -h option, or the -help option, will print the help, which is this document. An example follows:
python writer.py -h
===Input===
Default is http://devtome.com
The -input option sets the input file name. The example follows:
python wikicopy.py -input http://devtome.com
==Install==
For wikicopy to run, you need Python 2.x; wikicopy will probably not run with Python 3.x. To check if Python is on your machine, type the following in a terminal:
python
If python 2.x is not on your machine, download the latest python 2.x, which is available from:
http://www.python.org/download/
"""
from datetime import timedelta
import almoner
import cStringIO
import datetime
import devtome
import os
import rater
import sys
import time
# License identifier for this module.
__license__ = 'MIT'
# strftime / strptime pattern shared by the date stamp written to the output file
# and the dates parsed from the recent changes pages:
# two-digit year/month/day followed by 24-hour hour:minute.
globalDateTimeFormat = '%y/%m/%d %H:%M'
def getIsWriterNewProductive(name, paidNameSet):
    """Determine if the writer is new and wrote at least a thousand words.

    name -- the writer name; it is lower-cased before the paid lookup.
    paidNameSet -- lower case names of the writers that were already paid.

    Returns False for an already paid writer.  Otherwise the word counts of
    the writer's articles are accumulated, and True is returned as soon as
    the running total reaches one thousand; False if it never does.
    """
    alreadyPaid = name.lower() in paidNameSet
    if alreadyPaid:
        return False
    wordsSoFar = 0
    for article in rater.getArticles(name):
        print(article)
        editAddress = 'http://devtome.com/doku.php?id=%s&do=edit' % article
        sourceText = almoner.getSourceText(editAddress)
        wordsSoFar = wordsSoFar + devtome.getWordCount(sourceText)
        if wordsSoFar < 1000:
            continue
        # Threshold reached, the remaining articles do not need to be fetched.
        return True
    return False
def getPaidNameSet(previousDevtomeName):
    """Get the names of the paid writers.

    previousDevtomeName -- name of the comma separated file which is read
    through almoner.getFileText.

    Returns a set with the stripped, lower-cased first field of every line
    after the first one; lines whose first field is empty are ignored.
    """
    fileText = almoner.getFileText(previousDevtomeName)
    paidNames = set()
    # The first line is skipped (presumably a header row -- confirm against the file).
    for line in almoner.getTextLines(fileText)[1:]:
        leadingField = line.split(',')[0].strip().lower()
        if leadingField:
            paidNames.add(leadingField)
    return paidNames
def getRecentNames(fileName, nowDatetime, previousDevtomeName, wikiAddress):
    """Get the sorted names of the new productive writers in the recent changes.

    fileName -- name of the previously written writer file; the second comma
        separated field of its first line is parsed as the last modified date.
    nowDatetime -- the current datetime; when no last modified date can be
        read from the file, the date thirty days before it is used instead.
    previousDevtomeName -- name of the file passed to getPaidNameSet.
    wikiAddress -- base address of the wiki whose recent changes are parsed.

    The recent changes pages are followed through their 'less recent' button.
    The sorted list of names is returned when a change is more than twenty six
    hours older than the last modified date, or when no further page can be
    reached.
    """
    lastModifiedText = almoner.getFileText(fileName)
    lastModifiedDatetime = nowDatetime - timedelta(30)
    if lastModifiedText != '':
        lines = almoner.getTextLines(lastModifiedText)
        if len(lines) > 0:
            words = lines[0].split(',')
            if len(words) > 1:
                lastModifiedDatetime = datetime.datetime.strptime(words[1], globalDateTimeFormat)
    print('Last modified: %s' % lastModifiedDatetime)
    nowMinusLast = nowDatetime - lastModifiedDatetime
    paidNameSet = getPaidNameSet(previousDevtomeName)
    print('Now minus last: %s' % nowMinusLast)
    twentySixHours = 26 * 3600
    startChangesAddress = wikiAddress + '/doku.php?do=recent&id=start&show_changes=pages&'
    recentPageAddress = startChangesAddress + 'first[0]'
    lineDatetime = None
    dateTitle = 'class="date">'
    linkTitle = 'class="wikilink1" title="'
    nameTitle = 'name="'
    names = []
    while True:
        print('Parsing: %s' % recentPageAddress)
        lines = almoner.getTextLines(almoner.getInternetText(recentPageAddress))
        nextPageAddress = None
        for lineIndex, line in enumerate(lines):
            # The date text is on the line after the date marker; the index
            # guard avoids an IndexError when the marker is on the last line.
            if dateTitle in line and lineIndex + 1 < len(lines):
                dateLine = lines[lineIndex + 1]
                dateString = dateLine[: dateLine.find('<')]
                # The page shows a four digit year, the format expects two digits.
                if dateString.startswith('20'):
                    dateString = dateString[len('20') :]
                lineDatetime = datetime.datetime.strptime(dateString, globalDateTimeFormat)
            if linkTitle in line:
                line = line[line.find(linkTitle) + len(linkTitle) :]
                name = line[: line.find('"')]
                # A link seen before any date has no change time to compare
                # against, so it is skipped instead of subtracting None.
                if name != 'start' and lineDatetime is not None:
                    lastMinusLine = lastModifiedDatetime - lineDatetime
                    if getSeconds(lastMinusLine) > twentySixHours:
                        names.sort()
                        return names
                    if name.startswith('wiki:user:'):
                        name = name[len('wiki:user:') :]
                        if getIsWriterNewProductive(name, paidNameSet):
                            names.append(name)
            if line.startswith('<input') and 'value="less recent' in line and nameTitle in line:
                line = line[line.find(nameTitle) + len(nameTitle) :]
                name = line[: line.find('"')]
                nextPageAddress = startChangesAddress + name
        # Without a 'less recent' button, or with one that does not advance,
        # the same page would be parsed forever; return what was found.
        if nextPageAddress is None or nextPageAddress == recentPageAddress:
            names.sort()
            return names
        recentPageAddress = nextPageAddress
        # Pause between page requests.
        time.sleep(1)
def getSeconds(timedelta):
    """Get the total number of whole seconds in the time difference.

    timedelta -- an object with days and seconds attributes, such as a
    datetime.timedelta; its microseconds are not counted.
    """
    secondsPerDay = 86400
    wholeDaySeconds = timedelta.days * secondsPerDay
    return wholeDaySeconds + timedelta.seconds
def writeOutput(arguments):
    """Write the writer file according to the command line arguments.

    arguments -- the list of command line arguments.

    When '-h' or '-help' is among the arguments, the module documentation is
    printed and nothing else is done.  Otherwise the 'round', 'wiki',
    'previous' and 'output' parameters are read through almoner.getParameter,
    each with the default passed as its second argument, and the writer file
    is written.
    """
    if '-h' in arguments or '-help' in arguments:
        print(__doc__)
        return
    # Named roundNumber so that the builtin round is not shadowed.
    roundNumber = int(almoner.getParameter(arguments, '34', 'round'))
    wikiAddress = almoner.getParameter(arguments, 'http://devtome.com', 'wiki')
    # The default previous file is the devtome file of the round before this one.
    previousDevtomeName = almoner.getParameter(arguments, 'devtome_%s.csv' % (roundNumber - 1), 'previous')
    fileName = almoner.getParameter(arguments, 'writers.txt', 'output')
    writeWriterFile(fileName, previousDevtomeName, wikiAddress)
def writeWriterFile(fileName, previousDevtomeName, wikiAddress):
    """Write the writer file.

    fileName -- name of the file to write; it is also passed to
        getRecentNames together with the current time.
    previousDevtomeName -- name of the file passed on to getRecentNames.
    wikiAddress -- wiki address passed on to getRecentNames.

    The first line written is 'Date,' followed by the current time, then one
    user page link follows for each recent name.
    """
    timestamp = datetime.datetime.today()
    output = cStringIO.StringIO()
    output.write('Date,%s' % timestamp.strftime(globalDateTimeFormat))
    writerNames = getRecentNames(fileName, timestamp, previousDevtomeName, wikiAddress)
    print('Number of names: %s' % len(writerNames))
    userPageLines = ['\nhttp://devtome.com/doku.php?id=wiki:user:%s' % writerName for writerName in writerNames]
    output.writelines(userPageLines)
    almoner.writeFileText(fileName, output.getvalue())
def main():
    'Write the output for the arguments given on the command line.'
    commandLineArguments = sys.argv
    writeOutput(commandLineArguments)
# Run the program only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()