Exemplo n.º 1
0
def getdate(uri):
    """Look up the estimated creation date (ECD) reported for a URI.

    The URI is stripped of surrounding whitespace and handed to ``cd``; the
    text ``cd`` returns is decoded as JSON. ``cd`` and the ``ECD`` key are
    defined outside this block.

    Args:
        uri: URI string; leading/trailing whitespace is removed before use.

    Returns:
        A ``(uri, creation_date)`` tuple (with the stripped URI) when the
        decoded response holds a truthy value under ``ECD``, otherwise
        ``None``.
    """
    uri = uri.strip()
    print('Searching for uri: {}'.format(uri))
    # Assumes the decoded response is a JSON object (dict) -- TODO confirm.
    uri_json = json.loads(cd(uri))
    # .get() so a response that lacks the ECD key is reported as "no ECD"
    # instead of raising KeyError; it also avoids repeating the lookup.
    creation_date = uri_json.get(ECD)
    if not creation_date:
        print('Found no ECD')
        return None
    print('Found creation date: {}'.format(creation_date))
    return (uri, creation_date)
Exemplo n.º 2
0
# =====================================================================
# Walk through every row of 'urls.csv' (the URL is the first column).
# The file is meant to hold the 1000 urls taken from twitter for
# 'Problem 1'.
# ---------------------------------------------------------------------
age_mementos = []
no_memento = 0
no_creation_date = 0

with open('urls.csv', 'rb') as f:
    for row in csv.reader(f):
        url = row[0]

        # 'cd' is defined in 'local.py'; its return value is JSON text
        result = json.loads(local.cd(url, True))
        creationDate = result['Estimated Creation Date']
        # presumably the memento list sits at this position -- verify
        # against the structure local.cd() actually returns
        mementos = result['Archives'][1][1]
        numMementos = len(mementos)

        if numMementos == 0:
            # url without any memento: only counted
            no_memento += 1
        elif creationDate != "":
            # the date arrives as text, so parse it before subtracting
            creationDate = datetime.strptime(creationDate, '%Y-%m-%dT%H:%M:%S')
            age = datetime.now() - creationDate
            ageInDays = age.days
            # keep the (memento count, age in days) pair
            age_mementos.append([numMementos, ageInDays])
         
Exemplo n.º 3
0
import local

with open("links.txt") as f:
    ListOfLinks = f.readlines()

fOut = open("DateEstimation.txt", "w")
i = 0

for link in ListOfLinks:

   #get URI
   uri = link.split(" ")
   print uri[2], " ", uri[3]

   i+=1
   maybe = local.cd(link)
   maybe = maybe.split('Estimated Creation Date": "')
   maybe = str(maybe[1])
   maybe = maybe.split('"')
   print "\n", i, ": ", uri[2] ," ", uri[3] , " " ,maybe[0]
   fOut.write(str(i))
   fOut.write(": ")
   fOut.write(uri[2])
   fOut.write(" ")
   fOut.write(uri[3])
   fOut.write(" ")
   if maybe[0] != "":
      fOut.write(maybe[0])
   else:
      fOut.write("NODATE")
   fOut.write("\n")
Exemplo n.º 4
0
from local import cd
import json
import datetime
# Captured once so every age is measured against the same instant.
time_now = datetime.datetime.now()

# Load all input lines; `with` closes the handle even if reading fails.
with open("../../momento.list") as data:
    strline = data.readlines()

total_lines = len(strline)
counter = 0

# `with` ensures cd.list is flushed and closed even if a line raises
# (bad integer, malformed JSON, unexpected date format, ...).
with open('../../cd.list', 'w') as outputf:
    for line in strline:
        # Tab-separated record: column 0 is handed to cd(), column 1 is
        # parsed as the integer memento count.
        col = line.split('\t')
        memento = int(col[1])
        if memento > 0:
            # cd() returns JSON text; named `parsed` rather than `re` so the
            # standard-library module name is not shadowed.
            parsed = json.loads(cd(col[0]))
            created = parsed['Estimated Creation Date']
            if created != '':
                createTime = datetime.datetime.strptime(created, '%Y-%m-%dT%H:%M:%S')
                deltaTime = time_now - createTime
                # Output row: "<age in days>\t<memento count>"
                outputf.write(str(deltaTime.days) + "\t" + col[1].strip() + "\n")
        counter += 1
        # Progress as a percentage of the actual number of input lines
        # (the old `counter/10.0` was only correct for exactly 1000 lines;
        # for 1000 lines the printed values are unchanged).
        print(str(100.0 * counter / total_lines) + "%")