Пример #1
0
def main():
    """Send a desktop notification when the "becas" page shows a newer post.

    Downloads the category page, scrapes the text of the first ``entry-title``
    link and the first ``datetime`` attribute, and compares that date with the
    one stored in the file ``last`` (relative to the current working
    directory). On the first run the file is created with the scraped date, so
    no notification is sent. When the scraped date is newer than the stored
    one, ``notify-send`` is invoked with the title and the file is updated.

    Returns:
        None. A failed download or decode ends the run silently.
    """
    # Reading raw data from website
    url = "http://www.facet.unt.edu.ar/facetinforma/category/becas/"
    try:
        # The context manager closes the HTTP response even if read/decode fails.
        with urlopen(url) as data:
            text = data.read().decode('utf-8')
    except Exception:
        # Best effort: any fetch/decode problem just skips this run, but
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        return

    # Title of the first entry: skip past the attribute, then take the text
    # between the end of the opening tag and the closing </a>.
    title = text[text.find("entry-title") + 13:]
    title = title[title.find(">") + 1:title.find("</a>")]

    # Getting ISO 8601 format date: the quoted value of the first datetime attribute
    stamp = text[text.find("datetime"):text.find("</time")]
    stamp = stamp[stamp.find("\"") + 1:stamp.find("\">")]

    # Datetime object
    date = dateParser(stamp)

    # Checks last date. If it's the first time sets the last date
    try:
        with open("last", "r") as f:
            stored = f.read()
    except FileNotFoundError:
        with open("last", "w") as f:
            f.write(stamp)
        stored = stamp

    lastdate = dateParser(stored)

    if date > lastdate:
        subprocess.call(['notify-send', "Facet Informa", title])
        with open("last", "w") as f:
            f.write(stamp)
Пример #2
0
def getDaterange(input_daterange):
    """Split a ``"<start> - <end>"`` string into two datetimes.

    Each half is parsed with ``dateParser`` and then round-tripped through a
    local-time epoch timestamp, so the returned values are naive datetimes
    with whole-second precision.

    Args:
        input_daterange: text holding exactly one ``" - "`` separator between
            the start and the end date.

    Returns:
        A ``(startDate, endDate)`` tuple of ``datetime`` objects.

    Raises:
        ValueError: if the string does not split into exactly two parts on
            ``" - "`` (tuple unpacking fails).
    """
    # Local import keeps this fix self-contained within the function.
    import time

    strStartDate, strEndDate = input_daterange.split(" - ")

    def _roundTrip(text):
        # time.mktime(timetuple()) is the portable equivalent of the former
        # strftime('%s'), which is a glibc-only extension and is not
        # available on every platform.
        epochSeconds = time.mktime(dateParser(text).timetuple())
        return datetime.fromtimestamp(epochSeconds)

    startDate = _roundTrip(strStartDate)
    endDate = _roundTrip(strEndDate)
    return startDate, endDate
Пример #3
0
 def parseArxiv(self, doc_id):
     """Fill this document's attributes from the arXiv record in ``self.metadata``.

     ``doc_id`` is stored both as the PDF file reference and as the global ID.
     Attributes that are not read from the record are set to ``False``.
     """
     meta = self.metadata

     self.title = meta["title"]
     # Encode a one-element batch and keep its first row as a plain list.
     self.title_vector = bc.encode([self.title])[0].tolist()
     self.pdf_file = doc_id
     self.parent_item = False
     self.tags = meta['tags']
     self.url = meta["arxiv_url"]

     # Only the year of the publication date is kept; False when it is empty.
     published = meta["published"]
     self.year = dateParser(published).year if published else False

     self.citations = self.versions = self.clusterID = False
     self.citations_list = self.notes = False

     # Author names are split on the first space: first token -> firstName,
     # everything after it -> lastName.
     author_names = meta["authors"]
     self.authors = []
     for full_name in author_names:
         name_parts = full_name.split(' ')
         self.authors.append({
             "firstName": name_parts[0],
             "lastName": ' '.join(name_parts[1:]),
         })

     self.abstract = meta['summary']
     self.type = False
     self.globalID = doc_id
     self.organization = meta['affiliation']
     self.pages = False
     self.citationArticles = False
Пример #4
0
 def parseMetadata(self):
     """Fill this document's attributes from the record in ``self.metadata``.

     The record carries two parent items, two notes and two tag sets; only
     the ``_x`` variants are read for now (merging them is still pending).
     Attributes that are not read from the record are set to ``False``.
     """
     meta = self.metadata

     self.title = meta["name"]
     # Encode a one-element batch and keep its first row as a plain list.
     self.title_vector = bc.encode([self.title])[0].tolist()
     self.pdf_file = meta['pdf_file']
     self.parent_item = meta['parentItem_x']
     self.tags = meta['tags_x']
     self.url = meta["url"]

     # Only the year of the date is kept; False when the date is empty.
     raw_date = meta["date"]
     self.year = dateParser(raw_date).year if raw_date else False

     self.citations = self.versions = self.clusterID = False
     self.citations_list = False
     self.notes = meta["note_x"]
     self.authors = meta["creators"]
     self.abstract = False
     self.type = meta["itemType"]
     self.globalID = meta['key']
     self.organization = self.pages = self.citationArticles = False
Пример #5
0
def main():
    """Print event, area and expiry time for each entry of a CAP alert feed.

    The feed URL is built from the module-level ``loc`` value and fetched
    from alerts.weather.gov. For every ``entry`` element the CAP 1.1
    ``event``, ``areaDesc`` and ``expires`` children are printed, with the
    expiry time parsed by ``dateParser`` and reformatted.
    """
    # Namespace prefix shared by every CAP child element read below.
    cap_ns = '{urn:oasis:names:tc:emergency:cap:1.1}'
    feed_url = 'http://alerts.weather.gov/cap/wwaatmget.php?x=%s' % (loc)

    doc = objectify.parse(feed_url)
    tree = doc.getroot()

    for e in tree.entry:
        # Single-argument print(...) produces the same output on Python 2
        # and Python 3, unlike the former print statements.
        print('What: %s' % e[cap_ns + 'event'])
        print('    Where: %s' % e[cap_ns + 'areaDesc'].text)
        expires = dateParser(e[cap_ns + 'expires'].text)
        print('    Ends: %s' % expires.strftime('%A, %B %d, %Y, %I:%M %p'))
Пример #6
0
 def parseMetadata(self):
     """Fill this document's attributes from ``self.metadata`` and collect its authors.

     The record carries two parent items, two notes and two tag sets; only
     the ``_x`` variants are read for now (merging them is still pending).
     Attributes that are not read from the record are set to ``False``.

     ``self.authors`` ends up as a DataFrame with one block of rows per
     creator: authors unknown to ``self.session`` are looked up through
     ``scholar.fast_get_author_data`` and registered in the session, known
     ones are fetched back from the session.
     """
     self.title = self.metadata["name"]
     self.pdf_file = self.metadata['pdf_file']
     self.parent_item = self.metadata['parentItem_x']
     self.tags = self.metadata['tags_x']
     self.url = self.metadata["url"]
     # An empty date cannot be parsed; store False instead of crashing.
     if self.metadata["date"]:
         self.year = dateParser(self.metadata["date"]).year
     else:
         self.year = False
     self.citations = False
     self.versions = False
     self.clusterID = False
     self.citations_list = False
     self.notes = self.metadata["note_x"]
     zotero_authors = self.metadata["creators"]
     self.abstract = False
     self.type = self.metadata["itemType"]
     self.globalID = self.metadata['key']
     self.scholarID = False
     self.organization = False
     self.pages = False
     self.citationArticles = False
     self.authors = pd.DataFrame()

     # The authors are created here too.
     for eachAuthor in zotero_authors:
         author = eachAuthor['firstName'] + " " + eachAuthor['lastName']
         # Kept as "== False" on purpose: the return type of searchAuthor is
         # not visible here, so plain truthiness might change behaviour.
         if self.session.searchAuthor(author) == False:
             author_data = scholar.fast_get_author_data(author)
             # Lists are turned into strings so they can be stored in a frame.
             author_data['Interests'] = str(author_data['Interests'])
             author_data['Paper_Ids'] = [self.globalID]
             author_data_df = pd.DataFrame.from_records(
                 author_data, index=[author_data['Author']])
             # pd.concat replaces DataFrame.append, which pandas 2.0 removed.
             self.authors = pd.concat([self.authors, author_data_df])
             self.session.addAuthor(author_data_df, self.globalID)
         else:
             print("Author was in Session")
             # Get the author from the session.
             # NOTE(review): searchAuthor receives the full-name string but
             # returnAuthor receives the creator dict - confirm which one the
             # session expects.
             author_frame = self.session.returnAuthor(eachAuthor)
             # Append to the document. The result must be assigned back:
             # the former bare append() call discarded it, so authors already
             # in the session were silently dropped.
             # NOTE(review): assumes returnAuthor returns a DataFrame - confirm.
             self.authors = pd.concat([self.authors, author_frame])
Пример #7
0
def csvToMongo(csvString):
    """Turn CSV scan rows into nested documents and insert them into MongoDB.

    ``csvString`` is converted to a sequence of row dicts by ``csvToJson``.
    Each row is reshaped into a document with four parts:

    * ``Date``    - the row's ``Date`` with a fixed ``09:01:00`` time added,
      parsed by ``dateParser`` and localized to Asia/Seoul.
    * ``File``    - ``FileName``, ``Type``, ``MD5``, ``CRC64`` (``None`` when
      the column is absent) and ``Size`` (as int).
    * ``Threat``  - ``Severity`` (as int), ``Threat_Name`` (``None`` for
      "none"/empty), ``Result`` and the behavior count, read from either the
      ``BeaviorCount`` or the ``BehaviorCount`` column (0 when neither holds
      a truthy value).
    * ``Results`` - one entry per remaining column, keyed by engine name,
      each with ``Version``, ``Result`` and ``Reason``. ``Threat_Name`` is
      left in the row on purpose so that it yields the ``V3`` entry.

    Args:
        csvString: CSV text accepted by ``csvToJson``.

    Returns:
        List of the values returned by ``col_enginediff.insert``, one per row.

    Raises:
        KeyError: if a row lacks one of the mandatory columns.
        ValueError: if ``Size``/``Severity`` or a VirusTotal ratio is not numeric.
    """
    Seoul = pytz.timezone('Asia/Seoul')
    data = csvToJson(csvString)
    mongoRetval = []

    # Verdict values that are treated as a clean result.
    benignValues = ['not found', 'Not found', 'None', 'none', 'Clean', 'BENIGN', '', None]

    def hasSlash(value):
        # None is an accepted verdict (see benignValues), so guard before
        # calling str.find on it.
        return value is not None and value.find("/") >= 0

    for elem in data:
        # PreProcessing Rule
        elem['Size'] = int(elem['Size'])
        print(elem['Date'])
        elem['Date'] = elem['Date'] + " 09:01:00"
        print(elem['Date'])
        elem['Date'] = dateParser(elem['Date'])
        elem['Severity'] = int(elem['Severity'])
        if elem['Threat_Name'].lower() == "none" or elem['Threat_Name'] == "":
            elem['Threat_Name'] = None
        if "CRC64" not in elem:
            elem['CRC64'] = None

        # Index column that may or may not be present in the CSV.
        elem.pop('Unnamed: 0', None)

        # Processing from here: every consumed column is popped so that only
        # per-engine columns are left for the Results loop below.
        Date = Seoul.localize(elem.pop('Date'))

        File = {
            "Name": elem.pop('FileName'),
            "Type": elem.pop('Type'),
            "MD5": elem.pop('MD5'),
            "CRC64": elem.pop('CRC64'),
            "Size": elem.pop('Size'),
        }

        # The column exists under two spellings. Both are always removed:
        # the former direct indexing raised KeyError when only one spelling
        # was present and left a falsy count behind as a bogus engine column.
        behaviorCount = 0
        for countKey in ('BeaviorCount', 'BehaviorCount'):
            count = elem.pop(countKey, None)
            if count and not behaviorCount:
                behaviorCount = count

        Threat = {
            "Severity": elem.pop('Severity'),
            "Name": elem['Threat_Name'],
            "VM_Severity": elem.pop('Result'),
            "behaviorCount": behaviorCount,
        }

        Results = {}
        for key, val in elem.items():
            if key == '':
                continue

            # result(BENIGN|MALICIOUS|SUSPICIOUS) categorization
            if val == "MALICOUS":
                val = "MALICIOUS"  # normalize a misspelled verdict
            if val in benignValues:
                result = "BENIGN"
            else:
                result = val
            reason = val

            if key.find("DICA") >= 0:
                Engine = "DICA"
                EngineVersion = key.replace("DICA_", "")
                Result = result
                Reason = reason
            elif key.find("VM_Threat_Name") >= 0:
                # Must be tested before the plain "Threat_Name" check below.
                Engine = "MDP_VM"
                EngineVersion = 0
                if hasSlash(val) and reason != "None":
                    Result = "MALICIOUS"
                    Reason = reason
                else:
                    Result = "BENIGN"
                    Reason = None
            elif key.find("AhnLab-V3") >= 0 or key.find("Threat_Name") >= 0:
                Engine = "V3"
                EngineVersion = "AhnLab-V3"
                Reason = reason
                if result != "BENIGN":
                    Result = "MALICIOUS"
                else:
                    Result = result
            elif key.find("Heimdal") >= 0:
                Engine = "Heimdal"
                EngineVersion = key
                Result = result
                Reason = reason
                if hasSlash(reason):
                    # "<verdict>/<reason>" pairs are split into both fields.
                    parts = result.split("/")
                    Result = parts[0]
                    Reason = parts[1]
            elif key.find("VirusTotal") >= 0:
                Engine = "VirusTotal"
                if hasSlash(val):
                    # Ratio "<first>/<second>": the first number is stored as
                    # Reason, the second as Version; a positive first number
                    # marks the file MALICIOUS.
                    parts = reason.split("/")
                    EngineVersion = int(parts[1])
                    Reason = int(parts[0])
                    if Reason > 0:
                        Result = "MALICIOUS"
                    else:
                        Result = "BENIGN"
                else:
                    EngineVersion = 0
                    Result = "BENIGN"
                    Reason = 0
            else:
                Engine = key
                EngineVersion = key
                Result = result
                Reason = reason

            Results[Engine] = {
                "Version": EngineVersion,
                "Result": Result,
                "Reason": Reason,
            }

        retval = {
            "Date": Date,
            "File": File,
            "Threat": Threat,
            "Results": Results,
        }
        mongoRetval.append(col_enginediff.insert(retval))
    return mongoRetval
Пример #8
0
                    headline = keywords
                    body_text = body_text.replace('&lt;</td>', '</td>')
                    body_text = body_text.replace(';', '</td><td>')
                    body_text = body_text.replace('<td>Name Ticker High Low Last Change Change Ratio</td>', '<th>Name</th><th>Ticker</th><th>High</th><th>Low</th><th>Last</th><th>Change</th><th>Change Ratio</th>')

                    time_stamp_string = re.search(r'\d{12}', body_text)
                    if time_stamp_string:
                        stock_info_datetime = datetime.datetime.strptime(time_stamp_string.group(0), '%Y%m%d%H%M')
                        # Adjust time from Eastern time zone.
                        stock_info_datetime = stock_info_datetime - datetime.timedelta(hours=3)
    #                   pretty_stock_info_datetime = stock_info_datetime.strftime('%A, %B %d, %Y, %I:%M %p')
                        pretty_stock_info_datetime = date(stock_info_datetime, 'P, N j, Y')
                        body_text = body_text.replace(time_stamp_string.group(0), pretty_stock_info_datetime)
                APStory_instance = APStory(
    #                 category = ap_cat,
                    updated = dateParser(e.updated.text),
                    published = dateParser(e.published.text),
                    management_id = management_id,
                    consumer_ready = consumer_ready,
                    media_type = e['{http://ap.org/schemas/03/2005/apcm}ContentMetadata'].MediaType.text,
                    priority_numeric = e['{http://ap.org/schemas/03/2005/apcm}ContentMetadata'].Priority.attrib['Numeric'],
                    priority_legacy = e['{http://ap.org/schemas/03/2005/apcm}ContentMetadata'].Priority.attrib['Legacy'],
                    subject_code = ap_subject_code,
                    location = location,
                    contributor = contributor,
                    contributor_uri = contributor_uri,
                    byline = byline,
                    byline_title = byline_title,
                    slugline = e['{http://ap.org/schemas/03/2005/apcm}ContentMetadata'].SlugLine.text,
                    title = e.title.text,
                    keywords = keywords,
Пример #9
0
    def _createGameFromTag(self, game_tag, base_url, rootElement):
        """Create and temp-save a ``Game`` element from one game XML tag.

        Args:
            game_tag: XML element with ``GameTitle``, ``id``, ``Platform`` and
                ``PlatformId`` children (mandatory) plus optional ``Images``,
                ``Genres``, ``ReleaseDate`` and ``Youtube`` children.
            base_url: base URL handed to the box-art / fan-art URL helpers.
            rootElement: element that becomes the parent of a platform copied
                into the cache; its ``mediaType`` filters the platform query.

        Returns:
            The new game element, or ``None`` when mandatory tags are missing
            or no platform element matches the platform id.
        """
        titleTag = game_tag.find('GameTitle')
        idTag = game_tag.find('id')
        platformTag = game_tag.find('Platform')
        platformIDTag = game_tag.find('PlatformId')
        imagesTag = game_tag.find('Images')
        genresTag = game_tag.find('Genres')
        release_date = game_tag.find('ReleaseDate')
        trailer = game_tag.find('Youtube')

        if titleTag is None or idTag is None or platformTag is None or platformIDTag is None:
            log("Not enough info to create game")
            return None

        platformID = int(platformIDTag.text)

        g = Element()
        g.type = 'Game'
        g.mediaType = MediaType.get(MediaType.identifier == 'de.lad1337.games')
        g.setField('id', int(idTag.text), self.tag)
        g.setField('name', titleTag.text, self.tag)
        g.setField(
            'front_image',
            self._boxartUrl(imagesTag, platformIDTag.text, base_url, 'front'),
            self.tag)
        g.setField('fanart_image',
                   self._fanartUrl(imagesTag, base_url, 'original'), self.tag)
        g.setField('genre', self._genresStr(genresTag), self.tag)

        # Release date: strict m/d/Y first, then the lenient parser, and the
        # current time when the tag is missing, empty or unparseable.
        releaseDate = None
        if release_date is not None and release_date.text:
            try:
                releaseDate = datetime.datetime.strptime(
                    release_date.text, "%m/%d/%Y")
            except ValueError:
                try:
                    ddd = dateParser(release_date.text)
                except (ValueError, OverflowError):
                    # NOTE(review): assumes dateParser signals bad input with
                    # these exceptions, as dateutil's parse does - confirm.
                    ddd = None
                if ddd is not None and hasattr(ddd, 'year') and hasattr(
                        ddd, 'month') and hasattr(ddd, 'day'):
                    releaseDate = datetime.datetime(ddd.year, ddd.month,
                                                    ddd.day)
        if releaseDate is None:
            releaseDate = datetime.datetime.now()
        g.setField('release_date', releaseDate, self.tag)

        # Trailer: keep only the YouTube video id (the "v" query parameter).
        # A missing tag, empty text or a URL without "?v=" stores '' instead
        # of raising on a failed regex match.
        # http://stackoverflow.com/questions/2639582/python-small-regex-problem
        yid = None
        if trailer is not None and trailer.text:
            yid = re.search(r'(?<=\?v\=)[\w-]+', trailer.text)
        g.setField('trailer', yid.group(0) if yid else '', self.tag)

        # Resolve the platform, copying it under rootElement on first use.
        if platformID not in self._pCache:
            q = Element.select().where(
                Element.mediaType == rootElement.mediaType,
                Element.type == 'Platform')
            for e in q:
                if e.getField('id', self.tag) == platformID:
                    platform = e.copy()
                    platform.parent = rootElement
                    platform.saveTemp()
                    self._pCache[platformID] = platform
                    break
            else:
                return None

        g.parent = self._pCache[platformID]
        g.saveTemp()
        self.progress.addItem()
        # Returned on both paths; previously a game whose platform had just
        # been cached was saved but the method returned None.
        return g
Пример #10
0
    def _createGameFromTag(self, game_tag, base_url, rootElement):
        """Create and temp-save a ``Game`` element from one game XML tag.

        Args:
            game_tag: XML element with ``GameTitle``, ``id``, ``Platform`` and
                ``PlatformId`` children (mandatory) plus optional ``Images``,
                ``Genres``, ``ReleaseDate`` and ``Youtube`` children.
            base_url: base URL handed to the box-art / fan-art URL helpers.
            rootElement: element that becomes the parent of a platform copied
                into the cache; its ``mediaType`` filters the platform query.

        Returns:
            The new game element, or ``None`` when mandatory tags are missing
            or no platform element matches the platform id.
        """
        titleTag = game_tag.find('GameTitle')
        idTag = game_tag.find('id')
        platformTag = game_tag.find('Platform')
        platformIDTag = game_tag.find('PlatformId')
        imagesTag = game_tag.find('Images')
        genresTag = game_tag.find('Genres')
        release_date = game_tag.find('ReleaseDate')
        trailer = game_tag.find('Youtube')

        if titleTag is None or idTag is None or platformTag is None or platformIDTag is None:
            log("Not enough info to create game")
            return None

        platformID = int(platformIDTag.text)

        g = Element()
        g.type = 'Game'
        g.mediaType = MediaType.get(MediaType.identifier == 'de.lad1337.games')
        g.setField('id', int(idTag.text), self.tag)
        g.setField('name', titleTag.text, self.tag)
        g.setField('front_image', self._boxartUrl(imagesTag, platformIDTag.text, base_url, 'front'), self.tag)
        g.setField('fanart_image', self._fanartUrl(imagesTag, base_url, 'original'), self.tag)
        g.setField('genre', self._genresStr(genresTag), self.tag)

        # Release date: strict m/d/Y first, then the lenient parser, and the
        # current time when the tag is missing, empty or unparseable.
        releaseDate = None
        if release_date is not None and release_date.text:
            try:
                releaseDate = datetime.datetime.strptime(release_date.text, "%m/%d/%Y")
            except ValueError:
                try:
                    ddd = dateParser(release_date.text)
                except (ValueError, OverflowError):
                    # NOTE(review): assumes dateParser signals bad input with
                    # these exceptions, as dateutil's parse does - confirm.
                    ddd = None
                if ddd is not None and hasattr(ddd, 'year') and hasattr(ddd, 'month') and hasattr(ddd, 'day'):
                    releaseDate = datetime.datetime(ddd.year, ddd.month, ddd.day)
        if releaseDate is None:
            releaseDate = datetime.datetime.now()
        g.setField('release_date', releaseDate, self.tag)

        # Trailer: keep only the YouTube video id (the "v" query parameter).
        # A missing tag, empty text or a URL without "?v=" stores '' instead
        # of raising on a failed regex match.
        # http://stackoverflow.com/questions/2639582/python-small-regex-problem
        yid = None
        if trailer is not None and trailer.text:
            yid = re.search(r'(?<=\?v\=)[\w-]+', trailer.text)
        g.setField('trailer', yid.group(0) if yid else '', self.tag)

        # Resolve the platform, copying it under rootElement on first use.
        if platformID not in self._pCache:
            q = Element.select().where(Element.mediaType == rootElement.mediaType, Element.type == 'Platform')
            for e in q:
                if e.getField('id', self.tag) == platformID:
                    platform = e.copy()
                    platform.parent = rootElement
                    platform.saveTemp()
                    self._pCache[platformID] = platform
                    break
            else:
                return None

        g.parent = self._pCache[platformID]
        g.saveTemp()
        self.progress.addItem()
        # Returned on both paths; previously a game whose platform had just
        # been cached was saved but the method returned None.
        return g
Пример #11
0
    def _createGameFromTag(self, game_tag, base_url, rootElement):
        """Create and temp-save a ``Game`` element from one game XML tag.

        Args:
            game_tag: XML element with ``GameTitle``, ``id``, ``Platform`` and
                ``PlatformId`` children (mandatory) plus optional ``Images``,
                ``Genres``, ``ReleaseDate`` and ``Youtube`` children.
            base_url: base URL handed to the box-art / fan-art URL helpers.
            rootElement: element that becomes the parent of a platform copied
                into the cache; its ``mediaType`` filters the platform query.

        Returns:
            The new game element, or ``None`` when mandatory tags are missing
            or no platform element matches the platform id.
        """
        titleTag = game_tag.find("GameTitle")
        idTag = game_tag.find("id")
        platformTag = game_tag.find("Platform")
        platformIDTag = game_tag.find("PlatformId")
        imagesTag = game_tag.find("Images")
        genresTag = game_tag.find("Genres")
        release_date = game_tag.find("ReleaseDate")
        trailer = game_tag.find("Youtube")

        if titleTag is None or idTag is None or platformTag is None or platformIDTag is None:
            log("Not enough info to create game")
            return None

        platformID = int(platformIDTag.text)

        g = Element()
        g.type = "Game"
        g.mediaType = MediaType.get(MediaType.identifier == "de.lad1337.games")
        g.setField("id", int(idTag.text), self.tag)
        g.setField("name", titleTag.text, self.tag)
        g.setField("front_image", self._boxartUrl(imagesTag, platformIDTag.text, base_url, "front"), self.tag)
        g.setField("fanart_image", self._fanartUrl(imagesTag, base_url, "original"), self.tag)
        g.setField("genre", self._genresStr(genresTag), self.tag)

        # Release date: strict m/d/Y first, then the lenient parser, and the
        # current time when the tag is missing, empty or unparseable.
        releaseDate = None
        if release_date is not None and release_date.text:
            try:
                releaseDate = datetime.datetime.strptime(release_date.text, "%m/%d/%Y")
            except ValueError:
                try:
                    ddd = dateParser(release_date.text)
                except (ValueError, OverflowError):
                    # NOTE(review): assumes dateParser signals bad input with
                    # these exceptions, as dateutil's parse does - confirm.
                    ddd = None
                if ddd is not None and hasattr(ddd, "year") and hasattr(ddd, "month") and hasattr(ddd, "day"):
                    releaseDate = datetime.datetime(ddd.year, ddd.month, ddd.day)
        if releaseDate is None:
            releaseDate = datetime.datetime.now()
        g.setField("release_date", releaseDate, self.tag)

        # Trailer: keep only the YouTube video id (the "v" query parameter).
        # A missing tag, empty text or a URL without "?v=" stores "" instead
        # of raising on a failed regex match.
        # http://stackoverflow.com/questions/2639582/python-small-regex-problem
        yid = None
        if trailer is not None and trailer.text:
            yid = re.search(r"(?<=\?v\=)[\w-]+", trailer.text)
        g.setField("trailer", yid.group(0) if yid else "", self.tag)

        # Resolve the platform, copying it under rootElement on first use.
        if platformID not in self._pCache:
            q = Element.select().where(Element.mediaType == rootElement.mediaType, Element.type == "Platform")
            for e in q:
                if e.getField("id", self.tag) == platformID:
                    platform = e.copy()
                    platform.parent = rootElement
                    platform.saveTemp()
                    self._pCache[platformID] = platform
                    break
            else:
                return None

        g.parent = self._pCache[platformID]
        g.saveTemp()
        self.progress.addItem()
        # Returned on both paths; previously a game whose platform had just
        # been cached was saved but the method returned None.
        return g