def __init__(self):
    """Read the event labels from ``./config.json`` and prepare the API URL and scraper.

    ``url_today`` is the planerkulturalny.pl events endpoint; callers append
    the start date to it.
    """
    with open('./config.json') as config_file:
        settings = json.load(config_file)
    self.event_labels = settings["event_labels"]
    self.url_today = "https://planerkulturalny.pl/api/rest/events.json?start_date="
    self.scrap = Scrap()
def __init__(self):
    """Read the event labels from ``./config.json`` and prepare URL, categories and scraper.

    ``url_today`` is the go.wroclaw.pl events endpoint; callers append the
    ``time-from`` date to it. ``category`` lists the known category names;
    a name's position + 1 is used as its numeric category id.
    """
    with open('./config.json') as config_file:
        settings = json.load(config_file)
    self.event_labels = settings["event_labels"]
    self.url_today = "http://go.wroclaw.pl/api/v1.0/events?key=1011488156695333384118402645947989718531&time-from="
    self.category = [
        "Sport",
        "Kultura",
        "Koncert",
        "Targi",
        "Inne",
        "Hackaton",
        "Rozrywka",
        "Dziecko",
    ]
    self.scrap = Scrap()
def __init__(self):
    """Read the event labels from ``./config.json`` and prepare URLs, categories and scraper.

    ``url_today`` returns the current day's events from poznan.pl;
    ``url_to_given_day`` expects the end date to be appended. ``category``
    lists the known category names; a name's position + 1 is used as its
    numeric category id.
    """
    with open('./config.json') as config_file:
        settings = json.load(config_file)
    self.event_labels = settings["event_labels"]
    self.url_today = "http://www.poznan.pl/mim/public/ws-information/?co=getCurrentDayEvents"
    self.url_to_given_day = "http://www.poznan.pl/mim/public/ws-information/?co=getEventsToDate&dateTo="
    self.category = [
        "Sport",
        "Kultura",
        "Koncert",
        "Targi",
        "Inne",
        "Hackaton",
        "Rozrywka",
        "Dziecko",
    ]
    self.scrap = Scrap()
class fillDB:
    """Scrape events from kiwiportal.pl city pages and push them to the local event service."""

    def __init__(self, *args, **kwargs):
        """Create the scraper; positional and keyword arguments are accepted but unused."""
        self.s = Scrap()
        # The Poznan API source (EventAPI_Poznan) is currently not wired in.

    def getEvents(self, dateStart=None, dateEnd=None):
        """Scrape every configured city page and return all events concatenated.

        :param dateStart: accepted but not used
        :param dateEnd: accepted but not used
        :return: the per-city results of ``scrap_kiwiportal`` joined with ``+``,
            in the order the city pages are listed below
        """
        city_pages = iter((
            'https://www.kiwiportal.pl/wydarzenia/m/warszawa',
            'https://www.kiwiportal.pl/wydarzenia/m/krakow',
            'https://www.kiwiportal.pl/wydarzenia/m/trojmiasto',
            'https://www.kiwiportal.pl/wydarzenia/m/poznan',
            'https://www.kiwiportal.pl/wydarzenia/m/zakopane',
            'https://www.kiwiportal.pl/wydarzenia/m/wroclaw',
        ))
        collected = self.s.scrap_kiwiportal(next(city_pages))
        for page in city_pages:
            collected = collected + self.s.scrap_kiwiportal(page)
        # Events from EventAPI_Poznan.get_event_today() used to be appended
        # here; that source is disabled for now.
        return collected

    def load_to_database(self):
        """POST every scraped event to the local create endpoint and print each status code.

        Each item returned by ``getEvents`` is decoded with ``json.loads``
        before being sent as the request's JSON body.
        """
        scraped = self.getEvents()
        endpoint = "http://localhost:9000/event/create"
        for serialized in scraped:
            response = requests.post(endpoint, json=json.loads(serialized))
            print("Status code: {}".format(response.status_code))
class EventAPI_Wroclaw:
    """Fetch events from the go.wroclaw.pl API and convert them to JSON strings.

    Each parsed event is serialized as ``{"event": {...}, "categories": [id]}``.
    Category ids are 1-based positions in ``self.category``:
    1-Sport, 2-Kultura, 3-Koncert, 4-Targi, 5-Inne, 6-Hackaton, 7-Rozrywka,
    8-Dziecko.
    """

    def __init__(self):
        """Load event labels from ``./config.json`` and set up URL, categories and scraper."""
        with open('./config.json') as f:
            config_json = json.load(f)
        self.event_labels = config_json["event_labels"]
        # NOTE(review): the API key is embedded in the URL; consider moving it
        # to configuration.
        self.url_today = "http://go.wroclaw.pl/api/v1.0/events?key=1011488156695333384118402645947989718531&time-from="
        self.category = [
            "Sport", "Kultura", "Koncert", "Targi", "Inne", "Hackaton",
            "Rozrywka", "Dziecko"
        ]
        self.scrap = Scrap()

    def _save_json(self, url, name):
        """
        TEST HELPER
        Download ``url`` and write the response text to a file.
        :param url: address url
        :param name: file name
        """
        _json = requests.get(url)
        with open(name, "w", encoding="UTF-8") as f:
            f.write(_json.text)

    def _save_json_today(self, date):
        """
        TEST HELPER
        Save the events of a single day to ``events_today_Wroclaw.json``.
        :param date: current day, used as both time-from and time-to
        """
        self._save_json(self.url_today + date + "&time-to=" + date,
                        "events_today_Wroclaw.json")

    def _save_json_to_given_day(self, date, end_date):
        """
        TEST HELPER
        Save the events of a date range to ``events_to_given_day_Wroclaw.json``.
        :param date: first day of the range
        :param end_date: last day of the range
        """
        # The parameter is "time-to" (it used to be misspelled "tome-to").
        self._save_json(self.url_today + date + "&time-to=" + end_date,
                        "events_to_given_day_Wroclaw.json")

    def get_json(self, name_json):
        """
        TEST HELPER
        Read a JSON file and decode it.
        :param name_json: name of json file
        :return: decoded JSON content
        """
        with open(name_json, "r", encoding="UTF-8") as f:
            r_json = f.read()
        return json.loads(r_json)

    def make_request_and_get_json(self, url):
        """
        Request ``url`` and decode the response body as JSON.
        :param url: address url
        :return: decoded JSON content
        """
        r_json = requests.get(url)
        return json.loads(r_json.text)

    def _get_first_sentence(self, long_description):
        """
        Build a short description from a long one.
        :param long_description: event long description
        :return: the text before the first "." followed by three dots
        """
        return str(long_description.split(".")[0]) + "..."

    def _check_category(self, category):
        """
        Map an API category name to a numeric category id.
        :param category: category name from the API
        :return: 1 when the name contains the word "Biegi", the 1-based
            position in ``self.category`` for a known name, otherwise 5 (Inne)
        """
        if "Biegi" in category.split(" "):
            return 1
        if category in self.category:
            return self.category.index(category) + 1
        return 5

    def parse_data(self, date):
        """
        Convert a ``YYYY-MM-DD`` string to a datetime at midnight.
        :param date: date string
        :return: ``datetime.datetime``, or ``False`` when ``date`` has no
            ``split`` method (e.g. ``None``)
        """
        try:
            year, month, day = date.split('-')
        except AttributeError:
            return False
        return datetime.datetime(int(year), int(month), int(day), 0, 0, 0)

    @staticmethod
    def _date_converter(o):
        """``json.dumps`` fallback that renders dates and datetimes in ISO format."""
        if isinstance(o, (datetime.date, datetime.datetime)):
            return o.isoformat()
        raise TypeError(
            "Object of type {} is not JSON serializable".format(
                type(o).__name__))

    def parse_json(self, list_of_dic):
        """
        Convert the API response into a list of unique JSON strings.

        Items missing a required field (title, description, dates, street or
        city) are skipped. A missing image becomes ``""`` and a missing or
        unknown category becomes ``[5]`` (Inne).
        :param list_of_dic: decoded API response; events are read from its
            ``"items"`` entry
        :return: list of JSON strings, one per distinct event
        """
        serialized_events = []
        seen = set()
        # One timestamp per batch: a per-event now() makes otherwise identical
        # events serialize differently, which defeats the duplicate check.
        created = datetime.datetime.now()
        for dic in list_of_dic["items"]:
            event = {"event": {}, "categories": ""}
            details = event["event"]
            try:
                offer = dic["offer"]
                details["name"] = offer["title"]
                details["shortDescription"] = self._get_first_sentence(
                    offer["longDescription"])
                details["longDescription"] = offer["longDescription"]
                details["creationDate"] = created
                details["eventStart"] = self.parse_data(
                    dic["startDate"].split("T")[0])
                details["eventEnd"] = self.parse_data(
                    dic["endDate"].split("T")[0])
                details["ownerId"] = 1
                try:
                    details["imageSource"] = offer["mainImage"]["standard"]
                except (KeyError, TypeError):
                    details["imageSource"] = ""
                address = dic["address"]
                city = address["city"]
                try:
                    details["address"] = ("Polska, " + address["street"] +
                                          " " + address["zipCode"])
                except (KeyError, TypeError):
                    # No usable zip code: fall back to street + city.
                    details["address"] = ("Polska, " + address["street"] +
                                          " " + city)
                details["addressCity"] = city
            except (KeyError, TypeError, AttributeError, ValueError,
                    IndexError):
                # Malformed item: skip it instead of failing later on the
                # missing address.
                continue
            details["geoJSON"] = str(
                self.scrap.create_geojson(query=details["address"]))
            try:
                event["categories"] = [
                    self._check_category(offer["categories"][0]["name"])
                ]
            except (KeyError, IndexError, TypeError, AttributeError):
                event["categories"] = [5]  # Inne
            e = json.dumps(event,
                           ensure_ascii=False,
                           default=self._date_converter)
            if e not in seen:
                seen.add(e)
                serialized_events.append(e)
        return serialized_events

    def get_event_today(self):
        """
        Fetch and parse events starting from the current day.
        :return: list of JSON strings
        """
        str_date = str(datetime.datetime.now().date())
        #self._save_json_today(str_date) #to test
        #list_of_dict = self.get_json("events_today_Wroclaw.json") #to test
        list_of_dict = self.make_request_and_get_json(self.url_today +
                                                      str_date)
        return self.parse_json(list_of_dict)

    def get_event_7days(self):
        """
        Fetch and parse events between the current day and seven days ahead.
        :return: list of JSON strings
        """
        today = datetime.datetime.now()
        str_date = str(today.date())
        str_end_date = str((today + datetime.timedelta(days=7)).date())
        #self._save_json_to_given_day(str_date, str_end_date) #to test
        #response = self.get_json("events_to_given_day_Wroclaw.json") #to test
        # "time-to" matches the "time-from" parameter in url_today (it used to
        # be sent as "time_to").
        response = self.make_request_and_get_json(self.url_today + str_date +
                                                  "&time-to=" + str_end_date)
        return self.parse_json(response)
class EventAPI_Poznan:
    """Fetch events from the poznan.pl XML API and convert them to JSON strings.

    Each parsed event is serialized as ``{"event": {...}, "categories": [id]}``
    where the ``event`` keys come from ``event_labels`` in ``./config.json``.
    Category ids are 1-based positions in ``self.category``:
    1-Sport, 2-Kultura, 3-Koncert, 4-Targi, 5-Inne, 6-Hackaton, 7-Rozrywka,
    8-Dziecko.
    """

    def __init__(self):
        """Load event labels from ``./config.json`` and set up URLs, categories and scraper."""
        with open('./config.json') as f:
            config_json = json.load(f)
        self.event_labels = config_json["event_labels"]
        self.url_today = "http://www.poznan.pl/mim/public/ws-information/?co=getCurrentDayEvents"
        self.url_to_given_day = "http://www.poznan.pl/mim/public/ws-information/?co=getEventsToDate&dateTo="
        self.category = [
            "Sport", "Kultura", "Koncert", "Targi", "Inne", "Hackaton",
            "Rozrywka", "Dziecko"
        ]
        self.scrap = Scrap()

    def _save_xml(self, url, name):
        """
        TEST HELPER
        Download ``url`` and write the response text to a file.
        :param url: address url
        :param name: file name
        """
        request = requests.get(url)
        with open(name, "w", encoding="UTF-8") as f:
            f.write(request.text)

    def _save_xml_today(self):
        """
        TEST HELPER
        Save the current day's events to ``events_today_Poznan.xml``.
        """
        self._save_xml(self.url_today, "events_today_Poznan.xml")

    def _save_xml_to_given_day(self, date):
        """
        TEST HELPER
        Save the events up to ``date`` to ``events_to_given_day_Poznan.xml``.
        :param date: end date appended to the URL, e.g. 2019-05-01
        """
        self._save_xml(self.url_to_given_day + date,
                       "events_to_given_day_Poznan.xml")

    def get_xml(self, name_xml):
        """
        TEST HELPER
        Read an XML file and parse it.
        :param name_xml: name of xml file
        :return: root element of the parsed document
        """
        with open(name_xml, "r", encoding="UTF-8") as f:
            xml = f.read()
        # fromstring already returns the root element.
        return ET.fromstring(xml)

    def make_request_and_get_root(self, url):
        """
        Request ``url`` and parse the response body as XML.
        :param url: address url
        :return: root element of the parsed document
        """
        request = requests.get(url)
        return ET.fromstring(request.text)

    def _get_first_sentence(self, long_description):
        """
        Build a short description from a long one.
        :param long_description: event long description
        :return: the text before the first "." followed by three dots
        """
        return str(long_description.split(".")[0]) + "..."

    def _check_category(self, category):
        """
        Map an API category name to a numeric category id.
        :param category: category name from the API
        :return: 2 when the name contains the word "Kultura", the 1-based
            position in ``self.category`` for a known name, otherwise 5 (Inne)
        """
        words = category.split(" ")
        if "Kultura" in words:
            return 2
        if category in self.category:
            return self.category.index(category) + 1
        if "Konferencje," in words:
            return 5  # conferences currently map to Inne
        return 5

    def parse_data(self, date, date1=None):
        """
        Convert a ``YYYY-MM-DD HH:MM:SS`` string to a datetime (seconds set to 0).
        :param date: date string
        :param date1: when not ``None`` it replaces ``date`` entirely
        :return: ``datetime.datetime``, or ``False`` when the value has no
            ``split`` method (e.g. ``None``)
        """
        # NOTE(review): date1 overrides date unconditionally, so the eventEnd
        # call in parse_xml always yields the start date. Confirm whether it
        # was meant as a fallback used only when date is missing.
        if date1 is not None:
            date = date1
        try:
            parts = date.split(' ')
            year, month, day = parts[0].split('-')
            hour, minute, _seconds = parts[1].split(':')
        except AttributeError:
            return False
        return datetime.datetime(int(year), int(month), int(day), int(hour),
                                 int(minute), 0)

    @staticmethod
    def _date_converter(o):
        """``json.dumps`` fallback that renders dates and datetimes in ISO format."""
        if isinstance(o, (datetime.date, datetime.datetime)):
            return o.isoformat()
        raise TypeError(
            "Object of type {} is not JSON serializable".format(
                type(o).__name__))

    def parse_xml(self, root):
        """
        Convert event elements into a list of unique JSON strings.

        Fields are read from each element by position. An element that cannot
        be processed is skipped. Processing stops once more than 10 events
        have been collected.
        :param root: iterable of event elements (the xml root)
        :return: list of JSON strings, one per distinct event
        """
        EVENT = []
        seen = set()
        for elem in root:
            try:
                url = elem[2].text  #event_url
                image_url = get_image_src(url)  # get image src from scrapping
                category = self._check_category(elem[10].text)
                address = elem[5][2].text
                geoJSON = self.scrap.create_geojson(query=address)
                long_description = elem[3][0][2].text
                event_array = [
                    elem[3][0][0].text,  #name
                    self._get_first_sentence(
                        long_description),  #shortDescription
                    long_description,  #longDescription
                    self.parse_data(elem[1].text),  #creationDate
                    self.parse_data(elem[7].text),  #eventStart
                    self.parse_data(elem[8].text, elem[7].text),  #eventEnd
                    1,  #ownerId
                    str(geoJSON),  #geoJSON
                    image_url,  #imageSource
                    address,  #address
                    "Poznan"  #addressCity
                ]
                event = {
                    'event': dict(zip(self.event_labels, event_array)),
                    'categories': [category]
                }
                e = json.dumps(event,
                               ensure_ascii=False,
                               default=self._date_converter)
            except Exception:
                # Best effort: skip elements that cannot be processed.
                # Catching Exception rather than using a bare except lets
                # KeyboardInterrupt/SystemExit stop the loop.
                continue
            if e not in seen:
                seen.add(e)
                EVENT.append(e)
            if len(EVENT) > 10:
                break
        return EVENT

    def get_event_today(self):
        """
        Fetch and parse the current day's events.
        :return: list of JSON strings
        """
        #self._save_xml_today() #to test
        #root = self.get_xml("events_today_Poznan.xml") #to test
        root = self.make_request_and_get_root(self.url_today)
        return self.parse_xml(root)

    def get_event_7days(self):
        """
        Fetch and parse the events up to seven days from now.
        :return: list of JSON strings
        """
        #self._save_xml_to_given_day(date) #to test
        #root = self.get_xml("events_to_given_day_Poznan.xml") #to test
        end_date = datetime.datetime.now() + datetime.timedelta(days=7)
        str_date = str(end_date.date())
        root = self.make_request_and_get_root(self.url_to_given_day + str_date)
        return self.parse_xml(root)
class EventAPI_Gdansk:
    """Fetch events from the planerkulturalny.pl API and convert them to JSON strings.

    Each parsed event is serialized as ``{"event": {...}, "categories": [id]}``.
    """

    def __init__(self):
        """Load event labels from ``./config.json`` and set up URL, category map and scraper."""
        with open('./config.json') as f:
            config_json = json.load(f)
        self.event_labels = config_json["event_labels"]
        self.url_today = "https://planerkulturalny.pl/api/rest/events.json?start_date="
        # Filled by get_category; without this attribute get_category raised
        # AttributeError.
        self.category = {}
        self.scrap = Scrap()

    def _save_json(self, url, name):
        """
        TEST HELPER
        Download ``url`` and write the response text to a file.
        :param url: address url
        :param name: file name
        """
        _json = requests.get(url)
        with open(name, "w", encoding="UTF-8") as f:
            f.write(_json.text)

    def _save_json_today(self, date):
        """
        TEST HELPER
        Save the events starting at ``date`` to ``events_today_Gdansk.json``.
        :param date: current day
        """
        self._save_json(self.url_today + date, "events_today_Gdansk.json")

    def _save_json_to_given_day(self, date, end_date):
        """
        TEST HELPER
        Save the events of a date range to ``events_to_given_day_Gdansk.json``.
        :param date: first day of the range
        :param end_date: last day of the range
        """
        self._save_json(self.url_today + date + "&end_date=" + end_date,
                        "events_to_given_day_Gdansk.json")

    def get_json(self, name_json):
        """
        TEST HELPER
        Read a JSON file and decode it.
        :param name_json: name of json file
        :return: decoded JSON content
        """
        with open(name_json, "r", encoding="UTF-8") as f:
            r_json = f.read()
        return json.loads(r_json)

    def make_request_and_get_json(self, url):
        """
        Request ``url`` and decode the response body as JSON.
        :param url: address url
        :return: decoded JSON content
        """
        r_json = requests.get(url)
        return json.loads(r_json.text)

    def _get_first_sentence(self, long_description):
        """
        Build a short description from a long one.
        :param long_description: event long description
        :return: the text before the first "." followed by three dots
        """
        return str(long_description.split(".")[0]) + "..."

    def get_category(self, event):
        """
        Remember the name of a category the first time it is seen.
        :param event: mapping with ``"category"`` and ``"name"`` entries
        :return: None; ``self.category`` is updated in place
        """
        # Category ids seen so far (meaning per the original author's notes):
        # 96 - Kultura 19 - Teatr 51 - Sztuka 1 - Sztuka/Kultura/Kino
        # 77 - Sport 83 - Kultura 35 - Teatr/Muzyka
        if event["category"] not in self.category:
            self.category[event["category"]] = event["name"]

    def parse_data(self, date):
        """
        Convert a ``YYYY-MM-DD`` string to a datetime at midnight.
        :param date: date string
        :return: ``datetime.datetime``, or ``False`` when ``date`` has no
            ``split`` method (e.g. ``None``)
        """
        try:
            year, month, day = date.split('-')
        except AttributeError:
            return False
        return datetime.datetime(int(year), int(month), int(day), 0, 0, 0)

    @staticmethod
    def _date_converter(o):
        """``json.dumps`` fallback that renders dates and datetimes in ISO format."""
        if isinstance(o, (datetime.date, datetime.datetime)):
            return o.isoformat()
        raise TypeError(
            "Object of type {} is not JSON serializable".format(
                type(o).__name__))

    def parse_json(self, list_of_dic):
        """
        Convert the API response into a list of unique JSON strings.

        Items missing a required field are skipped. A failed geocoding lookup
        becomes ``"{}"`` and a missing attachment becomes ``None``.
        :param list_of_dic: iterable of event dicts from the API
        :return: list of JSON strings, one per distinct event
        """
        serialized_events = []
        seen = set()
        # One timestamp per batch: a per-event now() makes otherwise identical
        # events serialize differently, which defeats the duplicate check.
        created = datetime.datetime.now()
        for dic in list_of_dic:
            event = {
                "event": {},
                "categories": "",
            }
            details = event["event"]
            try:
                details["name"] = dic["name"]
                details["shortDescription"] = self._get_first_sentence(
                    dic["descLong"])
                details["longDescription"] = dic["descLong"]
                details["creationDate"] = created
                details["eventStart"] = self.parse_data(
                    dic["startDate"].split("T")[0])
                details["eventEnd"] = self.parse_data(
                    dic["endDate"].split("T")[0])
                details["ownerId"] = 1
                try:
                    details["geoJSON"] = str(
                        self.scrap.create_geojson(query="Polska, Gdańsk " +
                                                  dic['place']['name']))
                except Exception:
                    # Geocoding is best effort; any failure gives an empty
                    # object.
                    details["geoJSON"] = "{}"
                try:
                    details["imageSource"] = dic["attachments"][0]["fileName"]
                except (KeyError, IndexError, TypeError):
                    details["imageSource"] = None
                if dic["categoryId"] == 77:
                    event["categories"] = [1]  # sport
                else:
                    event["categories"] = [5]  # everything else
                details["address"] = "Gdańsk, " + dic["place"]["name"]
                details["addressCity"] = "Gdańsk"
                e = json.dumps(event,
                               ensure_ascii=False,
                               default=self._date_converter)
            except (KeyError, TypeError, AttributeError, ValueError,
                    IndexError):
                # Malformed item: skip it.
                continue
            if e not in seen:
                seen.add(e)
                serialized_events.append(e)
        return serialized_events

    def get_event_today(self):
        """
        Fetch and parse events starting from the current day.
        :return: list of JSON strings
        """
        str_date = str(datetime.datetime.now().date())
        #self._save_json_today(str_date) #to test
        #list_of_dict = self.get_json("events_today_Gdansk.json") #to test
        list_of_dict = self.make_request_and_get_json(self.url_today +
                                                      str_date)
        return self.parse_json(list_of_dict)

    def get_event_7days(self):
        """
        Fetch and parse events between the current day and seven days ahead.
        :return: list of JSON strings
        """
        today = datetime.datetime.now()
        str_date = str(today.date())
        str_end_date = str((today + datetime.timedelta(days=7)).date())
        #self._save_json_to_given_day(str_date, str_end_date) #to test
        #response = self.get_json("events_to_given_day_Gdansk.json") #to test
        response = self.make_request_and_get_json(self.url_today + str_date +
                                                  "&end_date=" + str_end_date)
        return self.parse_json(response)
def __init__(self, *args, **kwargs):
    """Create the scraper and keep it on ``self.s``.

    Positional and keyword arguments are accepted but not used.
    """
    scraper = Scrap()
    self.s = scraper