def __init__(self, db):
    PostImporter.__init__(self, db)
    config_parser = getConfig()
    self.start_date = config_parser.eval("DEFAULT", "start_date")
    self.end_date = config_parser.eval("DEFAULT", "end_date")
    self._data_folder = self._config_parser.eval(self.__class__.__name__, "data_folder")

def __init__(self, db):
    PostImporter.__init__(self, db)
    self._max_number_of_threads = self._config_parser.eval(
        self.__class__.__name__, "max_number_of_threads")
    self._shorten_url_expended_url_dict = {}
    self._destination_url_source_url_dict = {}
    self.resolved_urls = []

def __init__(self, db):
    PostImporter.__init__(self, db)
    self._db = db
    self._twitter_crawler = Generic_Twitter_Crawler(db)
    self._missing_data_complementor = MissingDataComplementor(db)
    self._original_tsv_location = self._config_parser.eval(
        self.__class__.__name__, "original_tsv_location")
    self._limit_per_crawl = self._config_parser.eval(
        self.__class__.__name__, "limit_per_crawl")
    self._post_label_dict = {}
    self._start_len = len(self._post_label_dict.keys())
    self._num_of_rows_in_dataset = 0
    self._post_author_to_label_dict = {}

    chrome_options = Options()
    chrome_options.add_argument("--headless")  # run Chrome in the background
    self._web_driver = selenium.webdriver.Chrome(
        executable_path=r'vendors\chromedriver\chromedriver.exe',
        chrome_options=chrome_options)

    # statistics vars
    self._counter = 0
    self._more_than_one = 0
    self._exactly_one = 0
    self._loggin_processed = 0

def __init__(self, db):
    PostImporter.__init__(self, db)
    self._past_hours = self._config_parser.eval(self.__class__.__name__, "past_hours")
    self._author_name_as_domain = self._config_parser.eval(
        self.__class__.__name__, "author_name_as_domain")
    self._author_classify_dict = {}
    self._author_prop_dict = {}

def __init__(self, db):
    PostImporter.__init__(self, db)
    config_parser = getConfig()
    self.xmlPath = config_parser.get(self.__class__.__name__, "xml_path")
    # self.xmlPath = configInst.get(self.__class__.__name__, "XMDL_source_path")
    self.fileName = None
    self.CurrFolderPath = None

def __init__(self, db):
    PostImporter.__init__(self, db)
    configInst = getConfig()
    self.source_path = configInst.get(self.__class__.__name__, "FDL_source_path")
    self.file_date_format = configInst.get(self.__class__.__name__, "date_format")
    self.fileName = None  # @review: not a field; make it a local variable.
    self.URLforUnittest = None  # @review: no code should be written specifically for unit tests (except the tests themselves).
    self.CurrFolderPath = None  # @review: coding convention: lowercase field names, with a leading underscore for private members.

def __init__(self, db):
    PostImporter.__init__(self, db)
    config_parser = getConfig()
    self.start_date = config_parser.eval("DEFAULT", "start_date")
    self.end_date = config_parser.eval("DEFAULT", "end_date")
    self._data_folder = self._config_parser.eval(self.__class__.__name__, "data_folder")
    self._bad_actor_threshold = self._config_parser.eval(
        self.__class__.__name__, "bad_actor_threshold")
    self._optional_classes = self._config_parser.eval(
        self.__class__.__name__, "optional_classes")
    self._author_classify_dict = {}

def __init__(self, db):
    # PolitiFact post titles are biased, so do not use them as features.
    PostImporter.__init__(self, db)
    self._domain = u"PolitiFact"
    self._subjects = self._config_parser.eval(self.__class__.__name__, "subjects")
    self._posts_per_subject = self._config_parser.eval(
        self.__class__.__name__, "posts_per_subject")
    self._post_types = self._config_parser.eval(self.__class__.__name__, "post_types")
    self._author_classify_dict = {}
    self._author_prop_dict = {}
    self._post_type_dict = {}

def __init__(self, db):
    PostImporter.__init__(self, db)
    self._author_name_as_domain = self._config_parser.eval(
        self.__class__.__name__, "author_name_as_domain")
    self._retrieve_news_by_keywords = self._config_parser.eval(
        self.__class__.__name__, "retrieve_news_by_keywords")
    self._num_of_top_terms = self._config_parser.eval(
        self.__class__.__name__, "num_of_top_terms")
    self._filter_sentences = self._config_parser.eval(
        self.__class__.__name__, "filter_sentences")
    self._topic_term_manager = Topic_Term_Manager(db)
    self._characters_to_add_to_unstemmed_words = [
        'e', 'able', 'al', 'ial', 'ion', 'ing', 'er', 'ies'
    ]

def __init__(self, db):
    PostImporter.__init__(self, db)
    self._input_csv_file = self._config_parser.eval(self.__class__.__name__, "input_csv_file")
    # There is no author, so the website itself serves as the author.
    # This author should not be included in the analysis.
    self._author_name = unicode("snopes")

def __init__(self, db):
    PostImporter.__init__(self, db)
    self._data_folder = self._config_parser.eval(self.__class__.__name__, "data_folder")

def execute(self, window_start=None):
    logging.info("execute")
    logging.info("PostImporter.execute(self, window_start)")
    PostImporter.execute(self, window_start)

def __init__(self, db):
    PostImporter.__init__(self, db)
    self._path_to_file = self._config_parser.eval(self.__class__.__name__, "path_to_file")

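
# --- Hedged usage sketch (not part of the original code) ---
# Every __init__ above follows the same pattern: delegate to
# PostImporter.__init__(self, db), read per-class settings from the shared config
# parser under the section named after the class, and expose the work through
# execute(). The subclass below only illustrates that pattern; PostImporter,
# logging, and the config-lookup style come from the snippets above, while the
# class name, the "path_to_file" option, and the main block are assumptions.
import logging


class ExamplePostImporter(PostImporter):
    def __init__(self, db):
        PostImporter.__init__(self, db)
        # per-class option, resolved from the config section "ExamplePostImporter"
        self._path_to_file = self._config_parser.eval(self.__class__.__name__, "path_to_file")

    def execute(self, window_start=None):
        logging.info("ExamplePostImporter.execute")
        PostImporter.execute(self, window_start)


if __name__ == "__main__":
    db = None  # replace with the framework's database object that every importer receives
    importer = ExamplePostImporter(db)
    importer.execute(window_start=None)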