示例#1
0
 def process_data(self, data: FilteredDomainData, **kwargs):
     """Run the enabled Majestic checks on one domain and forward it if it qualifies.

     Checks run in this order: ``_filter_domain_name`` (when spam checking is
     enabled), ``_filter_tf_cf_backlink_ratio`` (when TF checking is enabled),
     the minimum tf / ref_domains thresholds, then ``_filter_anchor_text`` and
     ``_filter_ref_domains`` (when spam checking is enabled). A failing check
     is recorded on ``data.exception`` instead of being raised to the caller.

     Args:
         data: The domain record to evaluate.
         **kwargs: Expected to carry ``Account``, the ``SiteAccount`` used to
             build the Majestic client.

     Returns:
         ``data`` when its tf, cf and ref_domains all reach the configured
         minimums, otherwise ``None``.
     """
     account = kwargs.get("Account")
     is_spammed = False
     result = None
     try:
         if not (isinstance(data, FilteredDomainData) and isinstance(account, SiteAccount)):
             raise ValueError("account is none in process_data")
         majestic = MajesticCom(account)
         if self._en_spam_check:
             self._filter_domain_name(domain=data.domain)
         if self._en_tf_check:
             data = self._filter_tf_cf_backlink_ratio(majestic, data)
         if not (data.tf >= self._min_tf and data.ref_domains >= self._min_ref_domains):
             raise ValueError("tf or cf doesn't match. tf:" + str(data.tf) + " cf: " + str(data.cf) + " ref domain: " + str(data.ref_domains))
         if self._en_spam_check:
             self._filter_anchor_text(majestic, data.domain)
             self._filter_ref_domains(majestic, data.domain)
     except MajesticSpamException as mjx_ex:
         is_spammed = True
         data.exception = str(mjx_ex)
     except Exception as ex:
         # The failure may be that ``data`` is not a domain record at all;
         # only annotate objects known to carry an ``exception`` field.
         if isinstance(data, FilteredDomainData):
             data.exception = str(ex)
     finally:
         # ``account`` can be None here (that is one of the errors handled
         # above), so never dereference it directly while logging.
         PrintLogger.print("Majestic processed: '" + str(data) + "' with: " + str(getattr(account, "userID", None)))
         if isinstance(data, FilteredDomainData):
             with self._sync_lock:
                 self._job_done += 1
                 if account is not None:
                     account.Available = True  # hand the account back for reuse
                 if data.tf >= self._min_tf and data.cf >= self._min_cf and data.ref_domains >= self._min_ref_domains:
                     if not self._is_throughput_debug:
                         # Spam hits go to the "bad" log, everything else to the main log.
                         target_log = self._bad_log_file if is_spammed else self._log_file
                         CsvLogger.log_to_file(target_log, [data.to_tuple()], dir_path=FilePath.get_temp_db_dir())
                     self._output_queue.put(data)
                     result = data
                 elif self._is_throughput_debug:
                     # In throughput-debug mode every record is forwarded.
                     self._output_queue.put(data)
     # Returned outside ``finally`` so an in-flight exception such as
     # KeyboardInterrupt is never silently discarded by the return.
     return result
示例#2
0
 def process_data_batch(self, data: "collections.abc.Iterable", **kwargs):
     """Fetch Moz rankings for a batch of domains and forward those that qualify.

     Only ``FilteredDomainData`` items whose domain passes
     ``TldUtility.is_top_tld`` are looked up. Each gets its ``da`` set from the
     ranking at the same position, and items whose ``da`` reaches
     ``self._min_DA_value`` are put on the output queue (and logged to the CSV
     file unless throughput-debug mode is on). Errors are logged, not raised.

     Args:
         data: Iterable of domain records; it is materialised once, so a
             one-shot iterator is supported.
         **kwargs: Expected to carry ``Account``, the ``SiteAccount`` used to
             build the Moz client.
     """
     # ``collections.Iterable`` was removed in Python 3.10; the ABC lives in
     # ``collections.abc`` (available since Python 3.3).
     from collections.abc import Iterable

     account = kwargs.get("Account")
     # Resolved once and defensively: the account may be missing, which is
     # exactly the error case the handlers below have to report.
     account_id = str(getattr(account, "userID", None))
     batch = []
     temp = []
     try:
         data_is_iterable = isinstance(data, Iterable)
         if data_is_iterable:
             # Snapshot the input so filtering and job counting see the same items.
             batch = list(data)
         if not (data_is_iterable and isinstance(account, SiteAccount)):
             raise ValueError("account is none in process_data_batch()")
         temp = [x for x in batch if isinstance(x, FilteredDomainData) and TldUtility.is_top_tld(x.domain)]
         check_list = [y.domain for y in temp]
         sleep_time = random.randint(self._min_sleep_time, self._max_wait)
         time.sleep(sleep_time)
         moz = MozCom(account)
         if not self._is_throughput_debug:
             rankings = moz.get_ranking_data_batch(check_list, limit=len(check_list))
         else:
             rankings = [100] * len(temp)
         # Indexed access on purpose: a short ``rankings`` result raises and
         # gets logged instead of being silently truncated.
         for i, item in enumerate(temp):
             item.da = rankings[i]
     except Exception as ex:
         ErrorLogger.log_error("MozFilter", ex, "process_data_batch() " + str(data) + " account: " + account_id)
     finally:
         PrintLogger.print("Moz processed: " + str(data) + " with: " + account_id)
         with self._sync_lock:
             self._job_done += sum(1 for x in batch if x is not None)
             if account is not None:
                 account.Available = True  # hand the account back for reuse
             for item in temp:
                 if item.da >= self._min_DA_value:
                     if not self._is_throughput_debug:
                         CsvLogger.log_to_file(self._log_file, [(item.domain, item.da)])  # log this to file
                     self._output_queue.put(item)
示例#3
0
 def process_data(self, data: FilteredDomainData, **kwargs):
     """Fetch the Moz ranking for one domain and forward it if it qualifies.

     When the domain passes ``TldUtility.is_top_tld`` the method sleeps for a
     random interval, queries Moz (or uses a fixed 100 in throughput-debug
     mode) and stores the result in ``data.da``. Records whose ``da`` reaches
     ``self._min_DA_value`` are put on the output queue and, outside
     throughput-debug mode, logged to the CSV file. Errors are logged, not
     raised.

     Args:
         data: The domain record to evaluate.
         **kwargs: Expected to carry ``Account``, the ``SiteAccount`` used to
             build the Moz client.
     """
     account = kwargs.get("Account")
     # Resolved once and defensively: the account may be missing, which is
     # exactly the error case the handlers below have to report.
     account_id = str(getattr(account, "userID", None))
     try:
         if not (isinstance(data, FilteredDomainData) and isinstance(account, SiteAccount)):
             raise ValueError("account is none in process_data")
         if TldUtility.is_top_tld(data.domain):
             sleep_time = random.randint(self._min_sleep_time, self._max_wait)
             time.sleep(sleep_time)
             moz = MozCom(account)
             if not self._is_throughput_debug:
                 ranking = moz.get_ranking_data(data.domain)
             else:
                 ranking = 100
             data.da = ranking
     except Exception as ex:
         ErrorLogger.log_error("MozFilter", ex, "process_data() " + str(data) + " account: " + account_id)
     finally:
         PrintLogger.print("Moz processed: " + str(data) + " with: " + account_id)
         if isinstance(data, FilteredDomainData):
             with self._sync_lock:
                 self._job_done += 1
                 if account is not None:
                     account.Available = True  # hand the account back for reuse
             if data.da >= self._min_DA_value:
                 if not self._is_throughput_debug:
                     CsvLogger.log_to_file(self._log_file, [(data.domain, data.da)])  # log this to file
                 self._output_queue.put(data)
 def vaccum_db(self):
     """Interrupt the current connection, reconnect and VACUUM the database.

     A failure while interrupting is printed and otherwise ignored; the
     reconnect and VACUUM run regardless. ``self.db`` is replaced by a new
     connection to ``self.filename`` opened with a 10 second timeout.

     NOTE: the previous connection object is only interrupted here, not
     explicitly closed.
     """
     try:
         self.db.interrupt()
     except Exception as ex:
         PrintLogger.print(ex)
     finally:
         self.db = sqlite3.connect(self.filename, timeout=10)
         # SQLite's VACUUM takes an optional *schema* name, never a table
         # name; passing the table name fails with "unknown database" on
         # SQLite 3.15+. A bare VACUUM rebuilds the main database file.
         self.db.execute("VACUUM;")
 def vaccum_db(self):
     """Interrupt the current connection, reconnect and VACUUM the database.

     A failure while interrupting is printed and otherwise ignored; the
     reconnect and VACUUM run regardless. ``self.db`` is replaced by a new
     connection to ``self.filename`` opened with a 10 second timeout.

     NOTE: the previous connection object is only interrupted here, not
     explicitly closed.
     """
     try:
         self.db.interrupt()
     except Exception as ex:
         PrintLogger.print(ex)
     finally:
         self.db = sqlite3.connect(self.filename, timeout=10)
         # SQLite's VACUUM takes an optional *schema* name, never a table
         # name; passing the table name fails with "unknown database" on
         # SQLite 3.15+. A bare VACUUM rebuilds the main database file.
         self.db.execute("VACUUM;")
 def empty_feedback_queue(self):
     """Drain the feedback queue, handing each non-None item to ``memory_limit_callback``.

     Any exception raised while draining is reported through ``PrintLogger``
     and not re-raised.
     """
     try:
         PrintLogger.print("in MemoryControlPs: trying to empty queue")
         while True:
             if self._feedback_queue.empty():
                 break
             item = self._feedback_queue.get(block=False, timeout=0.001)
             if item is None:
                 continue
             self.memory_limit_callback(item)
     except Exception as err:
         PrintLogger.print("in MemoryControlPs.empty_feedback_queue()" + str(err))
示例#7
0
 def empty_feedback_queue(self):
     """Pull items off the feedback queue until it reports empty.

     Every item that is not None is passed to ``memory_limit_callback``. An
     exception at any point stops the drain and is printed via
     ``PrintLogger`` rather than propagated.
     """
     try:
         PrintLogger.print("in MemoryControlPs: trying to empty queue")
         has_more = not self._feedback_queue.empty()
         while has_more:
             entry = self._feedback_queue.get(block=False, timeout=0.001)
             if entry is not None:
                 self.memory_limit_callback(entry)
             has_more = not self._feedback_queue.empty()
     except Exception as err:
         failure_text = str(err)
         PrintLogger.print("in MemoryControlPs.empty_feedback_queue()" + failure_text)
        def wrap(*args, **kw):
            # Time one call of the wrapped ``method`` and report according to
            # the enclosing ``log_if_longer`` threshold.
            started = time.time()
            result = method(*args, **kw)
            finished = time.time()
            elapsed = finished - started

            if log_if_longer == 0:
                # A threshold of exactly 0 prints the duration of every call.
                PrintLogger.print('%r took %2.2f sec' % (method.__name__, elapsed))
            elif elapsed > log_if_longer > 0:
                # Positive threshold exceeded: print the call details and
                # record it through ErrorLogger under ``ref``.
                call_details = (method.__name__, args, kw, elapsed)
                PrintLogger.print('%r (%r, %r) %2.2f sec' % call_details)
                slow_error = ValueError("Operation took too long.")
                ErrorLogger.log_error(ref, slow_error, "completed in " + str(elapsed))

            return result