def fetch(self, url):
    """Import unseen entries from the feed at ``url`` as jobs for review.

    The feed is parsed with ``feedparser``.  For every entry the summary is
    flattened (newlines become spaces, runs of spaces/tabs collapse to one
    space).  An entry whose link already matches the ``origin_url`` of a
    stored ``Job`` is skipped; any other entry is saved as a new ``Job``
    (its link is used for both ``origin_url`` and ``url``), handed to
    ``autotag`` and reported through ``log``.
    """
    for item in feedparser.parse(url).entries:
        heading = item.title
        flattened = item.summary.replace("\n", " ")
        summary = re.sub(r"[ \t]+", " ", flattened)
        link = item.link

        # Only create a job when no stored job came from this link.
        seen_before = Job.objects.filter(origin_url=link).count() != 0
        if not seen_before:
            new_job = Job(
                title=heading,
                description=summary,
                origin_url=link,
                url=link,
            )
            new_job.save()
            autotag(new_job)
            log.info("added job for review: %s" % new_job)
def fetch(self, url):
    """Import unseen entries from the feed at ``url`` as jobs for review.

    The feed is parsed with ``feedparser``.  For every entry the summary is
    flattened (newlines become spaces, runs of spaces/tabs collapse to one
    space).  An entry whose link already matches the ``origin_url`` of a
    stored ``Job`` is skipped; any other entry is saved as a new ``Job``
    (its link is used for both ``origin_url`` and ``url``), handed to
    ``autotag`` and reported through ``log``.

    Progress (the feed URL and each entry title) is emitted at debug level
    through ``log`` instead of being printed to stdout.
    """
    feed = feedparser.parse(url)
    # NOTE(review): assumes ``log`` is a standard logging.Logger (it is
    # already used for ``log.info`` below) -- confirm it supports ``debug``.
    log.debug("fetching feed: %s", url)

    for entry in feed.entries:
        title = entry.title
        log.debug("feed entry: %s", title)

        description = entry.summary.replace("\n", " ")
        description = re.sub(r"[ \t]+", " ", description)

        # Use a separate name so the ``url`` parameter (the feed address)
        # is not silently rebound to an entry link inside the loop.
        entry_url = entry.link

        # Skip entries that were already imported from this link.
        if Job.objects.filter(origin_url=entry_url).count() != 0:
            continue

        job = Job(
            title=title,
            description=description,
            origin_url=entry_url,
            url=entry_url,
        )
        job.save()
        autotag(job)
        log.info("added job for review: %s", job)
def email_to_job(msg):
    """Turn a job-posting email into a saved ``Job``.

    ``msg`` is indexed by header name; the 'subject', 'message-id', 'from'
    and 'date' headers are read, and the body is obtained via
    ``get_html(get_body(msg))``.

    Returns the saved ``Job``, or ``None`` when the message is skipped
    because:

    * ``is_job_email(msg)`` is false,
    * a ``Job`` with the same ``email_message_id`` is already stored,
    * the extracted description is empty, or
    * the description contains 'http://jobs.code4lib.org' (treated as a
      posting that shortimer itself sent out).
    """
    logging.info("looking at email with subject: %s", msg['subject'])

    if not is_job_email(msg):
        return None

    message_id = msg['message-id']

    # Skip when this message is already stored.  Checking for existence
    # (rather than a count of exactly 1) also rejects the message when it
    # has somehow been stored more than once.
    if Job.objects.filter(email_message_id=message_id).exists():
        return None

    logging.info("parsing job email %s", message_id)

    j = Job()
    j.contact_name, j.contact_email = rfc822.parseaddr(msg['from'])
    j.contact_name = normalize_name(j.contact_name)
    j.contact_email = j.contact_email.lower()

    # NOTE: employer detection (from the sender's address) was only ever
    # present as commented-out code here and is not implemented.

    # Strip the list prefix and any line breaks from the subject.
    j.title = re.sub(r"^\[CODE4LIB\] ", "", msg['subject'])
    j.title = re.sub(r"[\n\r]", "", j.title)
    j.email_message_id = message_id
    j.description = get_html(get_body(msg))

    # NOTE(review): rfc822.parsedate returns None for an unparseable Date
    # header, which makes time.mktime raise TypeError -- confirm whether
    # such messages should be skipped instead.
    t = time.mktime(rfc822.parsedate(msg['date']))
    j.post_date = datetime.datetime.fromtimestamp(t)

    if not j.description:
        logging.warning("missing body")
        return None

    if 'http://jobs.code4lib.org' in j.description:
        logging.warning("not loading a job that shortimer posted")
        return None

    # Saved once before tagging and once after, as in the original flow.
    j.save()
    autotag(j)
    j.save()
    return j
def email_to_job(msg):
    """Turn a job-posting email into a saved ``Job`` with subjects attached.

    ``msg`` is indexed by header name; the 'subject', 'message-id', 'from'
    and 'date' headers are read, and the body is obtained via
    ``get_html(get_body(msg))``.  After the job is saved, every ``Subject``
    having a keyword whose name equals a lower-cased noun from the
    description (as produced by ``nouns``) is added to ``j.subjects``.

    Returns the saved ``Job``, or ``None`` when the message is skipped
    because:

    * ``is_job_email(msg)`` is false,
    * a ``Job`` with the same ``email_message_id`` is already stored, or
    * the extracted description is empty.
    """
    logging.info("looking at email with subject: %s", msg['subject'])

    if not is_job_email(msg):
        return None

    message_id = msg['message-id']

    # Skip when this message is already stored.  Checking for existence
    # (rather than a count of exactly 1) also rejects the message when it
    # has somehow been stored more than once.
    if Job.objects.filter(email_message_id=message_id).exists():
        return None

    logging.info("parsing job email %s", message_id)

    j = Job()
    j.contact_name, j.contact_email = rfc822.parseaddr(msg['from'])
    j.contact_name = normalize_name(j.contact_name)
    j.contact_email = j.contact_email.lower()

    # NOTE: employer detection (from the sender's address) was only ever
    # present as commented-out code here and is not implemented.

    # Strip the list prefix and any line breaks from the subject.
    j.title = re.sub(r"^\[CODE4LIB\] ", "", msg['subject'])
    j.title = re.sub(r"[\n\r]", "", j.title)
    j.email_message_id = message_id
    j.description = get_html(get_body(msg))

    # NOTE(review): rfc822.parsedate returns None for an unparseable Date
    # header, which makes time.mktime raise TypeError -- confirm whether
    # such messages should be skipped instead.
    t = time.mktime(rfc822.parsedate(msg['date']))
    j.post_date = datetime.datetime.fromtimestamp(t)

    if not j.description:
        logging.warning("missing body")
        return None

    # The job is saved before subjects are attached, as in the original.
    j.save()

    # Automatically assign subjects based on keywords in the description.
    # Nouns are lower-cased and de-duplicated first so that each distinct
    # keyword triggers a single Subject lookup.
    keywords = set(n.lower() for n in nouns(j.description))
    for keyword in keywords:
        for subject in Subject.objects.filter(keywords__name=keyword):
            j.subjects.add(subject)

    j.save()
    return j