예제 #1
0
파일: sms_send.py 프로젝트: yujiye/Codes
def send_verify_code(mobile, content):
    """Send an SMS through the combmobile HTTP gateway.

    Args:
        mobile: Destination phone number as a string. Numbers starting with
            "110" are reported as sent without contacting the gateway.
        content: Message text; it is encoded as GB2312 before being posted.

    Returns:
        True if the number starts with "110" or the gateway replies with a
        positive integer return code; False otherwise, including when any
        error occurs while sending the request or parsing the reply.
    """
    if mobile.startswith("110"):
        return True

    flag = False
    try:
        url = "http://sms.combmobile.com/sdk/gxmt"
        pwd = util.md5str(SN+PWD).upper()

        payload = {
            "sn": SN,
            "pwd": pwd,
            "mobile": mobile,
            "content": content.encode("gb2312"),
            "ext": "",
            "stime": "",
            "rrid": ""
        }

        # A timeout keeps a stalled gateway from blocking the caller forever.
        r = requests.post(url, data=payload, timeout=10)
        root = etree.fromstring(r.text.encode("utf8"))
        if root.tag == '{http://tempuri.org/}string':
            return_code = root.text
            return_code_int = int(return_code)
            if return_code_int > 0:
                flag = True
                logger.info("sent ok. return code: %s", return_code)
            else:
                logger.info("sent fail. return code: %s", return_code)
    except Exception:
        # Best effort: report failure to the caller, but do not swallow
        # KeyboardInterrupt/SystemExit the way a bare ``except:`` would.
        traceback.print_exc()
    return flag
예제 #2
0
def insert(shortname, name, brief, website):
    """Save a source-13100 company and register its full and short names.

    The "(开业)" marker is stripped from ``name`` and the MD5 of the cleaned
    name is used as the source id. ``website`` is accepted but not used.
    """
    cleaned_name = name.replace("(开业)","")
    source_key = util.md5str(cleaned_name)
    company_sid = parser_db_util.save_company_yitai(
        shortname, cleaned_name, 13100, source_key, brief)
    logger.info("sid:%s->sourceId:%s", company_sid, source_key)

    # Full name first (12010), then the short name (12020).
    for label, name_type in ((cleaned_name, 12010), (shortname, 12020)):
        parser_db_util.save_source_company_name(company_sid, label, name_type)
예제 #3
0
def findc(aname):
    """Find the company linked to a source-13100 record, looked up by name.

    The "(开业)" marker is stripped from ``aname`` and the MD5 of the result
    is matched against ``source_company.sourceId``.

    Returns:
        A ``(rvalue, companyId)`` tuple. ``rvalue`` is 1 when the company row
        exists and its ``active`` field is "A", "P" or "N", otherwise 0.

    If no matching source_company row exists, "wrong" is logged and the
    process exits.
    """
    aname = aname.replace("(开业)", "")
    sourceId = util.md5str(aname)

    conn = db.connect_torndb()
    try:
        sc = conn.get(
            "select * from source_company where source=13100 and sourceId=%s",
            sourceId)
        if sc is None:
            logger.info("wrong")
            # SystemExit still passes through ``finally``, so the connection
            # is released before the process terminates.
            exit()
        companyId = sc["companyId"]
        company = conn.get("select * from company where id=%s", companyId)
    finally:
        conn.close()

    rvalue = 0
    if company is not None and company["active"] in ["A", "P", "N"]:
        rvalue = 1
    return rvalue, companyId
예제 #4
0
 def __init__(self, id, author, nickname, like_num, content):
     """Store the given fields on the instance.

     ``id``, ``author`` and ``like_num`` are coerced with ``int()``;
     ``nickname`` and ``content`` are stored unchanged. ``md5_content`` is
     set to the result of ``md5str(content)``.
     """
     self.id = int(id)
     self.author = int(author)
     self.nickname = nickname
     # The constructor argument is ``like_num``; the attribute is ``num_likes``.
     self.num_likes = int(like_num)
     self.content = content
     self.md5_content = md5str(content)
예제 #5
0
def insert(shortname, name, brief, website):
    """Save a source-13100 company, its names and, if usable, its website.

    The "(开业)" marker is stripped from ``name`` and the MD5 of the cleaned
    name is used as the source id. When ``website`` normalises to a
    non-empty URL that ``url_helper.get_market`` classifies as type 4010,
    and 'sse.com' does not occur after the first character, one artifact
    record is saved for it.

    Args:
        shortname: Short company name; also used as the artifact name.
        name: Full company name.
        brief: Company brief passed through to the save call.
        website: Raw website string, or None.
    """
    name = name.replace("(开业)", "")
    sourceId = util.md5str(name)
    sid = parser_db_util.save_company_yitai(shortname, name, 13100, sourceId,
                                            brief)
    logger.info("sid:%s->sourceId:%s", sid, sourceId)
    parser_db_util.save_source_company_name(sid, name, 12010)
    parser_db_util.save_source_company_name(sid, shortname, 12020)

    if website is None or website.strip() == "":
        return
    website = url_helper.url_normalize(website)
    if website is None or website == "":
        return

    # The original tested ``website.find("https://")`` for truthiness, which
    # is only false when the URL *starts* with https://; state the intended
    # "has no scheme yet" check explicitly.
    if not website.startswith(("http://", "https://")):
        website = "http://" + website

    artifact_type, _market, app_id = url_helper.get_market(website)
    if artifact_type != 4010:
        return
    if website.find('sse.com') > 0:
        # Links containing sse.com are deliberately not stored as artifacts.
        return

    artifact = {
        "sourceCompanyId": sid,
        "name": shortname,
        "description": None,
        "link": website,
        "domain": app_id,
        "type": artifact_type
    }
    parser_db_util.save_artifacts_standard(sid, [artifact])
예제 #6
0
def insert_funding(sid, roundstr, inv, fundingDate, investor):
    """Normalise one funding record and save it with its single investor.

    Args:
        sid: Source company id the funding belongs to.
        roundstr: Round label; "re-A轮" and "re-IPO" are repaired to
            "Pre-A" and "Pre-IPO" before parsing.
        inv: Investment amount text; whitespace is removed and a set of
            approximate phrasings is mapped to a canonical amount.
        fundingDate: Funding date, stored as given.
        investor: Investor name; its MD5 is used as the investor source id.

    Any error is logged together with the inputs and then suppressed, so one
    bad record does not abort the caller.
    """
    try:
        inv = "".join(inv.split())
        if inv in ["超千万人民币", "千万人民币", "近千万人民币", "过千万人民币", "上千万人民币", "1千万人民币"]:
            inv = "1000万人民币"
        elif inv in [
                "超亿人民币", "近亿人民币", "过亿人民币", "上亿人民币", "亿人民币", "一亿人民币",
                "亿人民币及以上人民币"
        ]:
            inv = "1亿人民币"
        elif inv in ["超千万美元", "千万美元", "近千万美元", "过千万美元", "上千万美元", "1千万美元"]:
            inv = "1000万美元"
        elif inv in ["百万美元", "近百万美元", "过百万美元", "上百万美元", "1百万美元"]:
            inv = "100万美元"
        elif inv in ["百万人民币", "近百万人民币", "过百万人民币", "上百万人民币", "1百万人民币"]:
            inv = "100万人民币"

        if roundstr == "re-A轮":
            roundstr = "Pre-A"
        elif roundstr == "re-IPO":
            roundstr = "Pre-IPO"
        fundingRound, roundStr = itjuzi_helper.getFundingRound(
            unicode(roundstr))
        currency, investment, precise = itjuzi_helper.getMoney(unicode(inv))

        source_funding = {
            "sourceCompanyId": sid,
            "preMoney": None,
            "postMoney": None,
            "investment": investment,
            "precise": precise,
            "round": fundingRound,
            "roundDesc": roundStr,
            "currency": currency,
            "fundingDate": fundingDate,
            "newsUrl": None
        }
        source_investors = [{
            "name": investor,
            "website": None,
            "description": None,
            "logo_url": None,
            "stage": None,
            "field": None,
            "type": 10020,
            "source": 13100,
            "sourceId": util.md5str(investor)
        }]

        parser_db_util.save_funding_standard(source_funding, download_crawler,
                                             source_investors)
    except Exception:
        # The handler used to reference an undefined name (``fdate``), which
        # raised NameError from inside the handler. Log the real parameter,
        # and keep the traceback so the failure is diagnosable.
        logger.exception("%s/%s/%s/%s", roundstr, inv, fundingDate, investor)
예제 #7
0
def insert(shortname, brief):
    """Save a source-13120 company known only by its short name.

    The MD5 of ``unicode(shortname)`` is used as the source id, and the
    short name is registered with name type 12020.

    Returns:
        The value returned by ``parser_db_util.save_company_yitai``.
    """
    source_key = util.md5str(unicode(shortname))
    company_sid = parser_db_util.save_company_yitai(
        shortname, None, 13120, source_key, brief)
    logger.info("sid:%s->sourceId:%s", company_sid, source_key)
    parser_db_util.save_source_company_name(company_sid, shortname, 12020)
    return company_sid
예제 #8
0
def insert(shortname, name, brief, fullNames):
    """Save a source-13100 company with its short name and all full names.

    The "(开业)" marker is stripped from ``name`` and the MD5 of the cleaned
    name is used as the source id. The short name is saved with type 12020;
    the cleaned name followed by every entry of ``fullNames`` is saved with
    type 12010.

    Returns:
        The value returned by ``parser_db_util.save_company_yitai``.
    """
    cleaned_name = name.replace("(开业)", "")
    source_key = util.md5str(cleaned_name)
    company_sid = parser_db_util.save_company_yitai(
        shortname, cleaned_name, 13100, source_key, brief)

    parser_db_util.save_source_company_name(company_sid, shortname, 12020)
    full_name_candidates = [cleaned_name] + fullNames
    for candidate in full_name_candidates:
        parser_db_util.save_source_company_name(company_sid, candidate, 12010)

    return company_sid
예제 #9
0
def add_2_company_list(name):
    """Add a normalised company name to ``mongo.info.company_idx`` if absent.

    Nothing is stored when normalisation returns None, when
    ``name_helper.name_check`` does not return ``True`` for both of its
    results, or when a document with the same ``name_md5`` already exists.
    """
    normalized = name_helper.company_name_normalize(name)
    if normalized is None:
        return

    is_chinese, is_company = name_helper.name_check(normalized)
    if not (is_chinese is True and is_company is True):
        return

    logger.info("fullname: %s", normalized)
    digest = util.md5str(normalized)
    existing = mongo.info.company_idx.find_one({"name_md5": digest})
    if existing is not None:
        return

    mongo.info.company_idx.insert_one({
        "name": normalized,
        "name_md5": digest,
        "createTime": datetime.datetime.utcnow()
    })
예제 #10
0
파일: common.py 프로젝트: hyj0/udp_trans
def fragmentstr(inputstr, type):
    """Split ``inputstr`` into BUFLEN-sized pieces wrapped as block dicts.

    Returns:
        A list with one dict per piece, in order. Each dict carries the
        caller's ``type``, the md5 of the whole input (``md5``), the
        zero-based ``blockId``, the piece count (``totalBlock``) and the
        piece itself (``buf``). Empty input gives an empty list.
    """
    digest = util.md5str(inputstr)

    # Slice successive windows until one comes back empty.
    pieces = []
    offset = 0
    piece = inputstr[offset:offset + BUFLEN]
    while len(piece) != 0:
        pieces.append(piece)
        offset += BUFLEN
        piece = inputstr[offset:offset + BUFLEN]

    total = len(pieces)
    return [
        {'type': type, 'md5': digest, 'blockId': index, 'totalBlock': total, 'buf': chunk}
        for index, chunk in enumerate(pieces)
    ]
예제 #11
0
def create_user(conn, item):
    """Replace an existing user with a fresh account and personal organization.

    Steps, in order: deactivate the old user ``item["userId"]``; insert the
    new user row; set its password hash; insert an organization named after
    the user; link the user to that organization.

    Args:
        conn: Database connection exposing ``update(sql, *args)`` and
            ``insert(sql, *args)``, where ``insert`` returns the new row id.
        item: Mapping with keys ``userId``, ``username``, ``email``,
            ``phone``, ``userIdentify`` and ``password``; ``position`` is
            optional.

    Returns:
        The id of the newly inserted user row.
    """
    # NOTE(security): a hard-coded salt with MD5 is weak for password
    # storage. Left unchanged because existing hashes depend on it; flagged
    # for follow-up.
    SALT = "24114581331805856724"

    # disable old user
    user_id = item["userId"]
    conn.update(
        "update user set active='D', phoneVerify='N', emailVerify='N' where id=%s",
        user_id)

    # A contact field counts as verified only when it holds non-blank text.
    # The original compared the bound method ``.strip`` (never called) with
    # "", which is always unequal, so blank values were marked verified.
    phoneVerify = 'N'
    if item["phone"] is not None and item["phone"].strip() != "":
        phoneVerify = 'Y'

    emailVerify = 'N'
    if item["email"] is not None and item["email"].strip() != "":
        emailVerify = 'Y'

    new_user_id = conn.insert(
        "insert user(username,position,email,phone,userIdentify,loginFailTimes,"
        "phoneVerify,emailVerify,active,verifiedInvestor,createTime) values"
        "(%s,%s,%s,%s,%s,0,"
        "%s,%s,'Y','N',now())", item["username"], item.get("position"),
        item["email"], item["phone"], item["userIdentify"], phoneVerify,
        emailVerify)

    # The hash includes the new row id, so it can only be set after insert.
    password = util.md5str(SALT + str(new_user_id) + item["password"])
    conn.update("update user set password=%s where id=%s", password,
                new_user_id)

    # organization (personal)
    org_id = conn.insert(
        "insert organization(name,type,status,grade,active,createUser,createTime,modifyUser,modifyTime) "
        "values(%s, 17010,31010,33020,'Y',%s,now(),%s,now())",
        item["username"], new_user_id, new_user_id)

    # user_organization_rel
    conn.insert(
        "insert user_organization_rel(userId,organizationId,active,createTime) values(%s,%s,'Y',now())",
        new_user_id, org_id)
    return new_user_id
예제 #12
0
def parserDevelop_save(source_company_id, item):
    """Walk the ``.history_ul > li`` timeline in ``item["content"]`` and log it.

    For every entry the date, type, title and link are extracted. An entry
    without a year reuses the date of the previous entry. Entries of type
    u'其他' either have their link logged or, when the link is missing or
    short, are assembled into a footprint record. Persisting the footprint
    is currently disabled, so the function only logs. An error on one entry
    is logged and the loop continues with the next.
    """
    if item is None:
        return
    logger.info("*** Development ***")

    # Month abbreviations as they appear after the year; unknown text is
    # passed through unchanged.
    month_numbers = {
        'Jan': '01', 'Feb': '02', 'Mar': '03', 'Apr': '04',
        'May': '05', 'Jun': '06', 'June': '06', 'Jul': '07',
        'July': '07', 'Aug': '08', 'Sep': '09', 'Sept': '09',
        'Oct': '10', 'Nov': '11', 'Dec': '12',
    }

    document = pq(item["content"])
    # news & footprint
    entries = document('.history_ul > li')
    develop_rank = 0
    if len(entries) == 0:
        return

    d_date = None
    for li in entries:
        try:
            node = pq(li)
            day_text = node('.date_day').text()
            year_text = node('.date_year').text()
            month_text = None
            if year_text is not None:
                raw_month = year_text[5:].strip()
                month_text = month_numbers.get(raw_month, raw_month)
                year_text = year_text[0:4]

            entry_type = node('div.li_type_icon').attr('title')
            description = node('div.li_desc > p')
            entry_title = description.text()
            entry_url = description.attr('data-href')
            entry_key = util.md5str(entry_url)

            # With no year, keep the previous entry's date.
            if not (year_text is None or year_text == ''):
                d_date = year_text + '-' + month_text + '-' + day_text
            logger.info("date: %s", d_date)

            develop_rank += 1
            develop_key = str(item["key"]) + '_' + str(develop_rank)

            # Only u'其他' entries are processed; u'资本' and the rest are ignored.
            if entry_type != u'其他':
                continue

            logger.info("********其他")
            has_link = (entry_url is not None and entry_url.strip() != ''
                        and len(entry_url) > 10)
            if has_link:
                # news
                logger.info(entry_url)
                continue

            # footprint
            logger.info("FOOTPRINT")
            if d_date is None:
                continue
            source_footprint = [{
                "source": SOURCE,
                "sourceCompanyId": source_company_id,
                "footDate": d_date,
                "footDesc": entry_title,
            }]
            # Saving is disabled; the record is built but not stored.

        except Exception as ex:
            logger.exception(ex)
예제 #13
0
def parser_develop(d, company_key, source_company_id):
    """Parse the ``.history_ul > li`` timeline of ``d`` and save what it finds.

    Entries whose type is u'资本' are turned into a funding record plus an
    investor list and passed to ``parser_util.insert_source_funding``.
    Entries whose type is u'其他' are fetched over HTTP and passed to
    ``parser_util.insert_source_news``, or saved as a footprint via
    ``parser_util.insert_source_footprint``.

    Args:
        d: Callable selector object queried with CSS selectors
            (presumably a PyQuery document; confirm against the caller).
        company_key: Key used to build per-entry and per-investor ids.
        source_company_id: Id stored on the funding and footprint records.

    NOTE(review): the outer ``try:`` inside the loop has no matching
    ``except``/``finally`` in this excerpt, so the definition appears to be
    truncated. The name ``source`` is read but not defined in this function.
    """
    logger.info('*********** parsing develop **************')
    #news & footprint
    lis = d('.history_ul > li')
    develop_rank = 0
    if len(lis) > 0:
        for li in lis:
            try:
                # NOTE(review): ``li`` is never used below; every selector
                # runs against the whole document ``d``, so each iteration
                # reads the same nodes. Confirm whether a per-item query
                # was intended.
                d_day = d('.date_day').text()
                d_year =  d('.date_year').text()
                if d_year is not None:
                    # Text after the first five characters is treated as
                    # the month name; known abbreviations become two digits.
                    d_month = d_year[5:].strip()
                    if d_month == 'Jan':
                        d_month = '01'
                    elif d_month == 'Feb':
                        d_month = '02'
                    elif d_month == 'Mar':
                        d_month = '03'
                    elif d_month == 'Apr':
                        d_month = '04'
                    elif d_month == 'May':
                        d_month = '05'
                    elif d_month == 'Jun' or d_month == 'June':
                        d_month = '06'
                    elif d_month == 'Jul' or d_month == 'July':
                        d_month = '07'
                    elif d_month == 'Aug':
                        d_month = '08'
                    elif d_month == 'Sep' or d_month == 'Sept':
                        d_month = '09'
                    elif d_month == 'Oct':
                        d_month = '10'
                    elif d_month == 'Nov':
                        d_month = '11'
                    elif d_month == 'Dec':
                        d_month = '12'

                    d_year = d_year[0:4]

                d_type = d('div.li_type_icon').attr('title')
                d_title = d('div.li_desc > p').text()
                d_url =  d('div.li_desc > p').attr('data-href')
                d_key = util.md5str(d_url)

                if d_year is None or d_year == '':
                    d_date = None
                else:
                    d_date = d_year+'-'+d_month+'-'+d_day


                develop_rank += 1
                develop_key = str(company_key)+'_'+str(develop_rank)

                # Funding entry: derive amount, round and investors from
                # the title and intro text.
                if d_type == u'资本':
                    investors = d('.desc_intro').text()
                    investors = ''.join(investors)
                    try:
                        (investors,) = util.re_get_result(u'融资机构:(.*?) ;', investors)
                    except Exception, e:
                        investors = None

                    investment = None
                    round = None
                    unit = None
                    currency = ''
                    precise = 'Y'

                    funding = d_title.replace('获得','').replace('轮融资', '')
                    logger.info(funding)

                    try:
                        funding = funding.replace('元', '')
                        if u'亿' in funding:
                            f_arr = funding.split(u'亿')

                            if '.' in f_arr[0]:
                                investment = int(f_arr[0].replace('.', '')) * 1000
                            else:
                                investment = int(f_arr[0]) * 10000

                            round = f_arr[1]
                            unit = 0
                        else:
                            try:
                                (investment,) = util.re_get_result('(\d+)', funding)

                            except Exception, e:
                                pass

                            if investment is not None:
                                round = funding.split(investment)[1]


                        # NOTE(review): ``currency`` is still '' at this
                        # point, so this test never matches and 3020 is
                        # always chosen.
                        if currency == '美':
                            currency = 3010
                        else:
                            currency = 3020

                        # NOTE(review): both branches assign 3020; confirm
                        # whether the '$' case should use another code.
                        if '¥'in str(investment):
                            currency = 3020
                        elif '$' in str(investment):
                            currency = 3020

                        investment = str(investment).replace('$', '').replace('¥', '')\
                            .replace('美金', '').replace('美', '')


                        # u'数' in the text marks the amount as not precise.
                        if u'数' in funding:
                            precise = 'N'

                        if round is None:
                            round = funding.replace('数', '')

                        if u'千' in round:
                            investment = 1000
                            unit = 0
                        elif u'百' in round:
                            investment = 100
                            unit = 0
                        elif u'十' in round:
                            investment = 10
                            unit = 0

                        if unit == 0:
                            investment = int(investment) * 10000

                        if investment is None:
                            investment = 0

                        if investment == 0:
                            precise ='N'
                        elif investment < 1000:
                            investment = int(investment) * 10000

                        roundDesc = round

                        round = round.replace('万', '').replace('千', '').replace('百', '').replace('十', '')
                        round = round.replace('美', '')


                        # Map the remaining round text to a numeric code;
                        # 'Pre-A' is tested before the plain 'A' match.
                        if u'天使' in round:
                            round = 1010
                        elif 'Pre-A' in round:
                            round = 1020
                        elif 'A' in round:
                            round = 1030
                        elif 'B' in round:
                            round = 1040
                        elif 'C' in round:
                            round = 1050
                        elif 'D' in round:
                            round = 1060
                        elif 'E' in round:
                            round = 1070
                        elif 'F' in round:
                            round = 1080
                        else:
                            round = 0


                        logger.info(investment)
                        logger.info(round)


                        source_funding ={
                                         "sourceCompanyId": source_company_id,
                                         "preMoney": None,
                                         "postMoney": None,
                                         "investment": investment,
                                         "round": round,
                                         "roundDesc": roundDesc,
                                         "currency": currency,
                                         "precise": precise,
                                         "fundingDate": d_date,
                                 }

                        # logger.info(source_funding)

                        logger.info(investors)
                        investor_list = []
                        if investors is not None:
                            investors_arr = investors.split(',')
                            investor_key = 0
                            for investor in investors_arr:
                                if investor != '':
                                    investor_key += 1
                                    sourceId = str(company_key)+'_'+ str(investor_key)

                                    if '个人' in investor:
                                        type = 10010
                                    else:
                                        type = 10020

                                    # NOTE(review): 'source' and 'sourceId'
                                    # each appear twice in this literal
                                    # with the same value.
                                    invstor_content = {'source': source,
                                                       'sourceId': sourceId,
                                                       'logo_url': None,
                                                       'name': investor,
                                                       'website': None,
                                                       'description': None,
                                                       'stage': None,
                                                       'field': None,
                                                       'type': type,
                                                       'source': source,
                                                       'sourceId': sourceId
                                                       }

                                    investor_list.append(invstor_content)

                        parser_util.insert_source_funding(source_funding, investor_list)


                    except Exception,e:
                        logger.exception(e)

                    continue

                if d_type == u'其他':
                    # news
                    # NOTE(review): this ``or`` chain is true for every
                    # value of d_url (None passes the second test), so the
                    # footprint branch below is unreachable; ``and`` was
                    # probably intended.
                    if d_url is not None or d_url != '' or len(d_url) > 10:
                        try:
                            r = requests.get(d_url, timeout= 10)
                            r.encoding = r.apparent_encoding
                            content = r.text

                            # print content[0:500]

                            source_news = {"source": source,
                                           "news_key": d_key,
                                           "company_key": company_key,
                                           "url": d_url,
                                           "title": d_title,
                                           "date": d_date,
                                           "domain": 'lagou',
                                           "content": content
                                            }

                            parser_util.insert_source_news(source_news)
                        # NOTE(review): download and save errors are
                        # silently discarded here.
                        except Exception,e :
                            pass

                    #footprint
                    else:
                        source_footprint = {"source": source,
                                            "sourceCompanyId": source_company_id,
                                            "footDate":d_date,
                                            "description": d_title,
                                    }
                        parser_util.insert_source_footprint(source_footprint)
예제 #14
0
def insert(name):
    """Save a source-13097 company identified only by its full name.

    The "(开业)" marker is stripped from ``name``; the MD5 of the cleaned
    name is used as the source id and the name is saved with type 12010.
    """
    cleaned_name = name.replace("(开业)","")
    source_key = util.md5str(cleaned_name)
    company_sid = parser_db_util.save_company_fullName(
        cleaned_name, 13097, source_key)
    logger.info("sid:%s->sourceId:%s", company_sid, source_key)
    parser_db_util.save_source_company_name(company_sid, cleaned_name, 12010)
예제 #15
0
def process(org):
    """Import an organization's cold-call mailbox into ``sourcedeal`` rows.

    Messages are fetched one batch at a time until the reader returns none.
    For each message: reply/forward prefixes are stripped from the subject
    to form the title; a message whose (org, title md5, sender) already
    exists is skipped; otherwise a ``sourcedeal`` row is inserted, a
    ``sourcedeal_forward`` row records the assignee, and attachments with an
    accepted extension are uploaded and recorded in ``sourcedeal_file``.

    Args:
        org: Mapping with ``id``, ``name`` and the ``coldcall_imap_server``,
            ``coldcall_imap_port``, ``coldcall_username`` and
            ``coldcall_password`` settings. Nothing is done when
            ``coldcall_imap_server`` is None.
    """
    if org["coldcall_imap_server"] is None:
        return

    logger.info("orgId: %s, orgName: %s", org["id"], org["name"])

    # Strips reply/forward markers (RE:, FWD:, 转发, ...) from subjects.
    # Raw string: same pattern value, without invalid-escape warnings.
    re_name = re.compile(
        r'([\[\(] *)?(RE?S?|FYI|RIF|I|FS|VB|RV|ENC|ODP|PD|YNT|ILT|SV|VS|VL|AW|WG|ΑΠ|ΣΧΕΤ|ΠΡΘ|תגובה|הועבר|主题|转发|FWD?) *([-:;)\]][ :;\])-]*|$)|\]+ *$',
        re.IGNORECASE)

    # Attachment name endings that are stored; anything else is ignored.
    allowed_suffixes = ("pdf", "rar", "zip", "7z", "ppt", "pptx", "doc",
                        "docx", "xls", "xlsx")

    while True:
        msgs = email_reader.receive(org["coldcall_imap_server"],
                                    org["coldcall_imap_port"],
                                    org["coldcall_username"],
                                    org["coldcall_password"],
                                    one=True)
        if len(msgs) == 0:
            break

        for msg in msgs:
            if msg["html"] is not None:
                parser = html2text.HTML2Text()
                parser.ignore_emphasis = True
                parser.single_line_break = True
                msg["html_text"] = parser.handle(msg["html"])
            else:
                msg["html_text"] = None

            logger.info(msg["subject"])
            logger.info(msg["from"])
            logger.info(msg["to"])
            logger.info(msg["cc"])
            logger.info("attachments=%d" % len(msg["attachments"]))
            for attach in msg["attachments"]:
                logger.info(attach.name)

            title = re_name.sub('', msg["subject"]).strip()
            title_md5 = util.md5str(title)

            # Duplicate check; the connection is released even if the
            # query raises.
            conn = db.connect_torndb()
            try:
                cc = conn.get(
                    "select * from sourcedeal where orgId=%s and titleMd5=%s and origin=%s limit 1",
                    org["id"], title_md5, msg["from"])
            finally:
                conn.close()
            if cc is not None:
                logger.info("%s Exists!" % title)
                continue

            # Prefer the HTML rendering, then the plain body, then empty.
            content = msg["html_text"]
            if content is None:
                content = msg["body"]
            if content is None:
                content = ""
            # Stored content is capped at 20000 characters.
            content = content.strip()[:20000]

            sponsor_id = find_user(org["id"], msg["from"])
            logger.info("sponsor_id=%s" % sponsor_id)
            assignee_id = find_user(org["id"], msg["cc"])
            logger.info("assignee_id=%s" % assignee_id)

            conn = db.connect_torndb()
            try:
                cc_id = conn.insert(
                    "insert sourcedeal(title,titleMd5,content,orgId,createTime,origin,assignee,sponsor) \
                                                values(%s,%s,%s,%s,%s,%s,%s,%s)",
                    title, title_md5, content, org["id"], msg["date"],
                    msg["from"], assignee_id, sponsor_id)

                if assignee_id is None:
                    # No assignee matched the cc address: pick one of the
                    # organization's investment managers at random.
                    ids = get_investment_manager_ids(org["id"])
                    assignee_id = choice(ids)
                    conn.update(
                        "update sourcedeal set assignee=%s where id=%s",
                        assignee_id, cc_id)
                    conn.insert(
                        "insert sourcedeal_forward(sourcedealId,toUserId,createTime) "
                        "values(%s,%s,%s)", cc_id, assignee_id, msg["date"])
                else:
                    conn.insert(
                        "insert sourcedeal_forward(sourcedealId,fromUserId,toUserId,createTime) "
                        "values(%s,%s,%s,%s)", cc_id, sponsor_id,
                        assignee_id, msg["date"])

                for attach in msg["attachments"]:
                    if attach.name is None or attach.name.strip() == "":
                        continue
                    name = attach.name.strip()
                    if not name.lower().endswith(allowed_suffixes):
                        continue

                    (content_type, encoding) = mimetypes.guess_type(name)
                    if content_type is None:
                        content_type = "application/octet-stream"
                    data = attach.getvalue()
                    logo_id = util.get_uuid()
                    logger.info("gridfs logo_id=%s" % logo_id)

                    oss2 = oss2_helper.Oss2Helper()
                    headers = {"Content-Type": content_type}
                    oss2.put(str(logo_id), data, headers=headers)

                    conn.insert(
                        "insert sourcedeal_file(sourcedealId,filename,fileId,createTime) "
                        "values(%s,%s,%s,%s)", cc_id, name, logo_id,
                        msg["date"])
            finally:
                # Release the connection even when an insert or upload fails.
                conn.close()
예제 #16
0
                    msg["html_text"] = parser.handle(msg["html"])
                else:
                    msg["html_text"] = None

                logger.info(msg["subject"])
                logger.info(msg["from"])
                logger.info(msg["to"])
                logger.info(msg["cc"])
                #logger.info(msg["body"])
                #logger.info(msg["html_text"])
                logger.info("attachments=%d" % len(msg["attachments"]))
                for attach in msg["attachments"]:
                    logger.info(attach.name)

                name = re_name.sub('', msg["subject"]).strip()
                name_md5 = util.md5str(name)
                cc = conn.get(
                    "select * from coldcall where organizationId=%s and nameMd5=%s limit 1",
                    org["id"], name_md5)
                if cc is not None:
                    logger.info("%s Exists!" % name)
                    continue

                content = msg["html_text"]
                if content is None:
                    content = msg["body"]
                if content is None:
                    content = ""
                content = content.strip()

                cc_id = conn.insert(
예제 #17
0
# Month tokens found after the year in the '.date_year' text, mapped to the
# two-digit month number. Tokens that are not listed are passed through as-is.
_DEVELOP_MONTHS = {
    'Jan': '01',
    'Feb': '02',
    'Mar': '03',
    'Apr': '04',
    'May': '05',
    'Jun': '06',
    'June': '06',
    'Jul': '07',
    'July': '07',
    'Aug': '08',
    'Sep': '09',
    'Sept': '09',
    'Oct': '10',
    'Nov': '11',
    'Dec': '12',
}


def _develop_date(year_text, day_text, fallback):
    """Build the 'YYYY-MM-DD' string for one history entry.

    The first four characters of year_text are used as the year and whatever
    follows index 5 (stripped) as the month token, which is translated through
    _DEVELOP_MONTHS. When year_text is None or empty, fallback is returned
    unchanged so that an entry without its own year reuses the previous date.
    """
    if not year_text:
        return fallback
    month = year_text[5:].strip()
    month = _DEVELOP_MONTHS.get(month, month)
    # A None day_text raises TypeError here on purpose: the caller's per-entry
    # handler logs it and moves on to the next entry.
    return year_text[0:4] + '-' + month + '-' + day_text


def parserDevelop_save(source_company_id, item):
    """Parse a company's development-history HTML and save its footprints.

    Every '.history_ul > li' entry found in item["content"] is dated. Entries
    whose type is u'其他' and that carry no usable link (missing, blank, or at
    most 10 characters long) are stored through
    parser_db_util.save_footprints. Linked entries are only logged, and all
    other entry types (including the u'资本' funding entries) are ignored:
    the import of both is disabled.

    :param source_company_id: id written into each footprint record and passed
        to parser_db_util.save_footprints.
    :param item: mapping holding the page HTML under "content"; None is a no-op.
    :return: None. A failure on one entry is logged and does not stop the loop.
    """
    if item is None:
        return
    logger.info("*** Development ***")
    doc = pq(item["content"])

    # Carried across iterations: an entry without a year reuses the last date.
    d_date = None
    for li in doc('.history_ul > li'):
        try:
            entry = pq(li)
            d_type = entry('div.li_type_icon').attr('title')
            desc = entry('div.li_desc > p')
            d_title = desc.text()
            # NOTE: the URL is deliberately not hashed here. The hash was only
            # consumed by the disabled news import, and an entry without
            # data-href must still be able to reach the footprint branch
            # regardless of how util.md5str treats None.
            d_url = desc.attr('data-href')

            d_date = _develop_date(entry('.date_year').text(),
                                   entry('.date_day').text(), d_date)
            logger.info("date: %s", d_date)

            # Only u'其他' entries are processed; funding entries (u'资本')
            # are skipped because their import is disabled.
            if d_type != u'其他':
                continue

            logger.info("********其他")
            has_link = (d_url is not None and d_url.strip() != ''
                        and len(d_url) > 10)
            if has_link:
                # news: fetching and storing the linked article is disabled,
                # so the URL is only logged.
                logger.info(d_url)
                continue

            # footprint
            logger.info("FOOTPRINT")
            if d_date is None:
                continue
            source_footprint = [{
                "source": SOURCE,
                "sourceCompanyId": source_company_id,
                "footDate": d_date,
                "footDesc": d_title,
            }]
            logger.info(
                json.dumps(source_footprint,
                           ensure_ascii=False,
                           cls=util.CJsonEncoder))
            parser_db_util.save_footprints(source_company_id,
                                           source_footprint)

        except Exception as ex:
            # Best effort per entry: log and continue with the next one.
            logger.exception(ex)