# implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from email import utils as email_utils
import re

import six
from six.moves import http_client
from six.moves.urllib import parse

from stackalytics.openstack.common import log as logging
from stackalytics.processor import utils


LOG = logging.getLogger(__name__)

# Regex fragment for the separator line that opens each message, e.g.
#   "From user at example.com  Mon Jan  5 12:34:56 2015"
# i.e. "From <sender>" (optionally written as "<user> at <host>"), two
# three-character words, a 1-2 digit number, HH:MM with optional :SS, an
# optional extra token, a four-digit number, then the rest of the line.
#
# All fragments are raw strings so that regex escapes such as \S, \w and \d
# are passed to the regex engine untouched instead of being parsed as
# (invalid) Python string escapes. "\n" is likewise left for the regex engine,
# which interprets it as a newline.
EMAIL_HEADER_PATTERN = (
    r'From \S+(?: at \S+)?\s+'
    r'\w{3}\s+\w{3}\s+\d{1,2}\s+\d{2}:\d{2}(?::\d{2})?'
    r'(?:\s+\S+)?\s+\d{4}.*?\n'
)

# Matches one complete message and exposes these named groups:
#   author_email - token(s) following "From: ", in "<user> at <host>" form
#   author_name  - optional run of words after the address
#   date         - text of the "Date:" line
#   subject      - text after "Subject: " up to the next "<Header>:" line
#                  (so a subject may span several lines)
#   message_id   - token following "Message-ID: "
#   body         - everything after the blank line that ends the headers
#
# The trailing lookahead requires another separator line followed by
# "From: " after the body, so a message is only matched when a further
# message header follows it in the scanned text.
MAIL_BOX_PATTERN = re.compile(
    r'^' + EMAIL_HEADER_PATTERN +
    r'From: (?P<author_email>\S+(?: at \S+))'
    r'(?:\W+(?P<author_name>\w+(?:\s\w+)*))?.*?\n'
    r'Date: (?P<date>.*?)\n'
    r'Subject: (?P<subject>.*?)(?=\n\S+:)'
    r'.*?Message-ID: (?P<message_id>\S+)\n'
    r'\n(?P<body>.*?)\n'
    r'(?=' + EMAIL_HEADER_PATTERN + r'From: )',
    # MULTILINE lets '^' anchor at any line start; DOTALL lets '.' cross
    # newlines so subject/body can span multiple lines.
    flags=re.MULTILINE | re.DOTALL)
# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or # implied. # See the License for the specific language governing permissions and # limitations under the License. import re import time import six from stackalytics.openstack.common import log as logging from stackalytics.processor import utils LOG = logging.getLogger(__name__) NAME_AND_DATE_PATTERN = r'<h3>(?P<member_name>[^<]*)[\s\S]*?' \ r'<div class="span-7 last">(?P<date_joined>[^<]*)' COMPANY_PATTERN = r'<strong>Date\sJoined[\s\S]*?<b>(?P<company_draft>[^<]*)' \ r'[\s\S]*?From\s(?P<date_from>[\s\S]*?)\(Current\)' CNT_EMPTY_MEMBERS = 50 def _convert_str_fields_to_unicode(result): for field, value in result.iteritems(): if type(value) is str: try: value = six.text_type(value, 'utf8') result[field] = value