Exemplo n.º 1
0
    def process_row(self, row: OrderedDict, provider: Provider) -> None:
        """Attach degrees, credentials and modalities parsed from ``license``.

        The row's ``license`` value is handed to ``CredentialParser``.  Each
        valid degree / credential it reports is looked up in the matching map
        on ``self`` and appended to ``provider``; a warning is printed for
        keys without a record.  Each modality is fetched by name through the
        session, or created and added to the session when no row matches.

        :param row: raw record; must contain a ``license`` key.
        :param provider: object whose ``degrees``, ``credentials`` and
            ``modalities`` collections are appended to.
        """
        debug_label = None
        if self._debug:
            # Only built in debug mode; passed as the parser's second argument.
            given = m(row, 'first_name', str)
            family = m(row, 'last_name', str)
            license_text = m(row, 'license', str)
            debug_label = "{:<30} {}".format(given + " " + family,
                                             license_text)

        parsed = CredentialParser(row['license'], debug_label)

        if self._debug:
            # Keep every parser result around for later inspection.
            self._credentials.append(parsed)

        for degree_key in parsed.valid_degrees:
            if degree_key not in self._degree_map:
                print('WARNING: No degree record for', degree_key)
                continue
            provider.degrees.append(self._degree_map[degree_key])

        for cred_key in parsed.valid_credentials:
            if cred_key not in self._credential_map:
                print('WARNING: no cred record for', cred_key)
                continue
            provider.credentials.append(self._credential_map[cred_key])

        for modality_name in parsed.modalities:
            lookup = self._session.query(Modality).filter_by(
                name=modality_name)
            modality: Modality = lookup.options(
                load_only('id')).one_or_none()

            if not modality:
                # Unknown modality: create it so it is persisted with the
                # session.
                modality = Modality(name=modality_name)
                self._session.add(modality)

            provider.modalities.append(modality)
    def process_row(self, row: OrderedDict, provider: Provider) -> None:
        """Dispatch every license and certificate number found in the row.

        ``license_number`` and ``certificate_number`` are read as
        ``;``-separated lists (defaulting to an empty string).  Each
        non-empty entry is passed, together with ``provider``, to
        ``self._do_license`` or ``self._do_cert`` respectively.  All license
        numbers are handled before any certificate number.
        """
        license_numbers = m(row, 'license_number', str, "").split(";")
        for entry in filter(None, license_numbers):
            self._do_license(entry, provider)

        certificate_numbers = m(row, 'certificate_number', str, "").split(";")
        for entry in filter(None, certificate_numbers):
            self._do_cert(entry, provider)
Exemplo n.º 3
0
    def merge(self, row: OrderedDict):
        """Fold one raw row into this aggregate.

        Names are normalised by dropping ``.`` characters and all whitespace
        and lower-casing.  The row's ids, zip codes, certificate and license
        numbers, directory id and name variants are added to the matching
        collections on ``self``; the row itself is appended to ``self.rows``.

        :param row: raw record.  ``last_name`` and ``id`` must be truthy, and
            either ``directory_id`` or ``payor_id`` must be present.
        :raises AssertionError: when the last name or the row id is missing.
        :raises Exception: when the row's last name differs from the one
            already recorded and either there is no first name or the
            resulting full name is not already known; also when neither
            ``directory_id`` nor ``payor_id`` is set.
        """
        last: str = m(row, 'last_name', str, "")
        assert last, "no last name"

        first: str = m(row, 'first_name', str, "")
        first = "".join(first.replace(".", "").split()).lower()
        last = "".join(last.replace(".", "").split()).lower()

        if not self.last_name:
            # First row seen: it defines the last name of the aggregate.
            self.last_name = last
        elif last != self.last_name:
            # A differing last name is tolerated only when the row's full
            # name was already registered by an earlier row.
            if not first:
                raise Exception("No fn, differing last name")
            if first + last not in self.full_names:
                raise Exception("differing last name merge")

        row_id: int = m(row, 'id', int)
        assert row_id, "no row id"
        self.ids.add(row_id)

        raw_addresses: str = m(row, 'address', str)
        if raw_addresses:
            zip_hits = (
                REDIS.hget(ZIP_HASH, address)
                for address in PhoneAddyMunger.parse_raw_address(
                    raw_addresses)
            )
            # Lookups that return nothing are ignored; hits are decoded.
            self.zips.update(hit.decode() for hit in zip_hits if hit)

        cert_number = m(row, 'certificate_number', str)
        if cert_number:
            self.certificates.add(cert_number)

        license_number = m(row, 'license_number', str)
        if license_number:
            clean, code, is_nysop = LicenseCertMunger.clean_up_nysop_number(
                license_number)
            if is_nysop:
                # Group the codes seen under each cleaned-up number.
                self.licenses.setdefault(clean, set()).add(code)

        # Fall back to the payor id when the row has no directory id.
        source_id: str = (m(row, 'directory_id', str, None)
                          or m(row, 'payor_id', str, None))
        if not source_id:
            raise Exception("XXX", "no directory or payor")
        self.directories.add(source_id)

        if first:
            if not self.first_name:
                self.first_name = first
            self.first_initials.add(first[:self.INITIAL_COUNT])
            self.full_names.add(first + self.last_name)

        # The most recently merged row determines the parsed credentials.
        self.credentials = CredentialParser(row['license'], str(row_id))

        self.rows.append(row)
Exemplo n.º 4
0
    def process_row(self, row: OrderedDict, provider: Provider) -> None:
        """Turn the free-text ``accepted_payors`` column into comment records.

        The text is stripped, lower-cased and run through a fixed sequence
        of substitutions that map separators to ``;``, drop punctuation and
        isolate the out-of-network marker as its own ``oon`` token.  Each
        distinct non-empty token is matched to an ``AcceptedPayorComment``
        by ``body`` (created and added to the session when absent) and
        appended to ``provider.accepted_payor_comments`` unless already
        there.
        """
        raw: str = m(row, 'accepted_payors', str)
        if not raw:
            return

        # Order matters: the "out of network" spellings are collapsed to
        # "oon" before "oon" itself is wrapped in separators.
        substitutions = (
            (":", ' '),
            (")", ' '),
            ("(", ' '),
            ('"', ' '),
            ("/", ';'),
            ("out-of-network", 'oon'),
            ("out of network", 'oon'),
            ("oon -", 'oon '),
            ("oon-", 'oon '),
            ("oon", ";oon;"),
            (".", ';'),
            (",", ';'),
            ("=", ' '),
            ("|", ';'),
            ("*", ' '),
            ("&", ' and '),
            ("+", ' '),
        )
        cleaned = raw.strip().lower()
        for old, new in substitutions:
            cleaned = cleaned.replace(old, new)

        cleaned = OrientationMunger.MULTI_WHITESPACE_STRIP.sub(' ', cleaned)
        if not cleaned:
            return

        seen_bodies = set()
        comments = []
        for piece in cleaned.split(';'):
            body = piece.strip()
            if not body or body in seen_bodies:
                continue

            lookup = self._session.query(AcceptedPayorComment).filter_by(
                body=body)
            comment: AcceptedPayorComment = lookup.options(
                load_only('id')).one_or_none()

            if not comment:
                comment = AcceptedPayorComment(body=body)
                self._session.add(comment)

            comments.append(comment)
            seen_bodies.add(body)

        # Snapshot taken once, before anything is appended.
        existing = set(provider.accepted_payor_comments)
        for comment in comments:
            if comment in existing:
                continue
            provider.accepted_payor_comments.append(comment)
Exemplo n.º 5
0
    def process_row(self, row: OrderedDict, provider: Provider) -> None:
        """Append the plans listed in ``accepted_plan_ids`` to the provider.

        The column holds ``;``-separated integer ids.  Each id is looked up
        in ``self._id_map`` and the mapped value appended to
        ``provider.plans_accepted``.

        :param row: raw record.
        :param provider: object whose ``plans_accepted`` is appended to.
        :raises ValueError: if a non-blank token is not an integer.
        :raises KeyError: if an id is missing from ``self._id_map``
            (assuming ``_id_map`` behaves like a dict).
        """
        raw_ids = m(row, 'accepted_plan_ids', str)

        if not raw_ids:
            return

        for token in raw_ids.split(';'):
            token = token.strip()
            # A trailing or doubled ';' produces an empty token, and
            # int('') raises ValueError.  Skip blanks, as the other
            # ';'-splitting loops in this file do.
            if not token:
                continue
            provider.plans_accepted.append(self._id_map[int(token)])
    def update_website(self, rows) -> None:
        """Fill in ``website_url`` for providers that do not have one yet.

        For every row with a ``website_url``, the row id is mapped to its
        canonical provider id through ``self._r.hget(ROW_ID_HASH, ...)``.
        The provider's stored URL is read and, only when it is empty,
        replaced by the row's URL.  Work is committed every 250 updates and
        once more at the end; a progress bar is advanced once per row and a
        summary is printed when done.

        :param rows: sized iterable of raw records.
        :raises AssertionError: if a row with a URL has no ``id``.
        """
        query: text = text("""
        SELECT website_url FROM monday.provider WHERE id = :id
        """)

        update_query: text = text("""
        UPDATE monday.provider SET website_url = :val WHERE id = :id
        """)

        updated = 0
        bar = progressbar.ProgressBar(max_value=len(rows), initial_value=0)
        for i, row in enumerate(rows, start=1):
            url: str = m(row, 'website_url', str)
            if url:
                row_id = m(row, 'id', int)
                assert row_id, "there must be a row id"

                canonical_id: int = int(self._r.hget(ROW_ID_HASH, row_id))

                updated_before = updated
                for rrow in self._session.execute(query,
                                                  {"id": canonical_id}):
                    # Never overwrite a URL that is already stored.
                    if not rrow['website_url']:
                        self._session.execute(
                            update_query, {'val': url, 'id': canonical_id})
                        updated += 1

                # Commit in batches of 250 updates.  The previous test
                # (`if updated % 250:`) was inverted and committed on almost
                # every row.  The check sits outside the result loop so the
                # commit never happens while the SELECT result is still
                # being iterated.
                if updated != updated_before and updated % 250 == 0:
                    self._session.commit()

            bar.update(i)

        # Flush whatever is left from the last partial batch.
        self._session.commit()

        print()
        print("Updated", updated, "rows.")
    def process_row(self, row: OrderedDict, provider: Provider) -> None:
        """Resolve the free-text ``languages`` column to language records.

        Nothing happens when the column is empty.  Otherwise the ``english``
        record is always included.  The text is lower-cased, qualifier words
        and parentheses are removed, and the common separators are unified
        to ``;``.  Each token of at least two characters goes through
        ``self._process_token``, whose result is treated as: ``True`` = known
        bad token (ignored), ``False`` = not recognised, anything else = the
        language record.  Unrecognised tokens containing ``-`` are retried
        with the hyphens removed before being reported via ``self._missed``.
        """
        raw: str = m(row, 'languages', str)
        if not raw:
            return

        found = {self._records['english']}

        substitutions = (
            (" and ", ";"),
            ("bilingual", ""),
            ("proficient", ""),
            ("conversational", ""),
            ("native", ""),
            ("speaker", ""),
            ("(", ""),
            (")", ""),
            ("&", ";"),
            (":", ";"),
            ("/", ";"),
        )
        cleaned = raw.lower()
        for old, new in substitutions:
            cleaned = cleaned.replace(old, new)

        for piece in cleaned.split(';'):
            lang_name: str = piece.strip()

            # Covers both empty and single-character tokens.
            if len(lang_name) < 2:
                continue

            outcome = self._process_token(lang_name)

            # Known bad token.
            if outcome is True:
                continue

            # Second attempt: same token without hyphens.
            if outcome is False and '-' in lang_name:
                lang_name = lang_name.replace('-', '').strip()
                outcome = self._process_token(lang_name)
                if outcome is True:
                    continue

            if outcome is False:
                # Still unrecognised: report it with the original text.
                self._missed(lang_name, raw)
                continue

            found.add(outcome)

        if not found:
            return

        existing = set(provider.languages)
        for language in found:
            if language in existing:
                continue
            provider.languages.append(language)
    def process_row(self, row: OrderedDict, provider: Provider) -> None:
        """Attach the row's cleaned-up addresses and phone numbers.

        Each object produced by ``self._cleanup_addresses`` /
        ``self.cleanup_phone_numbers`` is tagged with the row's directory id
        (``None`` when the row has no truthy one) and appended to
        ``provider.addresses`` / ``provider.phone_numbers``.
        """
        address_text = m(row, 'address', str)
        phone_text = m(row, 'phone', str)

        # Normalise every falsy directory id (e.g. 0) to None.
        directory_id: Union[int, None] = (
            m(row, 'directory_id', int, None) or None)

        if address_text:
            for address in self._cleanup_addresses(address_text):
                address.directory_id = directory_id
                provider.addresses.append(address)

        if phone_text:
            for phone in self.cleanup_phone_numbers(phone_text):
                phone.directory_id = directory_id
                provider.phone_numbers.append(phone)
Exemplo n.º 9
0
    def process_row(self, row: OrderedDict, provider: Provider) -> None:
        """Derive age groups and numeric age ranges from ``works_with_ages``.

        The column is lower-cased and split on ``;``.  A token listed in
        ``self.AGE_NAMES`` is recorded as an age group and not parsed
        further.  Any other token is only considered when it contains
        ``(``: the text between ``(`` and ``)`` is read either as
        ``<low>to<high>`` or as ``<low>+`` (upper bound 999) and converted to
        a ``NumericRange``, memoised in ``self._range_cache``.  Parenthesised
        text matching neither form is added to ``self._missed``.

        When the column is non-empty, ``provider.age_groups`` and
        ``provider.age_ranges`` are always overwritten, possibly with empty
        lists.
        """
        raw: str = m(row, 'works_with_ages', str)
        if not raw:
            return

        age_groups = set()
        ranges: Set[NumericRange] = set()

        for piece in raw.lower().split(';'):
            token = piece.strip()
            if not token:
                continue

            # Named groups are taken verbatim and never range-parsed.
            if token in self.AGE_NAMES:
                age_groups.add(token)
                continue

            # Without a parenthesised part there is nothing to parse.
            if "(" not in token:
                continue

            open_at = token.find("(")
            close_at = token.find(")")
            inside = token[open_at + 1:close_at]

            cached = inside in self._range_cache
            if cached:
                ranges.add(self._range_cache[inside])
                continue

            parts = inside.split("to")
            if len(parts) > 1:
                # "<low>to<high>"
                span = NumericRange(int(parts[0]),
                                    int(parts[1]),
                                    bounds=self.RANGE_BOUNDS)
            elif parts[0].endswith("+"):
                # "<low>+": open-ended, capped at 999
                span = NumericRange(int(parts[0][:-1]),
                                    999,
                                    bounds=self.RANGE_BOUNDS)
            else:
                self._missed.add(token)
                continue

            self._range_cache[inside] = span
            ranges.add(span)

        provider.age_groups = list(age_groups)
        provider.age_ranges = list(ranges)
    def update_began_practice(self, rows) -> None:
        """Set ``began_practice`` from each row's ``years_in_practice``.

        Rows whose ``years_in_practice`` is missing or below 1 are skipped.
        For the others the row id is mapped to its canonical provider id
        through ``self._r.hget(ROW_ID_HASH, ...)`` and ``began_practice`` is
        set to the current year minus the years in practice.  Work is
        committed every 250 updates and once more at the end; a progress bar
        is advanced once per row and a summary is printed when done.

        :param rows: sized iterable of raw records.
        :raises AssertionError: if a processed row has no ``id``.
        """
        query: text = text("""
        UPDATE monday.provider SET began_practice = :val WHERE id = :id
        """)

        current_year: int = datetime.datetime.now().year

        updated = 0
        bar = progressbar.ProgressBar(max_value=len(rows), initial_value=0)
        for i, row in enumerate(rows, start=1):
            yip: int = m(row, 'years_in_practice', int)
            if yip and yip >= 1:
                row_id = m(row, 'id', int)
                assert row_id, "there must be a row id"

                canonical_id: int = int(self._r.hget(ROW_ID_HASH, row_id))

                self._session.execute(
                    query, {'val': current_year - yip, 'id': canonical_id})
                updated += 1

                # Commit in batches of 250 updates.  The previous test
                # (`if updated % 250:`) was inverted and committed after
                # almost every single update.
                if updated % 250 == 0:
                    self._session.commit()

            bar.update(i)

        # Flush whatever is left from the last partial batch.
        self._session.commit()

        print()
        print("Updated", updated, "rows.")
Exemplo n.º 11
0
    def process_row(self, row: OrderedDict, provider: Provider) -> None:
        """Append the row's accepted payment methods to the provider.

        ``accepted_payment_methods`` holds ``;``-separated names.  Each name
        is stripped and lower-cased, looked up in ``self._id_map`` and the
        mapped value appended to ``provider.payment_methods``.  Blank
        entries are reported on stdout and skipped.

        :raises KeyError: if a name is missing from ``self._id_map``
            (assuming ``_id_map`` behaves like a dict).
        """
        apm = m(row, 'accepted_payment_methods', str)

        if not apm:
            return

        for entry in apm.split(';'):
            method = entry.strip().lower()
            # Test the *normalised* value.  The old check looked at the raw
            # entry, so a whitespace-only entry (e.g. "cash; ;check")
            # slipped through and was looked up as ''.
            if not method:
                print(method, row)
                continue
            provider.payment_methods.append(self._id_map[method])
Exemplo n.º 12
0
    def process_row(self, row: OrderedDict, provider: Provider) -> None:
        """Turn the free-text ``works_with_groups`` column into Group records.

        The text is stripped, lower-cased and cleaned of punctuation, with
        ``/`` acting as an additional separator, then split on ``;``.  Each
        distinct non-empty token is matched to a ``Group`` by ``body``
        (created and added to the session when absent) and appended to
        ``provider.groups`` unless already there.
        """
        raw: str = m(row, 'works_with_groups', str)
        if not raw:
            return

        substitutions = (
            (":", ' '),
            (")", ' '),
            ("(", ' '),
            ('"', ' '),
            ("'", ' '),
            ("/", ';'),
            ("&", ' and '),
            ("-", ' '),
            (".", ' '),
            (",", ' '),
            ("+", ' '),
        )
        cleaned = raw.strip().lower()
        for old, new in substitutions:
            cleaned = cleaned.replace(old, new)

        cleaned = OrientationMunger.MULTI_WHITESPACE_STRIP.sub(' ', cleaned)
        if not cleaned:
            return

        seen_bodies = set()
        groups = []
        for piece in cleaned.split(';'):
            body = piece.strip()
            if not body or body in seen_bodies:
                continue

            lookup = self._session.query(Group).filter_by(body=body)
            group: Group = lookup.options(load_only('id')).one_or_none()

            if not group:
                group = Group(body=body)
                self._session.add(group)

            groups.append(group)
            seen_bodies.add(body)

        # Snapshot taken once, before anything is appended.
        existing = set(provider.groups)
        for group in groups:
            if group in existing:
                continue
            provider.groups.append(group)
Exemplo n.º 13
0
    def process_row(self, row: OrderedDict, provider: Provider) -> None:
        """Turn ``treatment_orientations`` free text into Orientation records.

        The text is stripped, lower-cased and cleaned of punctuation (here
        ``/`` becomes a space, not a separator), whitespace runs are
        collapsed with ``self.MULTI_WHITESPACE_STRIP`` and the result is
        split on ``;``.  Each distinct non-empty token is matched to an
        ``Orientation`` by ``body`` (created and added to the session when
        absent) and appended to ``provider.treatment_orientations`` unless
        already there.
        """
        raw: str = m(row, 'treatment_orientations', str)
        if not raw:
            return

        substitutions = (
            (":", ' '),
            (")", ' '),
            ("(", ' '),
            ("/", ' '),
            ("&", ' and '),
            ("-", ' '),
            (".", ' '),
            (",", ' '),
            ("+", ' '),
        )
        cleaned = raw.strip().lower()
        for old, new in substitutions:
            cleaned = cleaned.replace(old, new)

        cleaned = self.MULTI_WHITESPACE_STRIP.sub(' ', cleaned)

        seen_bodies = set()
        orientations = []
        for piece in cleaned.split(';'):
            body = piece.strip()
            if not body or body in seen_bodies:
                continue

            lookup = self._session.query(Orientation).filter_by(body=body)
            orientation: Orientation = lookup.options(
                load_only('id')).one_or_none()

            if not orientation:
                orientation = Orientation(body=body)
                self._session.add(orientation)

            orientations.append(orientation)
            seen_bodies.add(body)

        # Snapshot taken once, before anything is appended.
        existing = set(provider.treatment_orientations)
        for orientation in orientations:
            if orientation in existing:
                continue
            provider.treatment_orientations.append(orientation)
    def process_row(self, row: OrderedDict, provider: Provider) -> None:
        """Turn the free-text ``modalities`` column into Modality records.

        For rows from directory 3 the ``services`` column is appended to the
        text first.  The text is stripped, lower-cased, cleaned of
        punctuation and symbols, and a handful of key words are wrapped in
        ``;`` so that they become tokens of their own.  Each distinct
        non-empty token is matched to a ``Modality`` by ``name`` (created and
        added to the session when absent) and appended to
        ``provider.modalities`` unless already there.

        :param row: raw record.
        :param provider: object whose ``modalities`` is appended to.
        """
        raw: str = m(row, 'modalities', str, "")

        # services from GT should be parsed in the same way as modality
        directory_id: int = m(row, "directory_id", int)
        if directory_id == 3:
            raw += " " + m(row, "services", str, "")

        if not raw:
            return

        # Replacements run strictly in order.  Two things to know:
        #  * 'therapy' is wrapped before 'psychotherapy', so by the time the
        #    'psychotherapy' replacement runs that word has already become
        #    'psycho;therapy;' and cannot match.  It is kept as-is;
        #    presumably PSYCHO_SUFFIX below stitches the word back together
        #    -- TODO confirm against the pattern's definition.
        #  * 'couples' is first folded into 'couple' and only then wrapped.
        #    Wrapping 'couples' and afterwards 'couple' rewrote the already
        #    wrapped word into ';;couples;s;', which produced a bogus
        #    modality named 's'.
        replaced_raw = raw.strip().lower() \
            .replace(":", ' ') \
            .replace("&", ' and ') \
            .replace(")", ' ') \
            .replace("(", ' ') \
            .replace('"', ' ') \
            .replace("-", ' ') \
            .replace(".", ' ') \
            .replace(",", ' ') \
            .replace("=", ' ') \
            .replace("|", ' ') \
            .replace("®", '') \
            .replace('©', '') \
            .replace('†', '') \
            .replace("*", ' ') \
            .replace("for ", ' ') \
            .replace("rational emotive behavioral", 'rebt') \
            .replace("rational emotive behavior", 'rebt') \
            .replace("psychotherapies", "psychotherapy") \
            .replace('therapy', ';therapy;') \
            .replace('psychotherapy', ';psychotherapy;') \
            .replace('psychology', ';psychology;') \
            .replace('psychoanalysis', ';psychoanalysis;') \
            .replace('couples', 'couple') \
            .replace('couple', ';couples;') \
            .replace('/', ';') \
            .replace("+", ' ')

        replaced_raw = OrientationMunger.MULTI_WHITESPACE_STRIP \
            .sub(' ', replaced_raw)

        replaced_raw = self.PSYCHO_SUFFIX.sub(';psychotherapy;', replaced_raw)

        if not replaced_raw:
            return

        added = set()

        records = []

        for token in replaced_raw.split(';'):
            token = token.strip()

            if not token or token in added:
                continue

            modality: Modality = self._session.query(Modality).filter_by(
                name=token).options(load_only('id')).one_or_none()

            if not modality:
                # Unknown modality: create it so it is persisted with the
                # session.
                modality = Modality(name=token)
                self._session.add(modality)

            records.append(modality)
            added.add(token)

        # Snapshot taken once, before anything is appended.
        already = set(provider.modalities)

        for record in records:
            if record not in already:
                provider.modalities.append(record)
    def process_row(self, row: OrderedDict, provider: Provider) -> None:
        """Detect specialties in the free-text ``specialties`` column.

        The text is lower-cased, stripped of quotes, dots and parentheses,
        and split on ``;`` (commas count as separators too).  Each token of
        at least three characters is tested against every pattern in
        ``COMPILED_REGEXPS``.  The specialty records of the matching
        patterns are memoised per token in ``self._cache``; tokens that
        match nothing are remembered in ``self._unknown_keys`` and skipped
        from then on.  All detected records not yet on
        ``provider.specialties`` are appended to it.
        """
        raw = m(row, 'specialties', str)
        if not raw:
            return

        substitutions = (
            ("--", " "),
            ("(", ""),
            (")", ""),
            ("'", ""),
            ('"', ''),
            ('.', ''),
            (",", ";"),
        )
        cleaned = raw.lower()
        for old, new in substitutions:
            cleaned = cleaned.replace(old, new)

        found = set()

        for piece in cleaned.split(';'):
            token = piece.strip()

            # Covers the empty token as well as the too-short ones.
            if len(token) < 3:
                continue

            # Already known to match no specialty at all.
            if token in self._unknown_keys:
                continue

            if token in self._cache:
                self._cache_hits += 1
                matches = self._cache[token]
            else:
                # First encounter: run the token past every pattern.
                matches = set()
                for pattern, specialty in COMPILED_REGEXPS:
                    record = self._records[specialty]
                    # No need to search again for a specialty that is
                    # already detected.
                    if record not in matches and pattern.search(token):
                        matches.add(record)

                # Memoise the outcome, even when it is empty.
                self._cache[token] = matches
                if not matches:
                    self._unknown_keys.add(token)
                    continue

            found.update(matches)

        if not found:
            return

        # Append only what the provider does not have yet.
        existing = set(provider.specialties)
        for record in found:
            if record in existing:
                continue
            provider.specialties.append(record)
Exemplo n.º 16
0
    def process_providers(self, tables: Mapping[str, RawTable],
                          update_columns: bool) -> None:
        """Create or update one Provider per canonical id and run the plugins.

        Every row of ``tables['provider_records']`` is mapped to its
        canonical provider id through ``self._r.hget(ROW_ID_HASH, ...)``.
        When the provider does not exist yet, or ``update_columns`` is set,
        the columns listed in ``self.ROW_FIELDS`` are coerced and merged into
        the provider, honouring each column's directory priority list.  The
        provider is then linked to the row's directory, every plugin's
        ``process_row`` is invoked, and the session is committed once per
        row.  ``pre_process`` / ``post_process`` of every plugin run before
        and after the loop.

        :param tables: must contain the ``provider_records`` table.
        :param update_columns: when true, column values are merged even for
            providers that already exist.
        :raises AssertionError: if a row has no ``id``.
        """
        for plugin in self._plugins:
            plugin.pre_process()

        table = tables['provider_records']
        _columns, rows = table.get_table_components()

        # canonical provider id -> directory ids already processed for it
        directories: MutableMapping[int, Set[Union[int, None]]] = {}

        current_year: int = datetime.datetime.now().year

        bar = progressbar.ProgressBar(max_value=len(rows), initial_value=0)
        for i, row in enumerate(rows, start=1):

            row_id = m(row, 'id', int)

            assert row_id, "there must be a row id"

            directory_id: Union[int, None] = m(row, 'directory_id', int)

            canonical_id: int = int(self._r.hget(ROW_ID_HASH, row_id))

            # Does this provider exist?
            provider: Provider = self._session.query(Provider).filter_by(
                id=canonical_id).one_or_none()

            if not provider or update_columns:
                # Directories seen so far for this provider (None on the
                # first row that maps to this canonical id).
                dirs: Union[Set[Union[int, None]], None] = directories.get(
                    canonical_id)
                args = {}

                for row_name, row_params in self.ROW_FIELDS.items():
                    coercer, priorities = row_params

                    # @TODO: NOTE!! If a higher priority row had no value, lower
                    # @TODO: priority rows WITH values will skip!! I am highly
                    # @TODO: suspicious that this will work for all cases!!
                    if dirs:
                        skip = False
                        for priority in priorities:
                            # Reached our own directory: nothing that
                            # outranks us has been processed.
                            if directory_id == priority:
                                break
                            # A directory that outranks us has already
                            # supplied this provider.
                            if priority in dirs:
                                skip = True
                                break
                        if skip:
                            continue

                    # Get the value
                    coerced_value = m(row, row_name, coercer)

                    # This check is important because we want fields that are
                    # set to null to not overwrite existing fields from other
                    # record sources
                    if coerced_value is not None:
                        args[row_name] = coerced_value

                # Regardless of the outcome, we can still merge a new record
                args['id'] = canonical_id
                provider = self._session.merge(Provider(**args))

                # A special case: derive the year practice began.  The guard
                # used to be inverted (`if not yip or yip < 1`), so valid
                # values were ignored and a missing value fed straight into
                # the subtraction.
                yip: int = m(row, 'years_in_practice', int)
                if yip and yip >= 1:
                    provider.began_practice = current_year - yip

            # Relate the provider to the directory
            if directory_id and directory_id in self._directory_map:
                already_linked = any(
                    directory.id == directory_id
                    for directory in provider.directories)
                if not already_linked:
                    provider.directories.append(
                        self._directory_map[directory_id])

            # Do all the plugins
            for plugin in self._plugins:
                plugin.process_row(row, provider)

            # Save the directories processed for this canonical ID so that when
            # we find another one we can evaluate priority
            directories.setdefault(canonical_id, set()).add(directory_id)

            self._session.commit()
            bar.update(i)

        self._session.flush()

        for plugin in self._plugins:
            plugin.post_process()