Example #1
    def _granulateDate(self, str_date):
        """Returns only the date parts that were successfully parsed."""
        di_1 = parseDate(
            str_date,
            ignoretz=True,
            fuzzy=True,
            default=datetime(1900, 12, 28, 0, 0)
        )  # 1900-12-28 default year, month and day (day 28 exists in every month :-)
        di_2 = parseDate(
            str_date,
            ignoretz=True,
            fuzzy=True,
            default=datetime(2000, 1, 1, 0, 0)
        )  # 2000-01-01 default year, month and day.

        ## Parsed date with no defaults used:
        if str(di_1.date()) == str(di_2.date()):
            return str(di_1.date())  # Dates are the same: the date was parsed completely.

        ## Check for default day and month:
        if di_1.date().day != di_2.date().day and di_1.date().month != di_2.date().month:
            return str(di_1.date().year)  # Only the year was parsed successfully.
        if di_1.date().day != di_2.date().day and di_1.date().month == di_2.date().month:
            return '%s-%s' % (di_1.date().year, di_1.date().month)  # Only year and month were parsed successfully.
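A minimal sketch of the two-default trick above, assuming parseDate is dateutil.parser.parse: fields missing from the input are filled in from default, so parsing twice with defaults that differ in every field reveals which parts were actually present.

from datetime import datetime
from dateutil.parser import parse as parseDate

for s in ("2014", "March 2014", "4 March 2014"):
    d1 = parseDate(s, ignoretz=True, fuzzy=True, default=datetime(1900, 12, 28))
    d2 = parseDate(s, ignoretz=True, fuzzy=True, default=datetime(2000, 1, 1))
    print(s, "->", d1.date(), d2.date())
# "2014"         -> 2014-12-28 vs 2014-01-01: day and month both came from the defaults
# "March 2014"   -> 2014-03-28 vs 2014-03-01: only the day came from the defaults
# "4 March 2014" -> identical dates: the date was parsed completely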
Example #2
 def isDate(self, string):
     """ Returns True if string is a valid date format """
     try:
         parseDate(string)
         return True
     except ValueError:
         return False
Example #3
 def _validateISO8601(self, datestring):
     ## See: http://labix.org/python-dateutil
     if datestring is None: return False
     
     try:
         parseDate(datestring, ignoretz=True, fuzzy=True)
     except ValueError:
         return False
     return True
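An aside on these two validators: dateutil's parser raises TypeError, not ValueError, for non-string input, which is why the explicit None check above matters. A quick sketch, assuming dateutil:

from dateutil.parser import parse as parseDate

try:
    parseDate(None)
except TypeError:
    print("TypeError, not ValueError: `except ValueError` alone would not catch this")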
Example #4
    def __load(self):

        if path.exists(self.__filePath):
            with open(self.__filePath, mode="r", newline="") as file:
                reader = csv.reader(file, delimiter=self.__delimiter)
                for item in reader:
                    recordDate = parseDate(item[0])
                    event = item[1]
                    when = parseDate(item[2])
                    version = item[3]
                    parsed = (recordDate, event, when, version)
                    self.__items.append(parsed)
Example #6
    def getEpisodes(self):
        # @todo Store last_sync_date
        days      = 15
        now       = datetime.now().replace(tzinfo=pytz.UTC)
        fromdate  = (now - timedelta(days=days-1)).strftime('%Y-%m-%d')

        response  = self.request("calendars/my/shows/%s/%s" % (fromdate,days))

        result    = []

        for episode in response:
            # Make sure the episode has aired
            if parseDate(episode["first_aired"]) < now:
                result.append({
                    "type":             "show",
                    "id":               episode["show"]["ids"]["tvdb"],
                    "episode_id":       episode["episode"]["ids"]["tvdb"],
                    "title":            episode["show"]["title"],
                    "episode_title":    episode["episode"]["title"],
                    "year":             episode["show"]["year"],
                    "season":           episode["episode"]["season"],
                    "episode":          episode["episode"]["number"],
                })

        return result
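The .replace(tzinfo=pytz.UTC) above is what makes the comparison legal: the first_aired values are ISO 8601 strings with an offset, so parseDate returns a timezone-aware datetime, and Python refuses to compare aware and naive datetimes. A small sketch, assuming dateutil and pytz (the timestamp is made up):

from datetime import datetime
import pytz
from dateutil.parser import parse as parseDate

aired = parseDate("2014-09-22T21:00:00.000Z")    # timezone-aware
now = datetime.now().replace(tzinfo=pytz.UTC)    # made aware, as above
print(aired < now)
# `aired < datetime.now()` would raise TypeError:
# can't compare offset-naive and offset-aware datetimes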
Example #7
 def get_element(element):
   text = "".join(n.data for n in element.childNodes if n.nodeType == n.TEXT_NODE)
   
   try:
     entry_type = element.getAttribute("type")
     
     if entry_type == "integer":
       try:
         return int(text)
       except ValueError:
         return 0
     
     elif entry_type in ("date", "datetime"):
       return parseDate( text )
     
     elif entry_type == "boolean":
       try:
         return text.strip().lower() in ("true", "1")
       except ValueError:
         return False
     
     elif entry_type == "decimal":
       try:
         return float(text)
       except ValueError:
         return 0.0
     
     else:
       return text
   
   except:
     return text
Example #8
def getDate_text(front, lastPage):

    # Flatten front; when split by the WC splitter, it comes out as a list of strings
    front = flatten(front)
    if len(front) == 0: return ''

    dateString = ''
    date = ''
    dateList = []

    # First, see if there is a "Date Decided" date, which is authoritative
    m = dateDecidedRe0.search(front.lower()) or dateDecidedRe1.search(front.lower())
    if m is not None:
        dateString = m.group()

    # Second, see if there is a 'FILED' date (note this one is case sensitive, so no .lower())
    if len(dateString) == 0:
        m = dateFiledRe0.search(front)
        if m is not None:
            dateString = m.group()

    # Third, see if there is a "Dated:" date in the frontmatter
    if len(dateString) == 0:
        m = dateDatedRe0.search(front.lower()) or dateDatedRe1.search(front.lower())
        if m is not None:
            dateString = m.group()

    # Fourth, see if there is a "Final Report:" date in the frontmatter
    if len(dateString) == 0:
        m = finalReportDateRe0.search(front.lower())
        if m is not None:
            dateString = m.group()

    # Fifth, see if there is a "Dated:" (or "Date:") date in the last page
    back = flatten(lastPage)
    if len(dateString) == 0:
        m = (dateDatedRe0.search(back.lower())
             or dateDatedRe1.search(back.lower())
             or dateDatedRe2.search(back.lower()))
        if m is not None:
            dateString = m.group()

    # Sixth, if no authoritative phrase, make a list of all dates in the frontmatter
    if len(dateString) == 0:
        m = standardDateRe0.search(front.lower())
        if m is not None:
            dateList.append(m.group())
        m = standardDateRe1.search(front.lower())
        if m is not None:
            dateList.append(m.group())
        # ARBITRARILY CHOOSES FIRST OF ALL DATES IN THE DATELIST
        if len(dateList) > 0: dateString = dateList[0]

    # LAST, if possible, convert the final result of the date search
    # into a MySQL-formatted date (string: 'yyyy-mm-dd')
    if isDate(dateString):
        date = parseDate(dateString, fuzzy=True).date().isoformat()
    else:
        date = '0001-01-01'  # Default date must be compliant with the MySQL date format

    return date
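The final normalization step leans on fuzzy=True, which lets dateutil skip the label text around the date. In isolation (a sketch, assuming dateutil; the input string is made up):

from dateutil.parser import parse as parseDate

print(parseDate("Dated: March 4, 2014", fuzzy=True).date().isoformat())
# -> '2014-03-04': the 'Dated:' prefix is ignored and isoformat()
# yields the MySQL-compatible 'yyyy-mm-dd' string the function returns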
Example #9
def pollAll(blogRecList, itemRecList):
	for blogRec in blogRecList:
		if not blogRec['delete']:
			pollRes = poll(blogRec)
			parseRes = parseXML(pollRes)
			if parseRes['blogRec']['status']:
				blogRec = parseRes['blogRec']
				firstTime = blogRec['firstTime']
				items = parseRes['items']
				for item in items:
					if item['pubDate'] is not None:
						pDate = parseDate(item['pubDate'])
						# compare date to date: bare datetime.today() is a datetime and would never match
						if firstTime or pDate.date() == datetime.today().date():
							newItemRec = {}
							newItemRec['title'] = item['title']
							newItemRec['pubDate'] = item['pubDate']
							newItemRec['link'] = item['link']
							newItemRec['blogUrl'] = blogRec['url']
							newItemRec['isNew'] = True
							isExist = False
							for itemRec in itemRecList:
								if itemRec['link'] == newItemRec['link']:
									isExist = True
									break
							if not isExist:
								itemRecList.append(newItemRec)
				if firstTime:
					blogRec['firstTime'] = False

	print(blogRecList)
	print(itemRecList)
Example #10
 def get_element(element):
     text = ''.join( n.data for n in element.childNodes if n.nodeType == n.TEXT_NODE )
     try:
         entry_type = element.getAttribute('type')
         if entry_type == 'integer':
             try:
                 return int( text )
             except ValueError:
                 return 0
         elif entry_type in ('date','datetime'):
             return parseDate( text )
         elif entry_type == 'boolean':
             # membership tests on strings never raise ValueError, so return directly
             return text.strip().lower() in ('true', '1')
         elif entry_type == 'decimal':
             try:
                 return float( text )
             except ValueError:
                 return 0.0
         else:
             return text
     except:
         return text
Example #11
def getDate(text):
    match = re.search(r"[a-z]{2,12}\.?\s?\d{1,2}\,\s?\d{2,4}", text.lower())
    if match: 
        try:
            return parseDate(match[0])
        except ValueError:
            return ""
    return ""
Example #12
 def parser(self, value):
     try:
         return parseDate(value)
     except Exception:
         raise ParseError(
             message="\"{}\" is not a valid datetime".format(value)
         )
Example #13
    def __processFacebookDoc(self, doc):
        '''Converts a mongo document into a user dictionary for the matcher.

        It appends the headline to the biography of the user.
        '''
        user = {}
        if doc is None:
            return None

        # Commons
        user['username'] = doc['_id']
        user['name'] = doc['name']
        user['location'] = doc['location']
        user['website'] = doc['website']
        user['bio'] = doc['bio'] + doc['site']  # 'site' is like a short biography
        if doc['photo'] != "" and doc['photo'] is not None:
            user['profileImage'] = doc['photo']
        else:
            user['profileImage'] = ""
        try:
            user['matched'] = doc['matched']
        except Exception:
            user['matched'] = None
        user['sourceCollection'] = FACEBOOK
        try:
            user['bornAt'] = parseDate(doc['birthdate'][:-10])  # remove "Born" and convert to date
        except Exception:
            user['bornAt'] = None

        # Specials
        try:
            user['friends'] = list(map(lambda f: f[25:], doc['friends']))
        except Exception:
            user['friends'] = []
        if doc['bg'] != "" and doc['bg'] is not None:
            user['backgroundImage'] = doc['bg']
        else:
            user['backgroundImage'] = ""
        user['education'] = ""
        if 'education' in doc and len(doc['education']) > 0:
            user['education'] += doc['education'][0] + "\n"
        if 'education1' in doc and len(doc['education1']) > 0:
            user['education'] += doc['education1'][0] + "\n"
        if 'education2' in doc and len(doc['education2']) > 0:
            user['education'] += doc['education2'][0] + "\n"
        user['education'] = user['education'].strip()
        user['work'] = ""
        if 'work' in doc and len(doc['work']) > 0:
            user['work'] += doc['work'][0] + "\n"
        if 'work1' in doc and len(doc['work1']) > 0:
            user['work'] += doc['work1'][0] + "\n"
        if 'work2' in doc and len(doc['work2']) > 0:
            user['work'] += doc['work2'][0] + "\n"
        user['work'] = user['work'].strip()

        return user
Example #14
def appointment(author_id):
    user = getById(author_id)
    if(user["CurrentFunction"][0:3] != "app"):
        user["CurrentFunction"] = "app0"
        sendMessage("Would you like to make an appointment?", author_id)
        print(database)
    
    elif(user["CurrentFunction"][0:3] == "app"):
        print(database)
        if(int(user["CurrentFunction"][3:]) == 0):
            print(database)
            response = ""
            for x in getAllSubstrings(message.upper(), 2):
                if x in ["YES"]:
                    response = "YES"
                    break
                elif x in ["NO"]:
                    response = "NO"
                    break
            
            if response.upper() == "YES":
                user["CurrentFunction"] = "app1"
                sendMessage("Please enter the date and time of your appointment",author_id)
            
            elif response.upper() == "NO":
                user["CurrentFunction"]=""
                sendMessage("Ok then!", author_id)
                print(database)

            else:
                sendMessage(answers["afirmativeQuestion"][language], author_id)

        elif(int(user["CurrentFunction"][3:]) == 1):
            global currentApp
            currentApp+=1
            user["CurrentFunction"] = "app" + str(currentApp)
            appDate = parseDate(message)
            appDateString = appDate.strftime("%Y-%m-%d %H:%M:%S")
            appointment={
                     "uid": author_id,
                     "appId": currentApp,
                     "date": appDateString,
                     "hospital": ""
                    }

            appointments.append(appointment)
            sendMessage("Where will the appointment take place?", author_id)
            print(appointments)

        elif(int(user["CurrentFunction"][3:]) > 1):
            appId = int(user["CurrentFunction"][3:])
            user["CurrentFunction"] = ""
            appointment=getAppById(appId)
            appointment["hospital"] = message
            sendMessage("Your appointment has been made on " + appointment["date"] + " for " + appointment["hospital"],author_id)
            print(appointment)
Example #15
def startparse(xml):
    '''create parse tree and parse the list of starttimes
      [of the hourly time segments in the forecast].
      see sample xml forecast at:
        http://forecast.weather.gov/MapClick.php?lat=40.357439&lon=-74.64922&FcstType=digitalDWML
    '''
    tree = etree.parse(BytesIO(xml))
    starttimes = [
        parseDate(starttime)
        for starttime in tree.xpath('data/time-layout/start-valid-time/text()')
    ]
    return tree, starttimes
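Each start-valid-time element in such a forecast holds an ISO 8601 string with a UTC offset, which dateutil turns into an aware datetime. Per element, the comprehension does the equivalent of this sketch (the sample string is made up):

from dateutil.parser import parse as parseDate

print(parseDate("2014-03-04T13:00:00-05:00"))
# 2014-03-04 13:00:00-05:00: one timezone-aware datetime per hourly segment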
Example #16
 def __update_vol(self, vol, account):
     v,created = EbsVolume.objects.get_or_create(id = vol.id,
                                                 account = account,
                                                 size = vol.size)
     if vol.snapshot_id:
         v.snapshot = self.get_snapshot(vol.snapshot_id)
     v.region    = vol.zone
     v.timestamp = parseDate(vol.create_time)
     v.state     = vol.status
     v.save()
     self.vols.append(v.id)
     return v
Example #17
 def __update_snap(self, snap, account = None):
     nsnap, created    = SnapShot.objects.get_or_create(id = snap.id,
                                                        size = snap.volume_size)
     nsnap.account     = account
     nsnap.owner_id    = snap.owner_id
     nsnap.description = nsnap.description or ''
     nsnap.state       = snap.status
     nsnap.timestamp   = parseDate(snap.start_time)
     if account:
         nsnap.our = True
     nsnap.save()
     self.snaps.append(nsnap.id)
     return nsnap
Example #18
    def __processTwitterDoc(self, doc):
        '''Converts a mongo document into a user dictionary for the matcher.'''
        user = {}
        if doc is None:
            return None

        # Commons
        user['username'] = doc['_id']
        user['name'] = doc['name']
        user['location'] = doc['location']
        user['website'] = doc['site']
        user['bio'] = doc['bio']
        if doc['photo'] != "" and doc['photo'] is not None:
            user['profileImage'] = doc['photo']
        else:
            user['profileImage'] = ""
        try:
            user['matched'] = doc['matched']
        except Exception:
            user['matched'] = None
        user['sourceCollection'] = TWITTER
        try:
            user['bornAt'] = parseDate(doc['born'][5:])  # removes the "Born " prefix
        except Exception:
            user['bornAt'] = None

        # Specials
        user['username'] = doc['_id']
        user['tweets'] = doc['tweets']
        user['followers'] = doc['followerids']
        if doc['joined'] != "":
            user['joinedAt'] = parseDate(doc['joined'][7:])
        else:
            user['joinedAt'] = None

        return user
Example #19
    def _readRssFeed(self):
        filepath = mergePath(self.get_plugin_isntall_path()["path"], "rss.xml")
        if not os.path.exists(filepath):
            return []

        with open(filepath, "rb") as f:
            xmldoc = parseString("\r\n".join(f.readlines()))

        itemlist = xmldoc.getElementsByTagName("item")
        items = []
        for item in itemlist:
            itemdate = parseDate(get_xml_text(item.getElementsByTagName("pubDate")[0])).replace(tzinfo=None)

            rssitem = {
                "title": get_xml_text(item.getElementsByTagName("title")[0]),
                "link": get_xml_text(item.getElementsByTagName("link")[0]),
                "guid": get_xml_text(item.getElementsByTagName("guid")[0]),
                "pubDate": itemdate,
            }
            items.append(rssitem)
        return items
Example #20
def get_timetable(url):
    r = session_requests.get(url, headers=get_page_headers)
    doc = fromstring(r.text)
    # print etree.tostring(doc, pretty_print=True)

    table = doc.get_element_by_id('MemberTimetable')
    gymbox_classes = []
    tomorrow_classes = []
    tomorrow = datetime.now().date() + timedelta(days=1)
    current_day = None
    # print etree.tostring(table, pretty_print=True)

    for child in list(table):
        if child.get('class') == 'dayHeader':
            current_day_string = child.text_content()
            prefix = '&#160;&#160;'
            if current_day_string.startswith(prefix):
                current_day_string = current_day_string[len(prefix):]
            current_day = parseDate(current_day_string.encode('ascii', 'ignore').decode('ascii')).date()
        if current_day == tomorrow:
            tomorrow_classes.append(child)

    if len(tomorrow_classes) > 0:
        # Remove the dayHeader row and the column titles row
        del tomorrow_classes[0]
        del tomorrow_classes[0]

    for child in list(tomorrow_classes):
        time = child.find_class('col0Item')[0].text_content()
        class_name = child.find_class('col1Item')[0].text_content()
        instructor = child.find_class('col3Item')[0].text_content()
        duration = child.find_class('col4Item')[0].text_content()
        id = child.find_class('col5Item')[0][0].get('id')[5:]
        gymbox_classes.append(
            GymboxClass(id, class_name, time, instructor, duration))
    return gymbox_classes
Example #21
    def _readRssFeed(self):
        filepath = mergePath(self.get_plugin_isntall_path()['path'], 'rss.xml')
        if not os.path.exists(filepath):
            return []

        with open(filepath, 'rb') as f:
            xmldoc = parseString(f.read())

        itemlist = xmldoc.getElementsByTagName('item')
        items = []
        for item in itemlist:
            itemdate = parseDate(get_xml_text(item.getElementsByTagName('pubDate')[0])).replace(tzinfo=None)

            rssitem = {
                "title": get_xml_text(item.getElementsByTagName('title')[0]),
                "link": get_xml_text(item.getElementsByTagName('link')[0]),
                "guid": get_xml_text(item.getElementsByTagName('guid')[0]),
                "pubDate": itemdate
            }
            items.append(rssitem)
        return items
Example #22
 def updateReservation(self, res):
     account = AwsAccount.objects.get(id = res.owner_id)
     reg     = res.region.name
     groups  = []
     instances = []
     for g in res.groups:
         groups.append(SecurityGroup.objects.get(name = g.id, account = account))
     for inst in res.instances:
         ami   = AMI.objects.get(id = inst.image_id)
         dt    = parseDate(inst.launch_time)
         key   = KeyPair.objects.get(name = inst.key_name, account = account)
         ain, created = Instance.objects.get_or_create(id = inst.id,
                                                       account = account,
                                                       ami = ami,
                                                       key_pair = key)
         ain.timestamp = dt
         ain.region = reg
         ain.state = inst.state
         ain.type  = inst.instance_type
         ain.private_dns_name = inst.private_dns_name
         ain.public_dns_name = inst.public_dns_name
         ain.ip_address = inst.ip_address or ''
         ain.monitored  = inst.monitored
     
         try:
             ebsblock   = inst.block_device_mapping[inst.root_device_name]
             ain.volume = EbsVolume.objects.get(id = ebsblock.volume_id)
             ain.size   = ain.volume.size
             ain.persistent = not ebsblock.delete_on_termination
         except:
             pass
     
         ain.security_groups.clear()
         for g in groups:
             ain.security_groups.add(g)
         ain.save()
         self.instances.append(ain.id)
Example #23
    def getLogLinesAsRssItems(self, repositoryId, maxlines):
        """Geeft RSS <item> representatie van loglines terug"""
        #print "Getting loglines for", repositoryId, maxlines
        buffer = ''
        lines = self._getUniqueLogLines(repositoryId, maxlines)
        #lines = self._tail(repositoryId, maxlines)
        if lines:
            burl, prfx = None, None
            for line in reversed(lines):

                lineparts = line.split(' ', 2)

                # Get baseUrl and metadataPrefix from the meta part only once.
                # Beware: we assume the latest warning has the most recent (and probably correct) repository settings.
                if burl is None:
                    burl, prfx = self._getMetaPartStuff(lineparts[1])

                oai_id = lineparts[1].split(':', 1)[1]
                rssData = {
                    'title': xmlEscape(oai_id),
                    'description': xmlEscape(lineparts[2]),
                    'identifier': xmlEscape(lineparts[1]),
                    'date': xmlEscape(str(parseDate(lineparts[0], ignoretz=True).date())),
                    'link': xmlEscape('%s?verb=GetRecord&identifier=%s&metadataPrefix=%s' % (burl, oai_id, prfx))
                }
                buffer += str(RSS_TEMPLATE % rssData)

        return buffer
Example #24
def isDate(string):
    try:
        parseDate(string, fuzzy=True).date().isoformat()
        return True
    except:
        return False
Example #25
def process_feed_item(feed_item, source, articles_in_memory, db):
    # check for duplicate in db
    if db.articles.find_one({'url': feed_item.link}) is not None:
        logger.debug('Skip: article already exists')
        return False

    # check if link exists already in memory
    if any(a['url'] == feed_item.link for a in articles_in_memory):
        logger.debug('Skip: article already exists')
        return False

    # parse article
    try:
        article = Article(feed_item.link, config=create_newspaper_config())
        article.download()
        article.parse()
    except newspaper.article.ArticleException as exc:
        logger.debug(f'Newspaper error: {exc}')
        # logger.exception(exc)
        return False

    # check title
    article_title = article.title.strip()
    if not article_title:
        logger.debug('Skip: no title or text')
        return False

    # check text
    article_text = article.text.strip()
    if len(article_text) < MIN_TEXT_LENGTH:
        logger.debug('Skip: text too short')
        return False

    # must have date
    published_at_val = None
    if article.publish_date:
        # get from parsed article
        published_at_val = article.publish_date
    elif hasattr(feed_item, 'published'):
        # get from feed item
        published_at_val = feed_item.published

    if not published_at_val:
        logger.debug('Skip: missing date')
        return False

    # normalize date, create datetime object, remove time zone
    if isinstance(published_at_val, datetime):
        published_at = published_at_val.replace(tzinfo=None)
    elif isinstance(published_at_val, str):
        try:
            published_at = parseDate(published_at_val, ignoretz=True)
        except ParserError as exc:
            logger.debug(f'Dateutil parse error: {exc}')
            return False
    else:
        logger.debug('No valid date found')
        return False

    # date must be within the last n days
    difference = datetime.now() - published_at
    if difference.days > config.KEEP_DAYS:
        logger.debug(
            f'Skip: Article older than {config.KEEP_DAYS} days ({published_at})'
        )
        return False

    # create new item
    return {
        'title': article_title,
        'published_at': published_at,
        'created_at': datetime.now(),
        'url': feed_item.link,
        'src': source['id'],
        'text': article_text
    }
Example #26
def overview_json():
    # for head in request.headers:
    # print head, request.headers.get(head)
    auth = request.headers.get("Authorization")
    if not auth:
        return authenticate({
            'code': 'authorization_header_missing',
            'description': 'Authorization header is expected'
        })

    parts = auth.split()

    if parts[0].lower() != 'bearer':
        return {
            'code': 'invalid_header',
            'description': 'Authorization header must start with Bearer'
        }
    elif len(parts) == 1:
        return {'code': 'invalid_header', 'description': 'Token not found'}
    elif len(parts) > 2:
        return {
            'code': 'invalid_header',
            'description': 'Authorization header must be Bearer + \s + token'
        }

    token = parts[1]
    try:

        company_id = "1"

        conn = sqlite3.connect('seaborg_god.db')
        conn.row_factory = dict_factory
        c = conn.cursor()
        c.execute("SELECT * FROM companies WHERE id LIKE ?", (company_id))
        company_info = c.fetchone()

        payload = jwt.decode(token, company_info['jwt_secret']
                             #audience=client_id
                             )
    except jwt.ExpiredSignature:
        return authenticate({
            'code': 'token_expired',
            'description': 'token is expired'
        })
    except jwt.InvalidAudienceError:
        return authenticate({
            'code':
            'invalid_audience',
            'description':
            'incorrect audience, expected: ' + client_id
        })
    except jwt.DecodeError:
        return authenticate({
            'code': 'token_invalid_signature',
            'description': 'token signature is invalid'
        })

    date_sent = parseDate(request.query.date_today)
    # print date_sent
    # print request.query.cid
    # print request.forms.get('cid')
    # print request.json
    # print "decoded", payload
    # # print "loading task id", item
    conn = sqlite3.connect('seaborg_god.db')
    conn.row_factory = dict_factory
    c = conn.cursor()

    c.execute(
        """SELECT i.name
          ,i.id
          ,i.outline
          ,i.creation_date
          ,i.approval_date
          ,i.rejection_date
          ,i.completion_date
          ,i.proposal_date
          ,i.created_by
          ,i.approved_by
          ,i.rejected_by
          ,i.completed_by
          ,i.proposed_by
          ,i.responsible
          ,i.reporting_to
          ,i.reporting_cycle
          ,i.progress_report_id
          ,i.final_report_id
          ,i.budget_id
          ,i.department_owner
          ,i.last_save
          ,i.deadline_date
          ,i.approval_requested
          ,e1.name AS created_name
          ,e2.name AS approved_name
          ,e3.name AS rejected_name
          ,e4.name AS completed_name
          ,e5.name AS proposed_name
          ,e6.name AS responsible_name
          ,e7.name AS reporting_to_name
          ,departments.title AS department_title
          ,departments.department_head AS department_head_id
          ,departments.parent_department AS parent_department_id
          ,depHeadPerson.name as department_approval_by
    FROM   tasks i 

    LEFT JOIN people e1 ON e1.id = i.created_by
    LEFT JOIN people e2 ON e2.id = i.approved_by
    LEFT JOIN people e3 ON e3.id = i.rejected_by
    LEFT JOIN people e4 ON e4.id = i.completed_by
    LEFT JOIN people e5 ON e5.id = i.proposed_by
    LEFT JOIN people e6 ON e6.id = i.responsible
    LEFT JOIN people e7 ON e7.id = i.reporting_to
    LEFT JOIN departments ON departments.id = i.department_owner
    LEFT JOIN people depHeadPerson ON departments.department_head = depHeadPerson.id
        WHERE i.company_id LIKE ?
    """, (str(payload['company_id']), ))

    tasks = c.fetchall()

    c.execute(
        "SELECT axioms.* FROM axioms, tasks WHERE axioms.task_id = tasks.id AND tasks.company_id LIKE ?",
        (str(payload['company_id']), ))
    axioms = c.fetchall()
    for axiom in axioms:
        if axiom['task_dependence_id']:
            print "the task has a depenendence"
            c.execute("SELECT name, deadline_date FROM tasks WHERE id = ?",
                      (axiom['task_dependence_id'], ))
            axiom_task = c.fetchone()
            axiom['dependence'] = {
                "name": axiom_task['name'],
                "id": axiom['task_dependence_id'],
                "deadline_date": axiom_task['deadline_date']
            }

    c.execute(
        "SELECT goals.* FROM goals, tasks WHERE goals.task_id = tasks.id AND tasks.company_id LIKE ?",
        (str(payload['company_id']), ))
    goals = c.fetchall()

    c.execute(
        "SELECT deliverables.* FROM deliverables, tasks WHERE deliverables.task_id = tasks.id AND tasks.company_id LIKE ?",
        (str(payload['company_id']), ))
    deliverables = c.fetchall()

    c.execute(
        "SELECT objectives.* FROM objectives, tasks WHERE objectives.task_id = tasks.id AND tasks.company_id LIKE ?",
        (str(payload['company_id']), ))
    objectives = c.fetchall()

    for task in tasks:
        objs = [obj for obj in axioms if obj['task_id'] == task['id']]
        task['axioms'] = objs

        objs = [obj for obj in goals if obj['task_id'] == task['id']]
        task['goals'] = objs

        objs = [obj for obj in deliverables if obj['task_id'] == task['id']]
        for obj in objs:
            if obj['isdelivered'] == 1:
                obj['isdelivered'] = True
            else:
                obj['isdelivered'] = False
        task['deliverables'] = objs

        objs = [obj for obj in objectives if obj['task_id'] == task['id']]
        task['objectives'] = objs

        if task['name'] == "": task['name'] = "Untitled task"
        task['isCompleted'] = task['completion_date'] is not None and task['completion_date'] != ""
        task['isApproved'] = task['approval_date'] is not None and task['approval_date'] != ""
        task['isRejected'] = task['rejection_date'] is not None and task['rejection_date'] != ""
        task['isPendingApproval'] = task['approval_requested'] is not None and task['approval_requested'] != ""
        task['isCreator'] = payload['id'] == task['created_by']
        task['isApprover'] = payload['id'] == task['department_head_id']
        task['isAssignee'] = payload['id'] == task['responsible']
        task['isRaportingOfficer'] = payload['id'] == task['reporting_to']
        task['deadlineDue'] = (parseDate(task['deadline_date']) -
                               parseDate(request.query.date_today)).days

        # Task state
        if task['isCompleted']:
            task['state'] = 'isCompleted'
        elif task['isApproved']:
            task['state'] = 'isApproved'
        elif task['isRejected']:
            task['state'] = 'isRejected'
        elif task['isPendingApproval']:
            task['state'] = 'isPendingApproval'
        else:
            task['state'] = 'isDraft'

    c.execute(
        "SELECT company_name, owner as company_owner_id, url_name as company_url FROM companies WHERE id = ?",
        (str(payload['company_id']), ))
    company_info = c.fetchone()

    conn.close()

    overview = {"tasks": tasks, "company": company_info}
    return overview
Example #27
    def run(self):

        self.train_meta_data = TransactionMetadata()
        self.train_meta_data.setFromDict(self.transaction.persistent_model_metadata.train_metadata)

        header = self.transaction.input_data.columns
        origData = {}

        for column in header:
            origData[column] = []

        empty_count = {}
        column_count = {}

        # we don't need to generate statistics over all of the data, so we subsample based on our accepted margin of error
        population_size = len(self.transaction.input_data.data_array)
        sample_size = int(sampleSize(population_size=population_size, margin_error=CONFIG.DEFAULT_MARGIN_OF_ERROR, confidence_level=CONFIG.DEFAULT_CONFIDENCE_LEVEL))

        # get the indexes of randomly selected rows given the population size
        input_data_sample_indexes = random.sample(range(population_size), sample_size)
        self.logging.info('population_size={population_size},  sample_size={sample_size}  {percent:.2f}%'.format(population_size=population_size, sample_size=sample_size, percent=(sample_size/population_size)*100))

        for sample_i in input_data_sample_indexes:
            row = self.transaction.input_data.data_array[sample_i]
            for i, val in enumerate(row):
                column = header[i]
                value = tryCastToNumber(val)
                if column not in empty_count:
                    empty_count[column] = 0
                    column_count[column] = 0
                if value is None:
                    empty_count[column] += 1
                else:
                    origData[column].append(value)
                column_count[column] += 1
        stats = {}

        for i, col_name in enumerate(origData):
            col_data = origData[col_name] # all rows in just one column
            data_type = self.getColumnDataType(col_data)

            # NOTE: Enable this if you want to assume that some numeric values can be text
            # We noticed that by default this should not be the behavior
            # TODO: Evaluate if we want to specify the problem type on predict statement as regression or classification
            #
            # if col_name in self.train_meta_data.model_predict_columns and data_type == DATA_TYPES.NUMERIC:
            #     unique_count = len(set(col_data))
            #     if unique_count <= CONFIG.ASSUME_NUMERIC_AS_TEXT_WHEN_UNIQUES_IS_LESS_THAN:
            #         data_type = DATA_TYPES.TEXT

            if data_type == DATA_TYPES.DATE:
                for i, element in enumerate(col_data):
                    if str(element) in [str(''), str(None), str(False), str(np.nan), 'NaN', 'nan', 'NA']:
                        col_data[i] = None
                    else:
                        try:
                            col_data[i] = int(parseDate(element).timestamp())
                        except:
                            logging.warning('Could not convert string to date and it was expected, current value {value}'.format(value=element))
                            col_data[i] = None

            if data_type == DATA_TYPES.NUMERIC or data_type == DATA_TYPES.DATE:
                newData = []

                for value in col_data:
                    if value != '' and value != '\r' and value != '\n':
                        newData.append(value)


                col_data = [cleanfloat(i) for i in newData if str(i) not in ['', str(None), str(False), str(np.nan), 'NaN', 'nan', 'NA']]

                y, x = np.histogram(col_data, 50, density=False)
                x = (x + np.roll(x, -1))[:-1] / 2.0
                x = x.tolist()
                y = y.tolist()

                xp = []

                if len(col_data) > 0:
                    max_value = max(col_data)
                    min_value = min(col_data)
                    mean = np.mean(col_data)
                    median = np.median(col_data)
                    var = np.var(col_data)
                    skew = st.skew(col_data)
                    kurtosis = st.kurtosis(col_data)

                    inc_rate = 0.05
                    initial_step_size = abs(max_value-min_value)/100

                    xp += [min_value]
                    i = min_value + initial_step_size

                    while i < max_value:

                        xp += [i]
                        i_inc = abs(i-min_value)*inc_rate
                        i = i + i_inc


                    # TODO: Solve inc_rate for N
                    #    min*inx_rate + (min+min*inc_rate)*inc_rate + (min+(min+min*inc_rate)*inc_rate)*inc_rate ....
                    #
                    #      x_0 = 0
                    #      x_i = (min+x_(i-1)) * inc_rate = min*inc_rate + x_(i-1)*inc_rate
                    #
                    #      sum of x_i_{i=1}^n (x_i) = max_value = inc_rate ( n * min + sum(x_(i-1)) )
                    #
                    #      mx_value/inc_rate = n*min + inc_rate ( n * min + sum(x_(i-2)) )
                    #
                    #     mx_value = n*min*in_rate + inc_rate^2*n*min + inc_rate^2*sum(x_(i-2))
                    #              = n*min(inc_rate+inc_rate^2) + inc_rate^2*sum(x_(i-2))
                    #              = n*min(inc_rate+inc_rate^2) + inc_rate^2*(inc_rate ( n * min + sum(x_(i-3)) ))
                    #              = n*min(sum_(i=1)^(i=n)(inc_rate^i))
                    #    =>  sum_(i=1)^(i=n)(inc_rate^i)) = max_value/(n*min(sum_(i=1)^(i=n))
                    #
                    # # i + i*x

                else:
                    max_value = 0
                    min_value = 0
                    mean = 0
                    median = 0
                    var = 0
                    skew = 0
                    kurtosis = 0
                    xp = []


                is_float = any(int(i) != i for i in col_data)


                col_stats = {
                    "column": col_name,
                    KEYS.DATA_TYPE: data_type,
                    # "distribution": best_fit_name,
                    # "distributionParams": distribution_params,
                    "mean": mean,
                    "median": median,
                    "variance": var,
                    "skewness": skew,
                    "kurtosis": kurtosis,
                    "emptyColumns": empty_count[col_name],
                    "emptyPercentage": empty_count[col_name] / column_count[col_name] * 100,
                    "max": max_value,
                    "min": min_value,
                    "is_float": is_float,
                    "histogram": {
                        "x": x,
                        "y": y
                    },
                    "percentage_buckets": xp
                }
                stats[col_name] = col_stats
            # else if it's text
            else:

                # see if it's a sentence or a word
                is_full_text = (data_type == DATA_TYPES.FULL_TEXT)
                dictionary, histogram = self.getWordsDictionary(col_data, is_full_text)

                # if no words, then no dictionary
                if len(col_data) == 0:
                    dictionary_available = False
                    dictionary_lenght_percentage = 0
                    dictionary = []
                else:
                    dictionary_available = True
                    dictionary_lenght_percentage = len(
                        dictionary) / len(col_data) * 100
                    # if the number of uniques is too large then treat it as text
                    if dictionary_lenght_percentage > 10 and len(col_data) > 50 and not is_full_text:
                        dictionary = []
                        dictionary_available = False
                col_stats = {

                    "column": col_name,
                    KEYS.DATA_TYPE: DATA_TYPES.FULL_TEXT if is_full_text else data_type,
                    "dictionary": dictionary,
                    "dictionaryAvailable": dictionary_available,
                    "dictionaryLenghtPercentage": dictionary_lenght_percentage,
                    "emptyColumns": empty_count[col_name],
                    "emptyPercentage": empty_count[col_name] / column_count[col_name] * 100,
                    "histogram": histogram
                }
                stats[col_name] = col_stats



        total_rows = len(self.transaction.input_data.data_array)
        test_rows = len(self.transaction.input_data.test_indexes)
        validation_rows = len(self.transaction.input_data.validation_indexes)
        train_rows = len(self.transaction.input_data.train_indexes)

        self.transaction.persistent_model_metadata.column_stats = stats
        self.transaction.persistent_model_metadata.total_row_count = total_rows
        self.transaction.persistent_model_metadata.test_row_count = test_rows
        self.transaction.persistent_model_metadata.train_row_count = train_rows
        self.transaction.persistent_model_metadata.validation_row_count = validation_rows

        self.transaction.persistent_model_metadata.update()

        return stats
Example #28
    def _getTopItem(self, lxmlNode):
        ## Wrappers:
        pid, modified, mimetype, pidlocation = '', '', "application/xml", ''

        #1:     Get persistentIdentifier:
        pidlist = lxmlNode.xpath(
            '//didl:DIDL/didl:Item/didl:Descriptor/didl:Statement/dii:Identifier/text()',
            namespaces=self._nsMap)
        if len(pidlist) > 0:
            pid = pidlist[0].strip()
            if not comm.isURNNBN(pid):
                raise ValidateException(
                    formatExceptionLine(EXCEPTION0 + pid, prefix=STR_DIDL))
        else:
            raise ValidateException(
                formatExceptionLine(EXCEPTION1, prefix=STR_DIDL))

        #2:     Get toplevel modificationDate: comm.isISO8601()
        tl_modified = lxmlNode.xpath(
            '//didl:DIDL/didl:Item/didl:Descriptor/didl:Statement/dcterms:modified/text()',
            namespaces=self._nsMap)
        ## Check that tl_modified is valid and present, otherwise raise an exception:
        if len(tl_modified) > 0 and not comm.isISO8601(tl_modified[0]):
            raise ValidateException(
                formatExceptionLine(EXCEPTION2 + tl_modified[0],
                                    prefix=STR_DIDL))
        elif len(tl_modified) == 0:
            raise ValidateException(
                formatExceptionLine(EXCEPTION3, prefix=STR_DIDL))

        ## Get all modified dates:
        all_modified = lxmlNode.xpath(
            '//didl:Item/didl:Descriptor/didl:Statement/dcterms:modified/text()',
            namespaces=self._nsMap)

        ## Get most recent date from all items, to add to toplevelItem:
        if len(all_modified) > 0:
            datedict = {}
            for date in all_modified:
                if comm.isISO8601(date.strip()):
                    #datedict[parseDate(date.strip())] = date.strip()
                    pd = parseDate(date.strip())
                    datedict["%s %s" %
                             (str(pd.date()), str(pd.time()))] = date.strip()

            ## Take the most recent date (the first key in reverse-sorted order):
            for key in sorted(datedict.keys(), reverse=True):
                modified = datedict[key]
                break
        if not tl_modified[0].strip() == modified:
            self.do.logMsg(self._uploadid, LOGGER1, prefix=STR_DIDL)

        #3:     Get PidResourceMimetype
        mimetypelist = lxmlNode.xpath(
            '//didl:DIDL/didl:Item/didl:Component/didl:Resource/@mimeType',
            namespaces=self._nsMap)
        if len(mimetypelist) > 0:
            mimetype = mimetypelist[0].strip()
            if not comm.isMimeType(mimetype):
                self.do.logMsg(self._uploadid,
                               LOGGER2 + mimetype,
                               prefix=STR_DIDL)

        #4:     Get PidResourceLocation:
        pidlocation = self._findAndBindFirst(
            lxmlNode,
            '%s',
            '//didl:DIDL/didl:Item/didl:Component/didl:Resource/@ref',
            '//didl:DIDL/didl:Item/didl:Component/didl:Resource/text()',
            '//didl:Item/didl:Item[didl:Descriptor/didl:Statement/rdf:type/@rdf:resource="info:eu-repo/semantics/humanStartPage"]/didl:Component/didl:Resource/@ref',  #DIDL 3.0
            '//didl:Item/didl:Item[didl:Descriptor/didl:Statement/rdf:type/@resource="info:eu-repo/semantics/humanStartPage"]/didl:Component/didl:Resource/@ref',  #DIDL 3.0, without @rdf:resource
            '//didl:Item/didl:Item[didl:Descriptor/didl:Statement/dip:ObjectType/text()="info:eu-repo/semantics/humanStartPage"]/didl:Component/didl:Resource/@ref',  #fallback DIDL 2.3.1
            '//didl:Item/didl:Item[didl:Descriptor/didl:Statement/rdf:type/@rdf:resource="info:eu-repo/semantics/objectFile"]/didl:Component/didl:Resource/@ref',  #fallback DIDL 3.0
            '//didl:Item/didl:Item[didl:Descriptor/didl:Statement/rdf:type/@resource="info:eu-repo/semantics/objectFile"]/didl:Component/didl:Resource/@ref',  #fallback DIDL 3.0, without @rdf:resource
            '//didl:Item/didl:Item[didl:Descriptor/didl:Statement/dip:ObjectType/text()="info:eu-repo/semantics/objectFile"]/didl:Component/didl:Resource/@ref'  #fallback DIDL 2.3.1
        ).strip()

        if pidlocation == '':
            raise ValidateException(
                formatExceptionLine(EXCEPTION4, prefix=STR_DIDL))
        if not comm.isURL(pidlocation):
            raise ValidateException(
                formatExceptionLine(EXCEPTION5 + pidlocation, prefix=STR_DIDL))

        return """<didl:Item>
        <didl:Descriptor><didl:Statement mimeType="application/xml"><dii:Identifier>%s</dii:Identifier></didl:Statement></didl:Descriptor>
        <didl:Descriptor><didl:Statement mimeType="application/xml"><dcterms:modified>%s</dcterms:modified></didl:Statement></didl:Descriptor>
        <didl:Component><didl:Resource mimeType="%s" ref="%s"/></didl:Component>""" % (
            escapeXml(pid), modified, escapeXml(mimetype),
            comm.urlQuote(pidlocation))
Example #29
    def run(self):

        header = self.transaction.input_data.columns
        origData = {}

        for column in header:
            origData[column] = []

        empty_count = {}
        column_count = {}

        # we don't need to generate statistics over all of the data, so we subsample based on our accepted margin of error
        population_size = len(self.transaction.input_data.data_array)
        sample_size = int(sampleSize(population_size=population_size, margin_error=CONFIG.DEFAULT_MARGIN_OF_ERROR, confidence_level=CONFIG.DEFAULT_CONFIDENCE_LEVEL))

        # get the indexes of randomly selected rows given the population size
        input_data_sample_indexes = random.sample(range(population_size), sample_size)
        self.logging.info('population_size={population_size},  sample_size={sample_size}  {percent:.2f}%'.format(population_size=population_size, sample_size=sample_size, percent=(sample_size/population_size)*100))

        for sample_i in input_data_sample_indexes:
            row = self.transaction.input_data.data_array[sample_i]
            for i, val in enumerate(row):
                column = header[i]
                value = self.cast(val)
                if column not in empty_count:
                    empty_count[column] = 0
                    column_count[column] = 0
                if value is None:
                    empty_count[column] += 1
                else:
                    origData[column].append(value)
                column_count[column] += 1
        stats = {}

        for i, col_name in enumerate(origData):
            col_data = origData[col_name] # all rows in just one column
            data_type = self.getColumnDataType(col_data)
            if data_type == DATA_TYPES.DATE:
                for i, element in enumerate(col_data):
                    if str(element) in [str(''), str(None), str(False), str(np.nan), 'NaN', 'nan', 'NA']:
                        col_data[i] = None
                    else:
                        try:
                            col_data[i] = int(parseDate(element).timestamp())
                        except:
                            logging.warning('Could not convert string to date and it was expected, current value {value}'.format(value=element))
                            col_data[i] = None

            if data_type == DATA_TYPES.NUMERIC or data_type == DATA_TYPES.DATE:
                newData = []

                for value in col_data:
                    if value != '' and value != '\r' and value != '\n':
                        newData.append(value)


                col_data = [float(i) for i in newData if str(i) not in ['', str(None), str(False), str(np.nan), 'NaN', 'nan', 'NA']]

                y, x = np.histogram(col_data, 50, density=False)
                x = (x + np.roll(x, -1))[:-1] / 2.0
                x = x.tolist()
                y = y.tolist()

                if len(col_data) > 0:
                    max_value = max(col_data)
                    min_value = min(col_data)
                    mean = np.mean(col_data)
                    median = np.median(col_data)
                    var = np.var(col_data)
                    skew = st.skew(col_data)
                    kurtosis = st.kurtosis(col_data)
                else:
                    max_value = 0
                    min_value = 0
                    mean = 0
                    median = 0
                    var = 0
                    skew = 0
                    kurtosis = 0


                col_stats = {
                    "column": col_name,
                    KEYS.DATA_TYPE: data_type,
                    # "distribution": best_fit_name,
                    # "distributionParams": distribution_params,
                    "mean": mean,
                    "median": median,
                    "variance": var,
                    "skewness": skew,
                    "kurtosis": kurtosis,
                    "emptyColumns": empty_count[col_name],
                    "emptyPercentage": empty_count[col_name] / column_count[col_name] * 100,
                    "max": max_value,
                    "min": min_value,
                    "histogram": {
                        "x": x,
                        "y": y
                    }
                }
                stats[col_name] = col_stats
            # else if it's text
            else:

                # see if it's a sentence or a word
                is_full_text = (data_type == DATA_TYPES.FULL_TEXT)
                dictionary, histogram = self.getWordsDictionary(col_data, is_full_text)

                # if no words, then no dictionary
                if len(col_data) == 0:
                    dictionary_available = False
                    dictionary_lenght_percentage = 0
                    dictionary = []
                else:
                    dictionary_available = True
                    dictionary_lenght_percentage = len(
                        dictionary) / len(col_data) * 100
                    # if the number of uniques is too large then treat it as text
                    if dictionary_lenght_percentage > 10 and len(col_data) > 50 and not is_full_text:
                        dictionary = []
                        dictionary_available = False
                col_stats = {

                    "column": col_name,
                    KEYS.DATA_TYPE: DATA_TYPES.FULL_TEXT if is_full_text else data_type,
                    "dictionary": dictionary,
                    "dictionaryAvailable": dictionary_available,
                    "dictionaryLenghtPercentage": dictionary_lenght_percentage,
                    "emptyColumns": empty_count[col_name],
                    "emptyPercentage": empty_count[col_name] / column_count[col_name] * 100,
                    "histogram": histogram
                }
                stats[col_name] = col_stats



        total_rows = len(self.transaction.input_data.data_array)
        test_rows = len(self.transaction.input_data.test_indexes)
        validation_rows = len(self.transaction.input_data.validation_indexes)
        train_rows = len(self.transaction.input_data.train_indexes)

        self.transaction.persistent_model_metadata.column_stats = stats
        self.transaction.persistent_model_metadata.total_row_count = total_rows
        self.transaction.persistent_model_metadata.test_row_count = test_rows
        self.transaction.persistent_model_metadata.train_row_count = train_rows
        self.transaction.persistent_model_metadata.validation_row_count = validation_rows

        self.transaction.persistent_model_metadata.update()

        return stats
Example #30
def isISO8601(datestring):
    try:
        parseDate(datestring)
    except ValueError:
        return False
    return True
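A caveat on this helper (an aside, not from the source): despite the name, dateutil accepts far more than ISO 8601, so the check is permissive rather than strict. Given the definition above:

print(isISO8601("2014-03-04"))     # True: actual ISO 8601
print(isISO8601("March 4, 2014"))  # also True: dateutil parses it anyway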
Example #31
def main():
    env = getGHEnv()
    gh = env["GITHUB"]
    inpV = env["INPUT"]
    e = json.loads(gh["EVENT_PATH"].read_text())
    #pprint(e)
    i = e["issue"]
    id = i["id"]
    no = i["number"]
    b = i["body"]
    l = i["locked"]
    c = parseDate(i["created_at"])
    up = parseDate(i["updated_at"])
    u = i["user"]
    r = e["repository"]
    rn = r["name"]
    ro = r["owner"]
    rol = ro["login"]
    lblz = {lbl["name"] for lbl in i["labels"]}

    #print(e["action"], "c", c, "up", up, u["login"], i["state"], lblz)
    ksyStub, otherMetadata = parseHeaders(b)

    parser = YAML(typ="safe")

    illF = " contains ill-formed YAML"
    if ksyStub:
        try:
            ksyStub = parser.load(ksyStub)
        except:
            ksyStub = None
            ksyStubIssues = ["KSY stub" + illF]
        if ksyStub:
            ksyStubIssues = lintKSYStub(ksyStub)
    else:
        ksyStubIssues = [
            "KSY stub (`meta` + `doc` + `doc-ref` must be present) is missing"
        ]

    if otherMetadata:
        try:
            otherMetadata = parser.load(otherMetadata)
        except:
            otherMetadata = None
            additionalBlockIssues = ["Additional block" + illF]
        if otherMetadata:
            additionalBlockIssues = lintAdditionalBlock(otherMetadata)
    else:
        additionalBlockIssues = ()

    api = GHAPI(inpV["GITHUB_TOKEN"])
    repO = api.repo(rol, rn)
    issueO = repO.issue(no)

    if ksyStubIssues or additionalBlockIssues:
        lblzMustBe = (lblz | {invalidLabel}) - {validLabel}
        if invalidLabel not in lblz:
            issueO.leaveAComment(
                generateIssuesMessage(
                    "Hi. Thank you for leaving the request. Please, fix the following issues in it:",
                    ksyStubIssues, additionalBlockIssues))
        else:
            # todo: parse the issues and diff them
            issueO.leaveAComment(
                generateIssuesMessage("Some issues are still present:",
                                      ksyStubIssues, additionalBlockIssues))
    else:
        lblzMustBe = (lblz | {validLabel}) - {invalidLabel}
        if invalidLabel in lblz or validLabel not in lblz:
            print("commenting")
            issueO.leaveAComment(
                "The issues that are detected by the linter have been fixed. Thank you."
            )
            print("commented")
        else:
            pass  # everything is OK

    if lblzMustBe != lblz:
        print("Fixing labels")
        issueO.setLabels(lblzMustBe)
        print("Fixed labels")
Example #32
def toDate(dateString):
    try:
        return parseDate(dateString)
    except:
        # return epoch on failure
        return datetime.fromtimestamp(0)
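One subtlety in this fallback (a hedged aside): datetime.fromtimestamp(0) gives the epoch in local time, so the sentinel shifts with the machine's timezone, while datetime.utcfromtimestamp(0) would pin it to 1970-01-01 00:00:00 UTC:

from datetime import datetime

print(datetime.fromtimestamp(0))     # varies with the local timezone
print(datetime.utcfromtimestamp(0))  # 1970-01-01 00:00:00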
Example #33
        minima[dataIndex] = min(minima[dataIndex], maxOnline)
        maxima[dataIndex] = max(maxima[dataIndex], maxOnline)
        average[dataIndex] += maxOnline
        averageCount[dataIndex] += 1

        dataIndex += 1
        if dataIndex == 7 * 24:
            dataIndex = 0

        if dataIndex == _dataIndex:
            times = times - 1


with open(sys.argv[1], "r") as log:
    line = log.readline()[:-1].split(" ")
    timestamp = parseDate(line[0])
    lastTimestamp = timestamp
    timeBorder = roundTime(timestamp, True)
    dataIndex = timestamp.weekday() * 24 + timestamp.hour

    if line[1] != "RESET":
        print "WARN: First line is not a RESET line"

    for line in log:
        line = line[:-1].split(" ")
        timestamp = parseDate(line[0])

        if line[1] == "RESET":
            online = 0
        elif line[1] == "+":
            online += 1
Example #34
def norm(value, cell_stats):


    if cell_stats[KEYS.DATA_TYPE] == DATA_TYPES.NUMERIC:

        if (str(value) in [str(''), str(' '), str(None), str(False), str(np.nan), 'NaN', 'nan', 'NA'] or (
                value == None or value == '' or value == '\n' or value == '\r')):
            return [0, 0, 0]

        if cell_stats['max'] - cell_stats['min'] != 0:

            normalizedValue = (value - cell_stats['min']) / \
                              (cell_stats['max'] - cell_stats['min'])


        elif cell_stats['max'] != 0:
            normalizedValue = value / cell_stats['max']
        else:
            normalizedValue = value

        # if normalizedValue > 10:
        #     raise ValueError('Something is wrong with normalized value')

        sign = 1 if normalizedValue >= 0 else 0

        normalizedValue = abs(normalizedValue) + OFFSET

        return [normalizedValue, sign, 1.0]

    if cell_stats[KEYS.DATA_TYPE] == DATA_TYPES.DATE:
        #[ timestamp, year, month, day, minute, second, is null]
        if (str(value) in [str(''), str(' '), str(None), str(False), str(np.nan), 'NaN', 'nan', 'NA'] or (
                value == None or value == '' or value == '\n' or value == '\r')):
            ret = [0]*7
            ret[-1] = 0
            return ret

        try:
            timestamp = int(parseDate(value).timestamp())
        except:
            ret = [0] * 7
            ret[-1] = 0
            return ret
        date = datetime.datetime.fromtimestamp(timestamp)
        date_max = datetime.datetime.fromtimestamp(cell_stats['max'])
        date_min = datetime.datetime.fromtimestamp(cell_stats['min'])

        attrs = ['year', 'month', 'day', 'minute', 'second']
        maxes = {'day': 31, 'minute': 60, 'second': 60, 'month': 12}

        norm_vals = []

        if cell_stats['max'] - cell_stats['min'] != 0:
            norm_vals.append( (timestamp - cell_stats['min']) / (cell_stats['max'] - cell_stats['min']) )
        else:
            norm_vals.append( timestamp / cell_stats['max'] )

        for k_attr  in attrs:

            curr = getattr(date, k_attr)
            if k_attr in maxes:
                d_max = maxes[k_attr]
                d_min = 0
            else:
                d_max = getattr(date_max, k_attr)
                d_min = getattr(date_min, k_attr)

            if d_max - d_min !=0:
                norm_vals.append( (curr -d_min)/(d_max-d_min) )
            else:
                norm_vals.append((curr) / (d_max))

        norm_vals.append(1.0)

        return norm_vals

    if cell_stats[KEYS.DATA_TYPE] == DATA_TYPES.TEXT:
        # is it a word
        if cell_stats['dictionaryAvailable']:
            # all the words in the dictionary +2 (one for rare words and one for null)
            vector_length = len(cell_stats['dictionary']) + TEXT_ENCODING_EXTRA_LENGTH
            arr = [0] * vector_length
            arr[-1] = 1.0
            if value in [None, '']:
                # return NULL value, which is an empty one-hot vector array with the last item in the list as the null flag
                arr[vector_length - 1] = 0  # set null as 1
                return arr

            # else return one hot vector
            # if word is a strange word it will not be in the dictionary
            try:
                index = cell_stats['dictionary'].index(value)
            except:
                index = vector_length - 2

            arr[index] = 1
            return arr

        else:

            return []

    if cell_stats[KEYS.DATA_TYPE] == DATA_TYPES.FULL_TEXT:

        if (str(value) in [str(''), str(' '), str(None), str(False), str(np.nan), 'NaN', 'nan', 'NA'] or (
                value == None or value == '' or value == '\n' or value == '\r')):
            return [FULL_TEXT_NONE_VALUE]

        # is it a full text
        if cell_stats['dictionaryAvailable']:
            # all the words in the dictionary +2 (one for rare words and one for null)
            vector_length = len(cell_stats['dictionary']) + FULL_TEXT_ENCODING_EXTRA_LENGTH


            # else return a list of one hot vectors
            values = splitRecursive(value, WORD_SEPARATORS)
            array_of_arrays = []
            first_word = vector_length - 4

            array_of_arrays += [FULL_TEXT_IS_START]
            for word in values:
                # else return one hot vector
                # if word is a strange word it will not be in the dictionary
                try:
                    index = cell_stats['dictionary'].index(word)
                except:
                    index = FULL_TEXT_UN_FREQUENT
                array_of_arrays += [index]



            array_of_arrays += [FULL_TEXT_IS_END]
            # return [array_of_arrays]
            # TODO: ask about this
            return array_of_arrays

        else:

            return []