def on_status(self, status):
    """Persist one incoming tweet to CouchDB.

    Extracts fields from *status* via analysis.extract, attaches a
    sentiment score, and saves the record through ``self.db``.

    Returns True on every path so the stream stays connected (a falsy
    return from a tweepy listener disconnects the stream). The original
    implicitly returned None on the success path while returning True on
    the failure path; this is now consistent with the file-based
    listener variant.
    """
    logging.info("writing tweet to couchdb")
    try:
        data = analysis.extract(status)
    except Exception as e:
        # Best-effort: log and skip statuses that cannot be parsed
        # instead of letting one bad tweet kill the stream.
        logging.info("exception in parsing tweet: {}".format(e))
        return True
    data["sentiment"] = analysis.sentiment(data["text"])
    # Round-trip through JSON to coerce the payload into plain,
    # serializable types before handing it to the database client.
    self.db.save(json.loads(json.dumps(data)))
    return True
def on_status(self, status):
    """Append one parsed tweet to the output file.

    Extracts fields from *status*, attaches a sentiment score, and
    prints the resulting dict to ``self.file``. Always returns True so
    the stream keeps running, even when parsing fails.
    """
    logging.info("writing tweet to {}".format(self.file))
    try:
        parsed = analysis.extract(status)
    except Exception as exc:
        # Best-effort: log and skip malformed statuses without
        # disconnecting the stream.
        logging.info("exception in parsing tweet: {}".format(exc))
        return True
    parsed["sentiment"] = analysis.sentiment(parsed["text"])
    print(parsed, file=self.file)
    return True
# Search-and-archive entry: run a Twitter search for `query` and save every
# matching tweet (plus a sentiment score) through `db` (presumably a couchdb
# handle — matches the couchdb listener elsewhere in this project; verify).
logging.info("using query {}".format(query))
auth = credentials.authenticate(cred_user)
api = API(auth, wait_on_rate_limit=True)
# items(5000000) is effectively "all results": the Cursor stops early once
# the API stops returning tweets.
search_results = Cursor(api.search, q=query, count=MAX_COUNT, include_entities=True).items(5000000)
try:
    for status in search_results:
        logging.info("saving tweet with id {}".format(status.id))
        try:
            data = analysis.extract(status)
        except KeyError as e:
            # Tweets missing an expected field are skipped, not fatal.
            logging.info("exception in extract: {}".format(e))
            continue
        if data is None:
            continue
        data["sentiment"] = analysis.sentiment(data["text"])
        # JSON round-trip coerces the payload into plain serializable types
        # before handing it to the database client.
        db.save(json.loads(json.dumps(data)))
        # print(data)
except error.TweepError as e:  # Should cover RateLimitException
    logging.error("exception in search_results: {}".format(e))
logging.info("exiting...")
def _analyze(signal, sample_rate):
    """Extract feature frames from *signal* and z-score each feature.

    Features are pulled with a 1.0 s window at 0.5 s steps, then each
    feature row is centered on its mean and scaled by its standard
    deviation across all frames. The result is transposed so that each
    row corresponds to one frame.
    """
    features = analysis.extract(signal, sample_rate, window=1.0, step=0.5)
    # Normalize along axis 1 (across frames), broadcasting per feature.
    mean_per_feature = np.mean(features, axis=1)[:, np.newaxis]
    std_per_feature = np.std(features, axis=1)[:, np.newaxis]
    normalized = (features - mean_per_feature) / std_per_feature
    return normalized.T
def calc_clips(track, plot=False):
    """Pick the two most "representative" 30-second clips of a track.

    Loads and normalizes the audio, extracts feature frames at
    half-second steps, and scores each step by its median cosine
    self-similarity against the whole track. The two best-scoring
    30-second windows (kept at least 30 s apart) are selected.

    Args:
        track: object with a ``source`` path readable by audiofile.read.
        plot: when true, save a diagnostic image via _cache(track, '.png').

    Returns:
        ((clip_A, feats_A), (clip_B, feats_B)) where each clip is a
        30-second slice of the audio signal and each feats is the
        corresponding 60 rows of un-normalized features.
    """
    # Load audio file and normalize for analysis
    signal = audiofile.read(track.source)
    signal = musictoys.analysis.normalize(signal)
    samplerate = signal.sample_rate
    # Extract some features and normalize them around their means.
    # We must transpose the features list because the extractor returns rows
    # of frames, and we want rows of features.
    features = analysis.extract(signal, samplerate, window=1.0, step=0.5).T
    # BUG FIX: keep a pristine copy. Previously this was a plain alias, and
    # the in-place -= and /= below mutated it too, so the returned
    # feats_A/feats_B were normalized rather than the original values the
    # name promises.
    orig_features = features.copy()
    features -= np.mean(features, axis=0)
    features /= np.std(features, axis=0)
    # Compute self-similarity, normalize to 0..1
    pairwise_dist = scipy.spatial.distance.pdist(features, 'cosine')
    sim_matrix = 1.0 - scipy.spatial.distance.squareform(pairwise_dist)
    # Score each half-second step for overall similarity.
    score = np.median(sim_matrix, axis=0)
    score -= np.min(score)
    score /= np.max(score)
    best_score = np.argmax(score)
    # Score each half-second step for suitability as the start of a 30-second
    # window - what is the average score for each such window?
    # (60 steps * 0.5 s/step = 30 s.)
    startscore = np.zeros(len(score) - 60)
    for i in range(len(startscore)):
        startscore[i] = np.mean(score[i:i + 60])
    best_starts = np.argsort(startscore)[::-1]
    best_A = best_starts[0]
    # Second pick: the best start that does not overlap window A
    # (at least 60 steps = 30 s away).
    best_B = np.extract(np.abs(best_starts - best_A) >= 60.0, best_starts)[0]
    # Slice 30 seconds of audio from each chosen start (starts are in
    # half-second steps, hence the * 0.5 to convert to seconds).
    clip_A = signal[int(best_A * 0.5 * samplerate):][:int(samplerate * 30)]
    clip_B = signal[int(best_B * 0.5 * samplerate):][:int(samplerate * 30)]
    feats_A = orig_features[best_A:best_A + 60]
    feats_B = orig_features[best_B:best_B + 60]
    if plot:
        # Plot this stuff out so we can see how we're doing.
        fig = plt.figure(1, figsize=(1024 / 96, 1280 / 96), dpi=96)
        gs = gridspec.GridSpec(3, 1, height_ratios=[1, 1, 12], hspace=0.1)
        plt.set_cmap('hot')
        axMatrix = plt.subplot(gs[2])
        axMatrix.set_aspect(1.)
        axScore = plt.subplot(gs[0], sharex=axMatrix)
        axStart = plt.subplot(gs[1], sharex=axMatrix)
        axScore.matshow(np.tile(score, (36, 1)))
        axScore.axis('off')
        axScore.axvline(best_score)
        axScore.text(best_score, 0, "Max")
        axScore.axvspan(best_A, best_A + 60, color='blue', alpha=0.4)
        axScore.text(best_A, 0, "A")
        axScore.axvspan(best_B, best_B + 60, color='blue', alpha=0.4)
        axScore.text(best_B, 0, "B")
        axStart.plot(startscore)
        axStart.set_ylim([0, 1])
        axStart.get_xaxis().set_visible(False)
        axStart.axvline(best_A)
        axStart.text(best_A, 0, "A")
        axStart.axvline(best_B)
        axStart.text(best_B, 0, "B")
        axMatrix.matshow(sim_matrix)
        axMatrix.axis('off')
        plt.savefig(_cache(track, '.png'), dpi=96, bbox_inches='tight')
    return ((clip_A, feats_A), (clip_B, feats_B))
def _calc_feats(path):
    """Read the WAV file at *path* and return its extracted feature series."""
    rate, samples = scipy.io.wavfile.read(path)
    return analysis.extract(samples, rate)