Sentiment analysis using NLTK's SentimentIntensityAnalyzer (VADER) and the NRC Emotion Lexicon (EmoLex)
Download lexicon:
# Make data directory if it doesn't exist
!mkdir -p data
# Download the NRC word-level emotion lexicon into data/ (-P data);
# -nc (no-clobber) skips the download when the file is already present.
!wget -nc https://nyc3.digitaloceanspaces.com/ml-files-distro/v1/upshot-trump-emolex/data/NRC-emotion-lexicon-wordlevel-alphabetized-v0.92.txt -P data
Define the lexicon loader and the sentiment-analysis function:
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer
# Uncomment on first run to download the VADER lexicon used by SentimentIntensityAnalyzer:
# nltk.download('vader_lexicon')
def load_emolex_lexicon(path: str = '/content/data/NRC-emotion-lexicon-wordlevel-alphabetized-v0.92.txt') -> dict:
    """Load word polarities from an NRC EmoLex word-level lexicon file.

    Every usable line has three tab-separated fields: ``word``, ``emotion``
    and ``value``. Only rows whose emotion is ``'positive'`` or
    ``'negative'`` and whose value is greater than zero contribute; rows for
    other emotions, blank lines and malformed lines are skipped.

    Args:
        path: Location of the lexicon file. Defaults to the absolute path
            the original code used; pass e.g.
            ``'data/NRC-emotion-lexicon-wordlevel-alphabetized-v0.92.txt'``
            when the working directory is not ``/content``.

    Returns:
        A dict mapping each word to ``1`` (positive) or ``-1`` (negative).
        If a word is flagged as both, the row that appears later in the
        file wins.

    Raises:
        OSError: If the file cannot be opened.
    """
    polarity_by_emotion = {'positive': 1, 'negative': -1}
    emolex_lexicon = {}
    # Explicit encoding so decoding does not depend on the platform locale.
    with open(path, 'r', encoding='utf-8') as file:
        for line in file:
            fields = line.strip().split('\t')
            if len(fields) != 3:
                # Blank or malformed line: nothing to record.
                continue
            word, emotion, value = fields
            if emotion not in polarity_by_emotion:
                continue
            try:
                is_flagged = float(value) > 0.0
            except ValueError:
                # Non-numeric association value: skip just this row.
                continue
            if is_flagged:
                emolex_lexicon[word] = polarity_by_emotion[emotion]
    return emolex_lexicon
def analyze_sentiment(text):
    """Score ``text`` with the EmoLex word lexicon plus VADER's compound score.

    The text is lower-cased and split into word tokens; each token is looked
    up in the lexicon returned by ``load_emolex_lexicon()`` (``1`` for
    positive words, ``-1`` for negative words).

    The previous implementation iterated over the keys of VADER's result
    dict (``'neg'``, ``'neu'``, ``'pos'``, ``'compound'``) instead of the
    words of ``text``, so the lexicon was never applied to the input.

    Args:
        text: The string to analyze.

    Returns:
        A dict with four keys:
            ``'pos'``: sum of lexicon values for positive tokens
                (i.e. the number of positive word occurrences).
            ``'neg'``: sum of lexicon values for negative tokens
                (zero or a negative number, one ``-1`` per occurrence).
            ``'neu'``: number of tokens that are not in the lexicon.
            ``'compound'``: the ``'compound'`` value from
                ``SentimentIntensityAnalyzer.polarity_scores(text)``.
    """
    import re  # local import: keeps this block self-contained

    sia = SentimentIntensityAnalyzer()
    sentiment_scores = sia.polarity_scores(text)
    emolex_lexicon = load_emolex_lexicon()

    # Word tokens, allowing inner apostrophes/hyphens ("don't", "well-being").
    tokens = re.findall(r"[a-z]+(?:['-][a-z]+)*", text.lower())
    # 0 marks a token that has no positive/negative entry in the lexicon.
    polarities = [emolex_lexicon.get(token, 0) for token in tokens]

    emolex_scores = {
        'pos': sum(value for value in polarities if value > 0),
        'neg': sum(value for value in polarities if value < 0),
        'neu': sum(1 for value in polarities if value == 0),
        'compound': sentiment_scores['compound'],
    }
    return emolex_scores
# Example run: analyze one sample sentence and print the resulting score dict.
text = "This is a great day!"
scores = analyze_sentiment(text)
print(scores)