Sentiment analysis using NLTK SentimentIntensityAnalyzer and NRC Lexicon

Download lexicon:

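# Create the data directory if it doesn't exist, then fetch the NRC emotion lexicon into it
# (-nc skips the download when the file is already present; -P sets the target directory)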
!mkdir -p data
!wget -nc https://nyc3.digitaloceanspaces.com/ml-files-distro/v1/upshot-trump-emolex/data/NRC-emotion-lexicon-wordlevel-alphabetized-v0.92.txt -P data

Define processing task:

import nltk
from nltk.sentiment import SentimentIntensityAnalyzer

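# One-time setup: SentimentIntensityAnalyzer needs the VADER lexicon.
# Uncomment and run once if it has not been downloaded yet:
#nltk.download('vader_lexicon')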

def load_emolex_lexicon(path='/content/data/NRC-emotion-lexicon-wordlevel-alphabetized-v0.92.txt'):
    """Load word polarities from an NRC word-level emotion lexicon file.

    Each usable row has three tab-separated fields: ``word``, ``emotion``
    and ``value``. Only the ``positive`` and ``negative`` rows are used, and
    only when ``value`` is greater than zero. Rows for other emotions, blank
    lines and malformed rows are skipped.

    Args:
        path: Location of the lexicon file. Defaults to the location used
            by the accompanying notebook download step when run from
            ``/content``.

    Returns:
        A dict mapping each word to ``1`` (positive) or ``-1`` (negative).
        If a word is flagged as both, the row that appears later in the
        file wins.

    Raises:
        OSError: If the lexicon file cannot be opened.
    """
    polarity_by_label = {'positive': 1, 'negative': -1}
    emolex_lexicon = {}
    # Explicit encoding so the result does not depend on the platform locale.
    with open(path, 'r', encoding='utf-8') as file:
        for line in file:
            fields = line.strip().split('\t')
            if len(fields) != 3:
                # Blank or malformed row: skip it rather than abort the load.
                continue
            word, emotion, value = fields
            polarity = polarity_by_label.get(emotion)
            if polarity is None:
                # Rows for other emotions (anger, joy, ...) are not used.
                continue
            try:
                is_associated = float(value) > 0.0
            except ValueError:
                # Non-numeric association flag: treat the row as unusable.
                continue
            if is_associated:
                emolex_lexicon[word] = polarity
    return emolex_lexicon

def analyze_sentiment(text, lexicon=None):
    """Score ``text`` with the NRC lexicon and VADER's compound score.

    The text is lower-cased and split into alphabetic tokens, and each token
    is looked up in the NRC lexicon. (The previous version looked up the
    VADER result keys ``'neg'``/``'neu'``/``'pos'``/``'compound'`` instead of
    the words of ``text``, so the lexicon never saw the input.)

    Args:
        text: The text to analyze.
        lexicon: Optional pre-loaded mapping of word -> polarity (positive
            number for positive words, negative number for negative words),
            as returned by ``load_emolex_lexicon()``. When omitted, the
            lexicon is loaded from disk on every call; pass it in when
            scoring many texts.

    Returns:
        A dict with:
            ``'pos'``: number of tokens the lexicon marks positive,
            ``'neg'``: number of tokens the lexicon marks negative,
            ``'neu'``: number of tokens not found in the lexicon,
            ``'compound'``: VADER's compound score for the whole text.
    """
    import re  # local import keeps this block self-contained

    sia = SentimentIntensityAnalyzer()
    sentiment_scores = sia.polarity_scores(text)
    emolex_lexicon = load_emolex_lexicon() if lexicon is None else lexicon

    # The lexicon is lower-case English, so normalise case and drop
    # punctuation/digits before the lookup.
    tokens = re.findall(r"[a-z]+", text.lower())
    polarities = [emolex_lexicon.get(token, 0) for token in tokens]

    emolex_scores = {
        'pos': sum(1 for polarity in polarities if polarity > 0),
        'neg': sum(1 for polarity in polarities if polarity < 0),
        'neu': sum(1 for polarity in polarities if polarity == 0),
        'compound': sentiment_scores['compound'],
    }
    return emolex_scores

# Quick demonstration: score one sample sentence and show the result dict.
text = "This is a great day!"
scores = analyze_sentiment(text=text)

print(scores)