pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.27.1-py3-none-any.whl (6.7 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 6.7/6.7 MB 55.9 MB/s eta 0:00:00
Requirement already satisfied: filelock in /usr/local/lib/python3.9/dist-packages (from transformers) (3.9.1)
Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.9/dist-packages (from transformers) (4.65.0)
Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.9/dist-packages (from transformers) (6.0)
Collecting huggingface-hub<1.0,>=0.11.0
  Downloading huggingface_hub-0.13.2-py3-none-any.whl (199 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 199.2/199.2 KB 16.2 MB/s eta 0:00:00
Requirement already satisfied: requests in /usr/local/lib/python3.9/dist-packages (from transformers) (2.25.1)
Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.9/dist-packages (from transformers) (1.22.4)
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.6 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 7.6/7.6 MB 53.8 MB/s eta 0:00:00
Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.9/dist-packages (from transformers) (23.0)
Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.9/dist-packages (from transformers) (2022.6.2)
Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.9/dist-packages (from huggingface-hub<1.0,>=0.11.0->transformers) (4.5.0)
Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.9/dist-packages (from requests->transformers) (2.10)
Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.9/dist-packages (from requests->transformers) (1.26.15)
Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.9/dist-packages (from requests->transformers) (2022.12.7)
Requirement already satisfied: chardet<5,>=3.0.2 in /usr/local/lib/python3.9/dist-packages (from requests->transformers) (4.0.0)
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.13.2 tokenizers-0.13.2 transformers-4.27.1


import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import nltk
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to /root/nltk_data...

True


df = pd.read_csv('/abcnews-date-text.csv')
df


df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1244184 entries, 0 to 1244183
Data columns (total 2 columns):
 #   Column         Non-Null Count    Dtype 
---  ------         --------------    ----- 
 0   publish_date   1244184 non-null  int64 
 1   headline_text  1244184 non-null  object
dtypes: int64(1), object(1)
memory usage: 19.0+ MB


df.publish_date.min()

20030219


df.publish_date.max()

20211231


df.publish_date.nunique()

6882


# Consecutive time covered in years:
6882/365

18.854794520547944


# Break up by months and years

# Render the YYYYMMDD publish date as text once, then slice out the
# year ("YYYY") and year-month ("YYYYMM") prefixes.
date_digits = df['publish_date'].astype(str)
df['publish_year'] = date_digits.str[:4]
df['publish_year_month'] = date_digits.str[:6]

df.head()


sns.displot(df['publish_year'],height=10)

<seaborn.axisgrid.FacetGrid at 0x7fd558f1e3d0>


# Let's trim to the first 6 months of 2020:

# Keep only rows whose year-month falls in 202001..202006 (both ends inclusive).
year_month = df['publish_year_month'].astype(int)
in_window = (year_month >= 202001) & (year_month <= 202006)
df = df.loc[in_window, :]


sns.displot(df['publish_year_month'])

<seaborn.axisgrid.FacetGrid at 0x7fd4718cf4f0>


from tqdm.notebook import tqdm

df


# Pick three sample headlines by row label for the side-by-side comparison.
sample_labels = (1189850, 1194500, 1203030)
text1, text2, text3 = (
    df.loc[label, 'headline_text'] for label in sample_labels
)

print(text1, '\n', text2, '\n', text3)

locals evacuate from queensland dam failure at talgai 
 top tips for gardeners during coronavirus crisis 
 love in the time of coronavirus


# Running the pretrained RoBERTa model on "text1" from above:

from transformers import AutoTokenizer
from transformers import AutoModelForSequenceClassification
from scipy.special import softmax

# Tokenizer and classifier are loaded from the same pretrained checkpoint.
checkpoint = "cardiffnlp/twitter-roberta-base-sentiment"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSequenceClassification.from_pretrained(checkpoint)

Downloading (…)lve/main/config.json:   0%|          | 0.00/747 [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/150 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/499M [00:00<?, ?B/s]


encoded_text1 = tokenizer(text1, return_tensors='pt')


encoded_text1

{'input_ids': tensor([[    0, 26516,  1536, 15013,    31, 33662,  1245,  9656,  2988,    23,
         15079,   571,  1439,     2]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])}


text_output = model(**encoded_text1)

text_output

SequenceClassifierOutput(loss=None, logits=tensor([[ 1.0781,  1.2723, -2.4418]], grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)


# convert to numpy
scores = text_output[0][0].detach().numpy()

scores

array([ 1.0780689,  1.2722869, -2.4417725], dtype=float32)


# convert the vector of numbers into a vector of probabilities from 0 to 1
scores = softmax(scores)

scores

# Refers to negative, neutral, positive in that order

array([0.4456397 , 0.5411675 , 0.01319278], dtype=float32)


# Setting up VADER sentiment analysis:

from nltk.sentiment import SentimentIntensityAnalyzer

sia = SentimentIntensityAnalyzer()


# VADER scores for "text1" for comparison with RoBERTa:

sia.polarity_scores(text1)

{'neg': 0.32, 'neu': 0.68, 'pos': 0.0, 'compound': -0.5106}


# Try for other example texts:

# Note that this code snippet benefited from https://www.kaggle.com/code/robikscube/sentiment-analysis-python-youtube-tutorial/notebook

def roberta_score_generator(text):
    """Score ``text`` with the pretrained RoBERTa sentiment classifier.

    Relies on the module-level ``tokenizer`` and ``model`` objects and on
    ``scipy.special.softmax``.

    Args:
        text: The string to classify.

    Returns:
        A dict with the keys ``roberta_neg``, ``roberta_neu`` and
        ``roberta_pos`` holding the softmax probabilities computed from the
        model's three logits (in that order: negative, neutral, positive).
    """
    # Local import: the file never imports torch at top level.
    import torch

    encoded_text = tokenizer(text, return_tensors='pt')
    # Inference only: without no_grad every call records an autograd graph
    # (the raw output carries a grad_fn), which is wasted work and memory.
    with torch.no_grad():
        text_output = model(**encoded_text)
    # text_output[0] is the logits tensor; [0] selects the single input row.
    scores = softmax(text_output[0][0].detach().numpy())
    return {
        'roberta_neg': scores[0],
        'roberta_neu': scores[1],
        'roberta_pos': scores[2],
    }


# RoBERTa vs. VADER on the first sample headline; both models must score the
# same text that is printed as the header.
print(text1)

print('Roberta Scores:')
print(roberta_score_generator(text1))

print('VADER Scores:')
print(sia.polarity_scores(text1))

locals evacuate from queensland dam failure at talgai
Roberta Scores:
{'roberta_neg': 0.1348907, 'roberta_neu': 0.79417163, 'roberta_pos': 0.07093765}
VADER Scores:
{'neg': 0.32, 'neu': 0.68, 'pos': 0.0, 'compound': -0.5106}


print(text2)

print('Roberta Scores:')
print(roberta_score_generator(text2))

print('VADER Scores:')
print(sia.polarity_scores(text2))

top tips for gardeners during coronavirus crisis
Roberta Scores:
{'roberta_neg': 0.1348907, 'roberta_neu': 0.79417163, 'roberta_pos': 0.07093765}
VADER Scores:
{'neg': 0.376, 'neu': 0.459, 'pos': 0.165, 'compound': -0.5106}


print(text3)

print('Roberta Scores:')
print(roberta_score_generator(text3))

print('VADER Scores:')
print(sia.polarity_scores(text3))

love in the time of coronavirus
Roberta Scores:
{'roberta_neg': 0.17384635, 'roberta_neu': 0.7102155, 'roberta_pos': 0.11593816}
VADER Scores:
{'neg': 0.0, 'neu': 0.543, 'pos': 0.457, 'compound': 0.6369}


# Run both sentiment ascription techniques on our entire df:

# Note that this code snippet benefited from https://www.kaggle.com/code/robikscube/sentiment-analysis-python-youtube-tutorial/notebook

# Score every headline with both models. Each entry is keyed by the row's
# 0-based position in df and merges the VADER scores (keys prefixed with
# "vader_") with the RoBERTa scores.
results = {}

for position, text in enumerate(tqdm(df['headline_text'], total=len(df))):
    vader_dict = {
        f'vader_{key}': value
        for key, value in sia.polarity_scores(text).items()
    }
    results[position] = {**vader_dict, **roberta_score_generator(text)}

  0%|          | 0/18680 [00:00<?, ?it/s]


# Build a frame from the per-headline score dicts; the transpose turns each
# dict key (row position) into a row and each score name into a column.
results_df = pd.DataFrame(data=results).T


# Copy df's row labels onto the results; this relies on the results having
# been produced in df's row order.
results_df['index'] = df.index.values

results_df


# Promote the copied labels to the frame's index.
results_df = results_df.set_index('index')


# Clear the 'index' name that set_index leaves on the index.
results_df.index.name = None

results_df


# Join the headlines with their sentiment scores, matching rows on index labels.
final_df = df.merge(results_df, left_index=True, right_index=True)
# "MMDD" key: everything after the 4-digit year of the YYYYMMDD publish date.
final_df['publish_month_day'] = df['publish_date'].astype(str).str[4:]


final_df


# Mean of every sentiment score per calendar day (grouped on the "MMDD" key).
score_columns = [
    'vader_neg', 'vader_neu', 'vader_pos', 'vader_compound',
    'roberta_neg', 'roberta_neu', 'roberta_pos',
]
daily_view = final_df[['publish_month_day', *score_columns]]
avg_results_by_day = daily_view.groupby(['publish_month_day']).mean()

avg_results_by_day


### VADER Scores across time

## X axis are the days from January through June.

# Daily mean VADER negativity, one point per "MMDD" day.
plt.figure(figsize=(20,10))
# NOTE(review): plots an empty series before the scatter; the RoBERTa cells
# omit this call — confirm whether it is needed.
plt.plot([])
plt.scatter(avg_results_by_day.index.values, avg_results_by_day['vader_neg'])

<matplotlib.collections.PathCollection at 0x7fd466d026a0>


sns.histplot(avg_results_by_day['vader_neg'])

<Axes: xlabel='vader_neg', ylabel='Count'>


### RoBERTa Model Scores across time

## X axis are the days from January through June.

plt.figure(figsize=(20,10))
plt.scatter(avg_results_by_day.index.values, avg_results_by_day['roberta_neg'])

# Interestingly, both VADER and RoBERTa approaches show a slight reduction in negativity levels in the latter half of the 202001-202006 period looked at.

<matplotlib.collections.PathCollection at 0x7fd46686a8e0>


sns.histplot(avg_results_by_day['roberta_neg'])

<Axes: xlabel='roberta_neg', ylabel='Count'>


# Daily mean VADER positivity, one point per "MMDD" day.
plt.figure(figsize=(20,10))
# NOTE(review): plots an empty series before the scatter; the RoBERTa cells
# omit this call — confirm whether it is needed.
plt.plot([])
plt.scatter(avg_results_by_day.index.values, avg_results_by_day['vader_pos'])

<matplotlib.collections.PathCollection at 0x7fd46650f550>


sns.histplot(avg_results_by_day['vader_pos'])

<Axes: xlabel='vader_pos', ylabel='Count'>


plt.figure(figsize=(20,10))
plt.scatter(avg_results_by_day.index.values, avg_results_by_day['roberta_pos'])

<matplotlib.collections.PathCollection at 0x7fd4661f7b80>


sns.histplot(avg_results_by_day['roberta_pos'])

<Axes: xlabel='roberta_pos', ylabel='Count'>


plt.figure(figsize=(20,10))
plt.scatter(avg_results_by_day.index.values, avg_results_by_day['vader_neu'])

<matplotlib.collections.PathCollection at 0x7fd465847fd0>


sns.histplot(avg_results_by_day['vader_neu'])

<Axes: xlabel='vader_neu', ylabel='Count'>


plt.figure(figsize=(20,10))
plt.scatter(avg_results_by_day.index.values, avg_results_by_day['roberta_neu'])

<matplotlib.collections.PathCollection at 0x7fd4654c7eb0>


sns.histplot(avg_results_by_day['roberta_neu'])

<Axes: xlabel='roberta_neu', ylabel='Count'>


## Let's see the Roberta Model's top Negative and compare with VADER:

final_df[['roberta_neg','vader_neg','headline_text']].sort_values('roberta_neg',ascending=False)

# The top ones are indeed very strongly negative
# The bottom rows are also very positive


## Let's see the VADER Model's top Negative and compare with Roberta:

final_df[['roberta_neg','vader_neg','headline_text']].sort_values('vader_neg',ascending=False)

# These are simply words that have an associated high 'negativity' weight, but the context is not pointing strongly in that direction


## Let's see the Roberta Model's top Positive and compare with VADER:

final_df[['roberta_pos','vader_pos','headline_text']].sort_values('roberta_pos',ascending=False)

# See also the final rows, which are presumably high on the negative scale


## Let's see how our VADER approach performed:

final_df[['roberta_pos','vader_pos','headline_text']].sort_values('vader_pos',ascending=False)

# We see that the top positive under VADER are not that 'positive' from a human perspective


## Let's see the Roberta Model's top Neutral and compare with VADER:

final_df[['roberta_neu','vader_neu','headline_text']].sort_values('roberta_neu',ascending=False)

# These initial rows are indeed very strongly neutral as they are records of neutral facts (briefing date, a headline about some footage, etc)


## Let's see how our VADER approach performed:

final_df[['roberta_neu','vader_neu','headline_text']].sort_values('vader_neu',ascending=False)


## Here's how the distribution of Positive, Neutral and Negative played out with both approaches:

# Add a column with a pos, neg, neu value based on the highest probability assigned:

# For each model, label every headline with the name of its highest-scoring
# class column (column order is kept as pos, neu, neg).
sentiment_sources = {
    'vader_sentiment': ['vader_pos', 'vader_neu', 'vader_neg'],
    'roberta_sentiment': ['roberta_pos', 'roberta_neu', 'roberta_neg'],
}
for label_column, class_columns in sentiment_sources.items():
    final_df[label_column] = final_df[class_columns].idxmax(axis=1)


sns.histplot(final_df['roberta_sentiment'])

<Axes: xlabel='roberta_sentiment', ylabel='Count'>


sns.histplot(final_df['vader_sentiment'])

<Axes: xlabel='vader_sentiment', ylabel='Count'>

	publish_date	headline_text	publish_year	publish_year_month
1186018	20200101	a new type of resolution for the new year	2020	202001
1186019	20200101	adelaide records driest year in more than a de...	2020	202001
1186020	20200101	adelaide riverbank catches alight after new ye...	2020	202001
1186021	20200101	adelaides 9pm fireworks spark blaze on riverbank	2020	202001
1186022	20200101	archaic legislation governing nt women propert...	2020	202001
...	...	...	...	...
1204693	20200630	wa jobs figures politics covid wrap mcgowan	2020	202006
1204694	20200630	wa premier says abs job figures are encouraging	2020	202006
1204695	20200630	where in the world coronavirus cases australia...	2020	202006
1204696	20200630	winx and hugh bowman immortalised in dunedoo	2020	202006
1204697	20200630	worry for former home of dorothea mackellar ne...	2020	202006

	vader_neg	vader_neu	vader_pos	vader_compound	roberta_neg	roberta_neu	roberta_pos	index
0	0.000	1.000	0.000	0.0000	0.011391	0.448468	0.540141	1186018
1	0.000	1.000	0.000	0.0000	0.426486	0.490030	0.083484	1186019
2	0.000	1.000	0.000	0.0000	0.011920	0.859320	0.128760	1186020
3	0.000	0.759	0.241	0.2263	0.331425	0.641182	0.027393	1186021
4	0.000	1.000	0.000	0.0000	0.164553	0.809557	0.025891	1186022
...	...	...	...	...	...	...	...	...
18675	0.000	1.000	0.000	0.0000	0.095382	0.871319	0.033299	1204693
18676	0.000	0.673	0.327	0.5267	0.003929	0.126360	0.869711	1204694
18677	0.000	1.000	0.000	0.0000	0.202884	0.767934	0.029182	1204695
18678	0.000	1.000	0.000	0.0000	0.008963	0.833802	0.157235	1204696
18679	0.225	0.775	0.000	-0.4404	0.602002	0.384958	0.013040	1204697

	vader_neg	vader_neu	vader_pos	vader_compound	roberta_neg	roberta_neu	roberta_pos
1186018	0.000	1.000	0.000	0.0000	0.011391	0.448468	0.540141
1186019	0.000	1.000	0.000	0.0000	0.426486	0.490030	0.083484
1186020	0.000	1.000	0.000	0.0000	0.011920	0.859320	0.128760
1186021	0.000	0.759	0.241	0.2263	0.331425	0.641182	0.027393
1186022	0.000	1.000	0.000	0.0000	0.164553	0.809557	0.025891
...	...	...	...	...	...	...	...
1204693	0.000	1.000	0.000	0.0000	0.095382	0.871319	0.033299
1204694	0.000	0.673	0.327	0.5267	0.003929	0.126360	0.869711
1204695	0.000	1.000	0.000	0.0000	0.202884	0.767934	0.029182
1204696	0.000	1.000	0.000	0.0000	0.008963	0.833802	0.157235
1204697	0.225	0.775	0.000	-0.4404	0.602002	0.384958	0.013040

	publish_date	headline_text	publish_year	publish_year_month	vader_neg	vader_neu	vader_pos	vader_compound	roberta_neg	roberta_neu	roberta_pos	publish_month_day
1186018	20200101	a new type of resolution for the new year	2020	202001	0.000	1.000	0.000	0.0000	0.011391	0.448468	0.540141	0101
1186019	20200101	adelaide records driest year in more than a de...	2020	202001	0.000	1.000	0.000	0.0000	0.426486	0.490030	0.083484	0101
1186020	20200101	adelaide riverbank catches alight after new ye...	2020	202001	0.000	1.000	0.000	0.0000	0.011920	0.859320	0.128760	0101
1186021	20200101	adelaides 9pm fireworks spark blaze on riverbank	2020	202001	0.000	0.759	0.241	0.2263	0.331425	0.641182	0.027393	0101
1186022	20200101	archaic legislation governing nt women propert...	2020	202001	0.000	1.000	0.000	0.0000	0.164553	0.809557	0.025891	0101
...	...	...	...	...	...	...	...	...	...	...	...	...
1204693	20200630	wa jobs figures politics covid wrap mcgowan	2020	202006	0.000	1.000	0.000	0.0000	0.095382	0.871319	0.033299	0630
1204694	20200630	wa premier says abs job figures are encouraging	2020	202006	0.000	0.673	0.327	0.5267	0.003929	0.126360	0.869711	0630
1204695	20200630	where in the world coronavirus cases australia...	2020	202006	0.000	1.000	0.000	0.0000	0.202884	0.767934	0.029182	0630
1204696	20200630	winx and hugh bowman immortalised in dunedoo	2020	202006	0.000	1.000	0.000	0.0000	0.008963	0.833802	0.157235	0630
1204697	20200630	worry for former home of dorothea mackellar ne...	2020	202006	0.225	0.775	0.000	-0.4404	0.602002	0.384958	0.013040	0630

	vader_neg	vader_neu	vader_pos	vader_compound	roberta_neg	roberta_neu	roberta_pos
publish_month_day
0101	0.162712	0.774610	0.062712	-0.148361	0.306990	0.601662	0.091349
0102	0.167403	0.780448	0.052149	-0.158445	0.369189	0.573775	0.057036
0103	0.183358	0.760672	0.055985	-0.168310	0.354603	0.580951	0.064446
0104	0.166035	0.770509	0.063491	-0.139628	0.305385	0.611189	0.083426
0105	0.154897	0.801845	0.043241	-0.172876	0.336040	0.589976	0.073984
...	...	...	...	...	...	...	...
0626	0.113993	0.831701	0.054313	-0.086697	0.284352	0.635887	0.079762
0627	0.075944	0.855629	0.068416	-0.023798	0.227138	0.669483	0.103379
0628	0.087378	0.852595	0.060027	-0.041584	0.276181	0.645749	0.078070
0629	0.123932	0.814246	0.061831	-0.093197	0.305063	0.628929	0.066008
0630	0.102291	0.834135	0.063574	-0.063682	0.324260	0.596680	0.079059

Sentiment Analysis on News Headlines using VADER and RoBERTa¶

Background¶

Installing and Importing Necessary Libraries¶

Loading Our Data and Performing Basic Exploration¶

Comparing VADER and RoBERTa on select headlines:¶

Sample Texts:¶

Applying VADER and RoBERTa Sentiment Classification on Our 6 Months of Headlines:¶

Comparing Negativity Scores¶

Comparing Positivity Scores¶

Comparing Neutral Scores¶

Comparing Most Negative and Most Positive Scored Headlines¶

Comparing Overall Distribution of Sentiments Across Both Models¶

Conclusion¶

	publish_date	headline_text
0	20030219	aba decides against community broadcasting lic...
1	20030219	act fire witnesses must be aware of defamation
2	20030219	a g calls for infrastructure protection summit
3	20030219	air nz staff in aust strike for pay rise
4	20030219	air nz strike to affect australian travellers
...	...	...
1244179	20211231	two aged care residents die as state records 2...
1244180	20211231	victoria records 5;919 new cases and seven deaths
1244181	20211231	wa delays adopting new close contact definition
1244182	20211231	western ringtail possums found badly dehydrate...
1244183	20211231	what makes you a close covid contact here are ...

	publish_date	headline_text	publish_year	publish_year_month
0	20030219	aba decides against community broadcasting lic...	2003	200302
1	20030219	act fire witnesses must be aware of defamation	2003	200302
2	20030219	a g calls for infrastructure protection summit	2003	200302
3	20030219	air nz staff in aust strike for pay rise	2003	200302
4	20030219	air nz strike to affect australian travellers	2003	200302

	roberta_neg	vader_neg	headline_text
1191922	0.979646	0.362	disgusting food served at flinders medical centre
1195196	0.975848	0.313	coronavirus could be the worst thing for the m...
1191818	0.972716	0.360	this is why coronavirus is so bad in iran
1197128	0.971811	0.279	coronavirus causes us economy to shrink worse ...
1194610	0.969573	0.569	coronavirus is a disaster for lonely older aus...
...	...	...	...
1190127	0.001044	0.000	locals overjoyed as barwon river runs at brewa...
1188009	0.001030	0.000	bob hawke college set to welcome first perth s...
1187779	0.001023	0.000	bbl melbourne stars brisbane heat still an exc...
1189811	0.001005	0.000	the maddy prespakis era has arrived good news ...
1202753	0.000825	0.000	tamworth country music festival in good shape ...

	roberta_neg	vader_neg	headline_text
1191908	0.474352	1.000	contagion
1191237	0.555532	0.890	aches; pains and panic
1193238	0.785412	0.886	fearing the worst
1188031	0.250209	0.881	forrestfield fire emergency warning
1195051	0.267040	0.880	fighting fire with fire
...	...	...	...
1196112	0.011387	0.000	cyber agency supports security coronavirus tra...
1196113	0.619087	0.000	deakin university flags redundancies due to co...
1189550	0.382166	0.000	duck hunting incidents captured on video
1189548	0.314636	0.000	dismantled byron sculpture raises more than it...
1186018	0.011391	0.000	a new type of resolution for the new year

	roberta_pos	vader_pos	headline_text
1202329	0.988756	0.352	william callaghhans mum cant wait to meet amazing
1195165	0.985282	0.344	respite foster carers give kids the best weeke...
1198295	0.981007	0.318	mothers day the best things my kids have taugh...
1204367	0.970171	0.658	amazing video captures beautiful cloud waterfalls
1187467	0.965863	0.423	ben simmons just played the best game of his n...
...	...	...	...
1201469	0.002010	0.000	wife died of coronavirus as cummings flouted r...
1190535	0.001958	0.000	hannah clarke murder domestic violence what we...
1201407	0.001947	0.132	mentally ill verdict changes disappoint daught...
1198626	0.001725	0.000	coronavirus kills uk rail worker who was spat on
1203262	0.001625	0.000	why are so many indonesian children dying from...

	roberta_pos	vader_pos	headline_text
1191862	0.097523	0.855	marvel movies popularity
1204257	0.183924	0.745	super netball super shot not so super
1198786	0.476929	0.735	gilmour space technologies wins defence agreement
1191531	0.054509	0.730	coronavirus special
1203763	0.137088	0.722	growth figures
...	...	...	...
1193152	0.024660	0.000	coronavirus recession property market
1193153	0.025840	0.000	coronavirus self isolation a struggle for larg...
1193155	0.006996	0.000	coronavirus staff told not to return to work
1193156	0.046535	0.000	coronavirus the case for endgame c stop almost...
1204697	0.013040	0.000	worry for former home of dorothea mackellar ne...

	roberta_neu	vader_neu	headline_text
1189766	0.954659	1.000	george pell high court appeal hearing date set...
1186407	0.952936	1.000	tasmania news briefing monday 6 january
1190430	0.952193	1.000	drone footage of wuhan taken on january 23; 2020
1202033	0.950862	1.000	14 candidates nominate to contest the eden mon...
1196147	0.948635	1.000	nsw announces from may 11 quarter of students ...
...	...	...	...
1195196	0.022139	0.687	coronavirus could be the worst thing for the m...
1191922	0.017940	0.638	disgusting food served at flinders medical centre
1198295	0.017162	0.682	mothers day the best things my kids have taugh...
1195165	0.012939	0.656	respite foster carers give kids the best weeke...
1202329	0.009502	0.648	william callaghhans mum cant wait to meet amazing

	roberta_neu	vader_neu	headline_text
1186018	0.448468	1.000	a new type of resolution for the new year
1192725	0.858013	1.000	nsw government reveals 2.3 billion coronavirus...
1192727	0.242933	1.000	paraplegic man left deflated over jetstar disa...
1192728	0.295586	1.000	portable artworks found for first time in sout...
1192729	0.859008	1.000	premier peter gutwein explains the new health
...	...	...	...
1186746	0.671792	0.108	cheryl grimmer abduction suspected murder 1 mi...
1186718	0.631355	0.099	powerful photos combat bullying
1202868	0.311536	0.000	no justice; no peace
1204222	0.482112	0.000	no trust
1191908	0.437776	0.000	contagion