Drag Race UK

Last updated on Mar 17, 2021 4 min read

This analysis uses the package rtweet to collect tweets containing the #DragRaceUK hashtag - there is a good tutorial here if you want more information.

First, load the relevant packages, download the tweets, and clean them up a little.

library(rtweet)
library(tidytext)
library(tidyverse)
library(gridExtra)
library(lubridate)

# Search Twitter for tweets carrying the show hashtag or any of the four
# #Team hashtags; retweets are left out of the result.

tweets <- search_tweets(
  q = "#DragRaceUK OR #TeamLawrence OR #TeamBimini OR #TeamEllie OR #TeamTayce",
  n = 18000,
  include_rts = FALSE
)

# Clean the tweet text and keep only the timestamp, the text and a tweet id.
# Links and the "&amp;" entity are removed as whole units *before* dots and
# digits are stripped; deleting the bare substrings "https", "amp" and "tco"
# afterwards would also eat letters out of ordinary words (e.g. "champion",
# "outcome") and leave URL path fragments behind as junk tokens.
dat <- tweets %>%
  mutate(
    # drop non-ASCII characters (emoji etc.)
    text = str_replace_all(text, "[^\x01-\x7F]", ""),
    # drop whole links and the HTML-escaped ampersand, keeping a word break
    text = str_replace_all(text, "https?://\\S+", " "),
    text = str_replace_all(text, "&amp;", " "),
    # drop the show hashtag / name so it does not dominate the word counts
    text = str_replace_all(text, "#DragRaceUK", ""),
    text = str_replace_all(text, "DragRace", ""),
    text = str_replace_all(text, "dragrace", ""),
    # drop full stops and numbers
    text = str_replace_all(text, "\\.|[[:digit:]]+", "")
  ) %>%
  select(created_at, text) %>%
  # row number serves as the tweet id used by the later steps
  mutate(tweet = row_number())

Now, use tidy text tools to separate the words.

# Tokenise: one row per word per tweet (unnest_tokens), then discard every
# word that appears in the stop_words list.

dat_token <- anti_join(
  unnest_tokens(dat, word, text),
  stop_words,
  by = "word"
)

# Glue the remaining tokens of each tweet back into a single space-separated
# string, giving one row per tweet with the tokenised form of its text.

dat_lower <- summarise(
  group_by(dat_token, tweet),
  text = str_c(word, collapse = " ")
)

## `summarise()` ungrouping output (override with `.groups` argument)

# Add one logical column per queen saying whether her name occurs anywhere in
# the tweet text. The name is matched as a plain substring, so "teamlawrence"
# counts as well as "lawrence". No leading "." in the pattern: that would
# demand a preceding character and miss tweets that start with the name.
# coalesce() turns a missing text into FALSE rather than NA.

dat_lower <- dat_lower %>%
  mutate(
    lawrence = coalesce(str_detect(text, "lawrence"), FALSE),
    ellie = coalesce(str_detect(text, "ellie"), FALSE),
    tayce = coalesce(str_detect(text, "tayce"), FALSE),
    bimini = coalesce(str_detect(text, "bimini"), FALSE)
  )

# Tokenise the per-tweet text again so that every word row carries the four
# queen-mention flags of its tweet; stop words are filtered out as before.

dat_mentions <- anti_join(
  unnest_tokens(dat_lower, word, text),
  stop_words,
  by = "word"
)

The first plot looks at the raw number of tweets that mention each Queen. This isn’t a perfect measure because it relies upon people on Twitter spelling the names correctly and people on the internet can’t spell.

# Bar chart of how many tweets mention each queen by name.
# distinct(tweet, word) keeps one row per tweet/name pair, so a tweet that
# repeats a name is counted once and the bars match the "Number of tweets" axis.
dat_token %>%
  filter(word %in% c("lawrence", "bimini", "tayce", "ellie")) %>%
  distinct(tweet, word) %>%
  count(word, sort = TRUE) %>%
  mutate(word = str_to_title(word),
         # order the bars by count
         word = reorder(word, n)) %>%
  ggplot(aes(x = word, y = n, fill = word)) +
  geom_col(show.legend = FALSE) +
  coord_flip() +
  scale_y_continuous(name = "Number of tweets") +
  scale_x_discrete(name = "Queens") +
  theme_minimal() +
  scale_fill_viridis_d()

# Bar chart of how many tweets use each #Team hashtag (the "#" is already gone
# after tokenising, hence the bare "team..." words).
# distinct(tweet, word) keeps one row per tweet/hashtag pair, so a tweet that
# repeats a hashtag is counted once and the bars match the axis label.
dat_token %>%
  filter(word %in% c("teamlawrence", "teambimini", "teamtayce", "teamellie")) %>%
  distinct(tweet, word) %>%
  count(word, sort = TRUE) %>%
  mutate(word = str_to_title(word),
         # order the bars by count
         word = reorder(word, n)) %>%
  ggplot(aes(x = word, y = n, fill = word)) +
  geom_col(show.legend = FALSE) +
  coord_flip() +
  scale_y_continuous(name = "Number of tweets") +
  scale_x_discrete(name = "Queens") +
  theme_minimal() +
  scale_fill_viridis_d()

The next lot of code runs a sentiment analysis on the tweets that each Queen is mentioned in. Sentiment analyses use existing ratings of words (e.g., whether they’re positive or negative) to give you a sense of whether the queen is being mentioned in a tweet that is overall positive or negative in tone. Again, it’s not perfect: it can’t cope with slang (e.g., it will think that a sickening death drop is a bad thing), but it does have face validity.

# do a sentiment analysis for each queen

# Score the tweets that mention one queen against a sentiment lexicon.
#   mentions   - one-word-per-row tibble with a `tweet` id, a `word` column and
#                a logical mention column per queen
#   queen_name - name of the mention column to filter on; also written to the
#                `queen` column of the result
#   lexicon    - word/sentiment lookup joined on `word` (bing by default)
# Returns one row per tweet that contains at least one lexicon word, with the
# columns index, negative, positive, sentiment (positive - negative) and queen.
queen_sentiment <- function(mentions, queen_name,
                            lexicon = get_sentiments("bing")) {
  mentions %>%
    filter(.data[[queen_name]]) %>%
    inner_join(lexicon, by = "word") %>%
    group_by(index = tweet) %>%
    # Counting each polarity explicitly guarantees both columns exist even
    # when a queen has no negative (or no positive) words at all; reshaping
    # the counts wide would drop the missing column and break the subtraction.
    summarise(negative = sum(sentiment == "negative"),
              positive = sum(sentiment == "positive"),
              .groups = "drop") %>%
    mutate(sentiment = positive - negative,
           queen = queen_name)
}

bimini <- queen_sentiment(dat_mentions, "bimini")

lawrence <- queen_sentiment(dat_mentions, "lawrence")

tayce <- queen_sentiment(dat_mentions, "tayce")

ellie <- queen_sentiment(dat_mentions, "ellie")

# Stack the four per-queen tibbles, total the positive, negative and net
# sentiment per queen, then reshape to long form (one row per queen and score
# type) with both columns stored as factors with a fixed level order.

dat_sentiment <- bind_rows(lawrence, ellie, bimini, tayce) %>%
  group_by(queen) %>%
  summarise(
    positive = sum(positive),
    negative = sum(negative),
    overall = sum(sentiment)
  ) %>%
  gather(key = type, value = score, positive:overall) %>%
  mutate(
    type = factor(type, levels = c("positive", "negative", "overall")),
    queen = factor(queen, levels = c("lawrence", "ellie", "bimini", "tayce"))
  )

# Reshape the scores back to one row per queen, sort with the highest overall
# score first, and draw the result as a table graphic.

tbl <- arrange(
  spread(dat_sentiment, key = type, value = score),
  desc(overall)
)

grid.table(tbl)

# Horizontal bar chart of the positive, negative and overall scores, one facet
# per queen (facets follow the level order of the `queen` column).

ggplot(dat_sentiment, aes(x = type, y = score, fill = type)) +
  geom_bar(stat = "identity", position = "dodge", show.legend = FALSE) +
  scale_fill_manual(
    values = c("positive" = "green", "negative" = "red", "overall" = "blue")
  ) +
  facet_wrap(~ queen, ncol = 3) +
  coord_flip()

rstats

Emily Nordmann

Senior Lecturer in Psychology, Associate Dean of Learning and Teaching

I am a teaching-focused Senior lecturer and conduct research into the relationship between learning, student engagement, and technology.

Drag Race UK

Emily Nordmann

Senior Lecturer in Psychology, Associate Dean of Learning and Teaching

Related