visualization2

1 Data Description

library(tidyverse)
library(ggplot2)
library(DT)
library(haven)
library(ggpubr)
library(cowplot)
library(ggthemes)
library(readr)
library(ggExtra)
library(scales)
library(Rmisc)
library(ggbeeswarm) # to make jitter plots  
library(RColorBrewer) # for the colors 
library(patchwork)
library(ggstatsplot)
library(ggsci)
# load raw data 
df_w1 <- readRDS("~/Downloads/2023-095b_files/2023-095a_client_wave-1.rds")
df_w1_supp <- readRDS("~/Downloads/2023-095b_files/2023-095a_client_wave-1_supp.rds")
df_w2 <- readRDS("~/Downloads/2023-095b_files/2023-095b_client_wave-2.rds")

# bind wave 1 data + filter only Progress == 100 
df_w1 = rbind(df_w1, df_w1_supp)
df_w1 |>
  select(-user_id) |> 
  filter(Progress==100) -> df_w1

# invited users list (for merge)
Users_to_invite_Wave2 <- read_csv("Users_to_invite_Wave2.csv", 
    col_types = cols(user_id = col_character()))
Users_to_invite_Wave2 |> 
  select(user_id) |> 
  mutate(userid = user_id) -> for_merge

# merged with invited user list 
df_w2 |>
  merge(for_merge, by='userid') -> w2_users

# merged with invited user list 
w2_users |> 
  select(userid, user_id) |>
  merge(df_w1, by='userid') -> w1_users

# drop randomized_group == 'drop' from wave 2
w2_users |>
  filter(randomized_group != 'drop') |>
  filter(randomized_group != 'muting_treatment2_drop') |>
  select(userid, randomized_group) -> wave2_data

names(wave2_data) = c('user_id', 'randomized_group')
# 134 users without drop 

# muted_accounts
muted_accounts <- read_csv("muted_accounts.csv", 
    col_types = cols(user_id = col_character(), 
        target_user_id = col_character()))

# exposure data 
pre_hometimeline <- read_csv("pre_hometimeline_final.csv", 
    col_types = cols(user_id = col_character(), 
        target_user_id = col_character())) |> select(!created_at)
pre_hometimeline |> 
  mutate(tag = "pre_hometimeline") -> pre_hometimeline

post_hometimeline <- read_csv("post_hometimeline_final.csv", 
    col_types = cols(user_id = col_character(), 
        target_user_id = col_character())) |> select(!created_at)

post_hometimeline |> 
  mutate(tag = "post_hometimeline") -> post_hometimeline

# engagement data
pre_engagement <- read_csv("pre_engagement_final.csv", 
    col_types = cols(user_id = col_character(), 
        target_user_id = col_character()))

pre_engagement |> 
  mutate(tag = "pre_engagement") -> pre_engagement

post_engagement <- read_csv("post_engagement_final.csv", 
    col_types = cols(user_id = col_character(), 
        target_user_id = col_character()))

post_engagement |> 
  mutate(tag = "post_engagement") -> post_engagement

# likes data
pre_likes <- read_csv("pre_likes_final.csv", 
    col_types = cols(user_id = col_character(), 
        target_user_id = col_character()))
names(pre_likes)[3] = "type"
pre_likes |> 
  mutate(tag = "pre_likes") -> pre_likes

post_likes <- read_csv("post_likes_final.csv", 
    col_types = cols(user_id = col_character(), 
        target_user_id = col_character()))
names(post_likes)[3] = "type"

post_likes |> 
  mutate(tag = "post_likes") -> post_likes

# truncated list
updated_inventory <- read_csv("updated_inventory.csv", 
    col_types = cols_only(target_user_id = col_character())) 
reduced_inventory = updated_inventory[c(1:489),]
wave2_data |> merge(pre_engagement, by='user_id') -> this_1
wave2_data |> merge(post_engagement, by='user_id') -> this_2
wave2_data |> merge(pre_hometimeline, by='user_id') -> this_3
wave2_data |> merge(post_hometimeline, by='user_id') -> this_4
wave2_data |> merge(pre_likes, by='user_id') -> this_5
wave2_data |> merge(post_likes, by='user_id') -> this_6
rbind(this_1, this_2, this_3, this_4, this_5, this_6) -> test
reduced_inventory |> mutate(reduced = "yes") -> reduced_inventory 
muted_accounts |> mutate(muted = "yes") -> muted_accounts

test |> 
  merge(muted_accounts, by = c('user_id', 'target_user_id'), all.x = TRUE) |>
  mutate(
    muted = ifelse(is.na(muted), "no", muted)
  ) |>
  merge(reduced_inventory, by='target_user_id', all.x = TRUE) |>
  mutate(
    reduced = ifelse(is.na(reduced), "no", reduced)
  ) |>
  relocate(c(user_id, randomized_group)) -> this_is_full_data
# count_engagement.csv 
count_engagement <- read_csv("count_engagement.csv", 
    col_types = cols(user_id = col_character()))

this_is_full_data |>
  merge(count_engagement, by=c('user_id','tag') ,all.x=TRUE) -> this_is_final_data  

this_is_final_data |> 
  filter(user_id!="1165632418449186822" & user_id !='1590859787029213186' & user_id != '975899686774165504' & user_id != '375334044' & user_id != '1354968197078900743') -> this_is_final_data

Data: this_is_final_data

N=128 (control: 46, media_literacy: 40, muting_treatment1: 41, muting_treatment2: 1)

Let’s remove those accounts that blocked the study account or are locked/suspended (we could not collect post-treatment data).

✅ N=123 (control: 45, media_literacy: 38, muting_treatment1: 40)

(user level)

  • user_id: Participants’ Twitter IDs

  • randomized_group: Randomized group - media_literacy, muting_treatment1/2, control

  • tag:

    • pre_hometimeline/engagement: home timeline (reverse chronological) or engagement data collected before treatment (wave 2)

    • post_hometimeline/engagement: home timeline or engagement data collected after treatment (wave 2) ~ till Mar 14

      • I collected post-treatment data on Mar 14, which is 2 weeks after the start of Wave 2. Thus, the data contains maximum 2 weeks ~ minimum 1 week post-treatment.
  • count: Number of tweets collected

(user-LQ account dyad level)

  • target_user_id: LQ accounts’ Twitter IDs (found in home timeline or engagement data)

  • type: Type of tweets related to LQ sources

    • direct: Direct tweets mentioning LQ sources; Direct tweets produced by LQ sources for likes

    • indirect: (only for likes) likes of tweets that mention LQ sources (@) or their URLs

    • replied: Reply tweets to LQ sources

    • retweeted: Any retweets of LQ tweets

    • quoted: Any quote tweets of LQ tweets

  • muted : Whether the LQ source is muted (“yes”) or not (“no”)

  • reduced : Whether the LQ source is included in the reduced list (“yes”) or not (“no”)

1.0.0.1 Filtering out quoted from exposure data

When we manually checked, it turned out API returned those tweets of LQ sources that are quoted by third-party accounts but we don’t see these tweets in home timelines. Hence, I removed all the type == “quoted” cases from home timeline tweets.

this_is_final_data |>
  filter(!c(tag == "post_hometimeline" & type=="quoted")) |>
  filter(!c(tag == "pre_hometimeline" & type=="quoted")) -> this_is_final_data
this_is_final_data |>
  relocate(user_id, randomized_group, tag, count) |>
  datatable(filter='top') 

2 Task 1. First-Stage Check

For the mute group, post-treatment engagements/exposure to “muted” accounts vs. For the control group, post-treatment engagements/exposure to hypothetically “muted” accounts (accounts that would have been muted if they were assigned to the muting group)

  1. For Muting group: Post-treatment engagements with/exposure to/likes of “muted” sources
  2. For Control group (control + media_literacy): Post-treatment engagements with/exposure to/likes of “hypothetically muted” sources

(This removed those users who have engagements/exposures exclusively to the LQ accounts that are “not” muted either in real or hypothetically, resulting in 94 unique users for the task 1.)


FYI) For muted accounts that you follow:

  • Replies and mentions by the muted account will still appear in your Notifications tab.

  • posts from a muted account – posted before the account was muted – will be removed from your Home timeline.

  • When you click or tap into a conversation, replies from muted accounts will be visible.  


this_is_final_data |>
  mutate(
    group = ifelse(randomized_group=="media_literacy" | 
                     randomized_group=='control', "Control", "Muting")
  ) |>
  mutate(flag = ifelse(
    group == "Control" & muted == "yes", "include", 
    # For Control group, only include cases where muted == "yes" 
    ifelse( 
      group == "Muting" & muted == "yes", "include", 
      # For Muting group, only include cases where muted == "yes" 
      "drop" 
      # The rest of the cases (muted == "no"), drop 
    )
  )) |> 
  filter(flag == "include") |> select(!flag) -> test_again

test_again |> 
  select(user_id, tag, randomized_group, count) |> 
  group_by(user_id, tag) |>
  count() |> ungroup() -> test_for_muted

# test_for_muted - if there are no categories (tag), freq == 0
test_for_muted |> 
  select(user_id, tag, freq) |>
  pivot_wider(names_from=tag, values_from=freq) |> 
  mutate_if(is.numeric , replace_na, replace = 0) |>
  relocate(user_id, 
           pre_hometimeline, post_hometimeline, 
           pre_engagement, post_engagement,
           pre_likes, post_likes) -> test_for_muted_wider

test_for_muted_wider |> 
  select(user_id, pre_hometimeline:post_likes) |> 
  pivot_longer(cols=pre_hometimeline:post_likes, 
               names_to = "tag",
               values_to = "n") -> test_for_muted_longer
test_again |> 
  select(user_id, tag, count) |> 
  unique() |>
  merge(test_for_muted_longer, by=c("user_id", "tag"), all=TRUE) |>
  mutate_if(is.numeric , replace_na, replace = 0) -> test_for_muted_longer2

test_again |> select(user_id, tag, randomized_group) |> unique() -> for_merge_muted_1
for_merge_muted_1 |> 
  select(user_id, randomized_group) |> 
  unique() -> for_merge_muted_2

test_for_muted_longer2 |> 
  merge(for_merge_muted_1, by = c('user_id', 'tag'), all.x = T) |> 
  select(user_id, tag, count, n) |> 
  merge(for_merge_muted_2, by = "user_id") |> 
  mutate(group = ifelse(randomized_group == "control" | 
                          randomized_group == "media_literacy", 
                        "Control", "Muting")) -> test_for_muted_longer3

# backup
df_plot_muted = test_for_muted_longer3

2.1 Plot 1. Post-treatment engagement

Post-treatment engagement (tweets, re-tweets, quotes, replies) with (hypothetically) muted accounts between control vs. muting

df_plot_muted |>
  mutate(rate = (n/count)*100) |> 
  mutate_if(is.numeric , replace_na, replace = 0) |>
  mutate(rate = round(rate,1)) |>
  filter(tag == 'post_engagement') |> # tag == 'post_engagement' filter
  select(user_id, group, n, rate) -> df_post_engagement

df_post_engagement |> 
  filter(group == "Control") |> 
  select(n) |> 
  as.vector() |> 
  unlist() |>
  as.integer() -> group1 

df_post_engagement |> 
  filter(group == "Muting") |> 
  select(n) |> 
  as.vector() |> 
  unlist() |>
  as.integer() -> group2 

df_post_engagement |>
  ggplot(aes(x = group, y = n)) +
  ggbeeswarm::geom_quasirandom(
    shape = 21, color = "white", 
    alpha = 0.9, size = 3,
    aes(fill = group)
  ) +
  geom_bar(stat = "summary", fun = mean, width = 0.7, alpha = 0.6, aes(fill = group)) +
  stat_summary(geom = "errorbar", fun.data = "mean_se", width = 0.1, linewidth = 1) +
  scale_fill_manual(values = brewer.pal(8, "Accent")[1:2]) +
  scale_y_continuous(n.breaks=15) +
  labs(x = "Group",
       y = "Count",
       caption = paste0("P = ", 
                        signif(t.test(group1, group2)$p.value, 2),
                        " (t test)", "\n",
                        "P = ", 
                        signif(ks.test(group1, group2)$p.value, 2),
                        " (Kolmogorov–Smirnov test)")) +
  theme_classic() +
  theme(
    text = element_text(size = 12, face = "bold", color = "black"),
    axis.text = element_text(color = "black"),
    legend.position = "none",
    plot.title = element_text(size = 10),
    plot.caption = element_text(hjust = 0)
  ) +
  ggtitle(
        paste0(
            "Control: mean = ", signif(mean(group1), 2), 
            "; sd = ", signif(sd(group1), 2), "; median = ", signif(median(group1), 2), 
            "; IQR = ", signif(IQR(group1), 2), "\n",
            "Muting: mean = ", signif(mean(group2), 2), 
            "; sd = ", signif(sd(group2), 2),
            "; median = ", signif(median(group2), 2), 
            "; IQR = ", signif(IQR(group2), 2)
        ))  -> bar_post_engagement

df_post_engagement |>
  ggplot(aes(x = group, y = n)) +
  geom_bar(stat = "summary", fun = mean, width = 0.7, alpha = 0.8, aes(fill = group)) +
  stat_summary(geom = "errorbar", fun.data = "mean_se", width = 0.1, linewidth = 1) +
  ggbeeswarm::geom_quasirandom(
    shape = 21, color = "white", 
    alpha = 0.8, size = 3,
    aes(fill = group)) +
  scale_fill_manual(values = brewer.pal(8, "Accent")[1:2]) + 
  scale_y_sqrt(n.breaks=15) + 
  labs(x = "Group", 
       y = "Count (sqrt scale)",
       caption = paste0("P = ", 
                        signif(t.test(group1, group2)$p.value, 2),
                        " (t test)", "\n",
                        "P = ", 
                        signif(ks.test(group1, group2)$p.value, 2),
                        " (Kolmogorov–Smirnov test)")) +
    theme_classic() +
    theme(
        text = element_text(size = 12, face = "bold", color = "black"),
        axis.text = element_text(color = "black"),
        legend.position = "none",
        plot.title = element_text(size = 10),
        plot.caption = element_text(hjust = 0)
    ) +
    ggtitle(
        paste0(
            "Control: mean = ", signif(mean(group1), 2), 
            "; sd = ", signif(sd(group1), 2), "; median = ", signif(median(group1), 2), 
            "; IQR = ", signif(IQR(group1), 2), "\n",
            "Muting: mean = ", signif(mean(group2), 2), 
            "; sd = ", signif(sd(group2), 2),
            "; median = ", signif(median(group2), 2), 
            "; IQR = ", signif(IQR(group2), 2)
        )) -> sqrt_post_engagement


plot_post_engage <- bar_post_engagement + 
  plot_annotation('Plot 1. Post-Treatment Engagements', 
                  theme=theme(plot.title=element_text(size = 12, 
                                                      face = "bold", 
                                                      color = "black",
                                                      hjust=0.5)))
plot_post_engage

ggsave("../Results/plot1_muted.png", height = 4, width = 8)

The y-axis (Count) indicates the number of tweets related to LQ sources that one engaged with (=engagement). These include direct mentions of LQ accounts, replies, retweets, and quote tweets of tweets created by LQ accounts, as well as direct URLs to these LQ domains.

Group: Control includes control group and media literacy group, while Muting includes muting treatment groups. For Muting group, only engagements with those LQ accounts that are muted are counted. Note that in pilot study, we muted 70% of the truncated list (truncated list: n=489).

Hence, Plot 1 allows us to compare the post-treatment engagements with (hypothetically) muted LQ accounts between Control and Muting groups.

For most participants, we see low levels of engagements with LQ accounts with a few outliers (active ones). Given this highly skewed distribution, and to be careful with comparison, I overlayed the same quantities but with different ways of visualizing them.

  1. Bar plot

    • Comparison: mean

    • Error bars indicate SEs of the mean

    • t-test statistics (p-value presented)

  2. Dot plot

    • Comparison: distribution

    • K-S test (nonparametric)

2.1.0.1 Another approach of visualizing engagements

Ro’ee: “[…] due to outliers and the many zeros, another approach is to create a binary variable of whether someone engaged with any low-quality content, and then compare that share engaging with low quality content across treatments. […] To focus on the extensive margin (any activity) instead of the intensive margin (how much activity) […]”

df_post_engagement |>
  mutate(
    engaged = ifelse(n==0, "NO", "YES") 
  ) |>
  select(engaged, group) |>
  group_by(group, engaged) |> count()
##   engaged   group freq
## 1      NO Control   61
## 2      NO  Muting   21
## 3     YES Control    5
## 4     YES  Muting    3

Only 5 out of 66 in Control (8%=5/66) + 3 out of 24 Muting group (12%) have non-zero post-treatment engagement.


2.2 Plot 2. Post-treatment exposure

Post-treatment exposure between control vs. muting (for muted accounts)

df_plot_muted |>
  filter(tag == "post_hometimeline") |> # tag == 'post_hometimeline' filter
  select(user_id, group, n) -> df_post_hometimeline

df_post_hometimeline |> 
  filter(group == "Control") |> 
  select(n) |> 
  as.vector() |> 
  unlist() |>
  as.integer() -> group1 

df_post_hometimeline |> 
  filter(group == "Muting") |> 
  select(n) |> 
  as.vector() |> 
  unlist() |>
  as.integer() -> group2 

df_post_hometimeline |>
  ggplot(aes(x = group, y = n)) +
  ggbeeswarm::geom_quasirandom(
    shape = 21, color = "white", 
    alpha = 0.9, size = 3,
    aes(fill = group)
  ) +
  geom_bar(stat = "summary", fun = mean, width = 0.7, alpha = 0.6, aes(fill = group)) +
  stat_summary(geom = "errorbar", fun.data = "mean_se", width = 0.1, linewidth = 1) +
  scale_fill_manual(values = brewer.pal(8, "Accent")[1:2]) +
  labs(x = "Group",
       y = "Count",
      caption = paste0("P = ", 
                        signif(t.test(group1, group2)$p.value, 2),
                        " (t test)", "\n",
                        "P = ", 
                        signif(ks.test(group1, group2)$p.value, 2),
                        " (Kolmogorov–Smirnov test)")) +
  theme_classic() +
  theme(
    text = element_text(size = 12, face = "bold", color = "black"),
    axis.text = element_text(color = "black"),
    legend.position = "none",
    plot.title = element_text(size = 10),
    plot.caption = element_text(hjust = 0)
  ) +
  ggtitle(
    paste0(
      "Control: mean = ", signif(mean(group1), 2), 
      "; sd = ", signif(sd(group1), 2), "; median = ", signif(median(group1), 2), 
      "; IQR = ", signif(IQR(group1), 2), "\n",
      "Muting: mean = ", signif(mean(group2), 2), 
      "; sd = ", signif(sd(group2), 2),
      "; median = ", signif(median(group2), 2), 
      "; IQR = ", signif(IQR(group2), 2)
      ))-> bar_post_hometimeline 

plot_post_exposure <- bar_post_hometimeline + 
  plot_annotation('Plot 2-1. Post-Treatment Exposure', 
                  theme=theme(plot.title=element_text(size = 12, 
                                                      face = "bold", 
                                                      color = "black",
                                                      hjust=0.5)))
plot_post_exposure

ggsave("../Results/plot2_muted.png", height = 4, width = 8)

The y-axis (Count) indicates the number of tweets from LQ sources (that are muted) found in one’s home timeline (=exposure).

Hence, Plot 2 allows us to compare the post-treatment exposures to (hypothetically) muted LQ accounts between Control and Muting groups.

2.2.1 Y-axis: Count → Activity Rate

Since we pull reverse chronological home timeline (exposure data) once, how about calculating activity rates, instead of using count?

  • Activity Rate = (# of muted LQ tweets) / (timestamp of the last appeared LQ tweet - timestamp of the first appeared LQ tweet in the collected data)
# exposure data 
post_hometimeline_final <- read_csv("post_hometimeline_final.csv", 
    col_types = cols(user_id = col_character(), 
        target_user_id = col_character())) 
post_hometimeline_final |> filter(type!="quoted") -> post_hometimeline_final
aggregate(post_hometimeline_final$created_at, 
          by=list(post_hometimeline_final$user_id), max) -> max_agg
aggregate(post_hometimeline_final$created_at,
          by=list(post_hometimeline_final$user_id), min) -> min_agg
names(max_agg) = c("user_id", "max_time") 
names(min_agg) = c("user_id", "min_time")
max_agg |> merge(min_agg, by='user_id') |> 
  mutate( timeinterval= as.duration(min_time %--% max_time), 
          in_days = round(as.numeric(max_time - min_time)/86400, 5), 
          in_hours =round(as.numeric(max_time - min_time)/3600, 5)) -> merge_agg
# merge timegap variables into df_post_hometimeline
df_post_hometimeline |>
    merge(merge_agg, by='user_id', all.x=TRUE) |>
    mutate_at(c(6:8), ~replace(., is.na(.), 0)) -> post_home_merged

post_home_merged |> 
  mutate(activity_rate = n/in_days) |>
  mutate_at(c(9), ~replace(., is.nan(.), 0)) |>
  mutate_at(c(9), ~replace(., is.infinite(.), 0)) -> post_home_merged

Plot 2-2. Post-Treatment Exposure (Daily activity rate)

  • tweets / day
# post_home_merged 
post_home_merged |> 
  filter(group == "Control") |> 
  select(activity_rate) |> 
  as.vector() |> 
  unlist() |>
  as.integer() -> group1 

post_home_merged |> 
  filter(group == "Muting") |> 
  select(activity_rate) |> 
  as.vector() |> 
  unlist() |>
  as.integer() -> group2 

post_home_merged |>
  ggplot(aes(x = group, y = activity_rate)) +
  ggbeeswarm::geom_quasirandom(
    shape = 21, color = "white", 
    alpha = 0.9, size = 3,
    aes(fill = group)
  ) +
  geom_bar(stat = "summary", fun = mean, width = 0.7, alpha = 0.6, aes(fill = group)) +
  stat_summary(geom = "errorbar", fun.data = "mean_se", width = 0.1, linewidth = 1) +
  scale_fill_manual(values = brewer.pal(8, "Accent")[1:2]) +
  labs(x = "Group",
       y = "tweets / day",
      caption = paste0("P = ", 
                        signif(t.test(group1, group2)$p.value, 2),
                        " (t test)", "\n",
                        "P = ", 
                        signif(ks.test(group1, group2)$p.value, 2),
                        " (Kolmogorov–Smirnov test)")) +
  theme_classic() +
  theme(
    text = element_text(size = 12, face = "bold", color = "black"),
    axis.text = element_text(color = "black"),
    legend.position = "none",
    plot.title = element_text(size = 10),
    plot.caption = element_text(hjust = 0)
  ) +
  ggtitle(
    paste0(
      "Control: mean = ", signif(mean(group1), 2), 
      "; sd = ", signif(sd(group1), 2), "; median = ", signif(median(group1), 2), 
      "; IQR = ", signif(IQR(group1), 2), "\n",
      "Muting: mean = ", signif(mean(group2), 2), 
      "; sd = ", signif(sd(group2), 2),
      "; median = ", signif(median(group2), 2), 
      "; IQR = ", signif(IQR(group2), 2)
      ))-> bar_post_hometimeline_rate 

plot_post_exposure_rate <- bar_post_hometimeline_rate + 
  plot_annotation('Plot 2-2. Post-Treatment Exposure (Daily activity rate)', 
                  theme=theme(plot.title=element_text(size = 12, 
                                                      face = "bold", 
                                                      color = "black",
                                                      hjust=0.5)))
plot_post_exposure_rate

ggsave("../Results/plot2_muted_rate.png", height = 4, width = 8)

Setting the y-axis into log10 scale to better understand the distribution:

  • log10(1+y) due to many zeros!
post_home_merged |>
  ggplot(aes(x = group, y = log10(activity_rate+1))) +
  ggbeeswarm::geom_quasirandom(
    shape = 21, color = "white", 
    alpha = 0.9, size = 3,
    aes(fill = group)
  ) +
  geom_bar(stat = "summary", fun = mean, width = 0.7, alpha = 0.6, aes(fill = group)) +
  stat_summary(geom = "errorbar", fun.data = "mean_se", width = 0.1, linewidth = 1) +
  scale_fill_manual(values = brewer.pal(8, "Accent")[1:2]) +
  labs(x = "Group",
       y = "log10((tweets/day)+1)",
      caption = paste0("P = ", 
                        signif(t.test(group1, group2)$p.value, 2),
                        " (t test)", "\n",
                        "P = ", 
                        signif(ks.test(group1, group2)$p.value, 2),
                        " (Kolmogorov–Smirnov test)")) +
  theme_classic() +
  theme(
    text = element_text(size = 12, face = "bold", color = "black"),
    axis.text = element_text(color = "black"),
    legend.position = "none",
    plot.title = element_text(size = 10),
    plot.caption = element_text(hjust = 0)
  ) +
  ggtitle(
    paste0(
      "Control: mean = ", signif(mean(group1), 2), 
      "; sd = ", signif(sd(group1), 2), "; median = ", signif(median(group1), 2), 
      "; IQR = ", signif(IQR(group1), 2), "\n",
      "Muting: mean = ", signif(mean(group2), 2), 
      "; sd = ", signif(sd(group2), 2),
      "; median = ", signif(median(group2), 2), 
      "; IQR = ", signif(IQR(group2), 2)
      )) -> bar_post_hometimeline_rate_log

plot_post_exposure_rate_log <- bar_post_hometimeline_rate_log + 
  plot_annotation('Plot 2-3. Post-Treatment Exposure (Daily activity rate; log10(y+1))', 
                  theme=theme(plot.title=element_text(size = 12, 
                                                      face = "bold", 
                                                      color = "black",
                                                      hjust=0.5)))
plot_post_exposure_rate_log

ggsave("../Results/plot2_muted_rate_log.png", height = 4, width = 8)

2.3 Plot 3. Post-treatment Likes

df_plot_muted |>
  filter(tag == "post_likes") |> # tag == 'post_likes' filter
  select(user_id, group, n) -> df_post_likes

df_post_likes |> 
  filter(group == "Control") |> 
  select(n) |> 
  as.vector() |> 
  unlist() |>
  as.integer() -> group1 

df_post_likes |> 
  filter(group == "Muting") |> 
  select(n) |> 
  as.vector() |> 
  unlist() |>
  as.integer() -> group2 

df_post_likes |>
  ggplot(aes(x = group, y = n)) +
  ggbeeswarm::geom_quasirandom(
    shape = 21, color = "white", 
    alpha = 0.9, size = 3,
    aes(fill = group)
  ) +
  geom_bar(stat = "summary", fun = mean, width = 0.7, alpha = 0.6, aes(fill = group)) +
  stat_summary(geom = "errorbar", fun.data = "mean_se", width = 0.1, linewidth = 1) +
  scale_fill_manual(values = brewer.pal(8, "Accent")[1:2]) +
  labs(x = "Group",
       y = "Count",
      caption = paste0("P = ", 
                        signif(t.test(group1, group2)$p.value, 2),
                        " (t test)", "\n",
                        "P = ", 
                        signif(ks.test(group1, group2)$p.value, 2),
                        " (Kolmogorov–Smirnov test)")) +
  theme_classic() +
  theme(
    text = element_text(size = 12, face = "bold", color = "black"),
    axis.text = element_text(color = "black"),
    legend.position = "none",
    plot.title = element_text(size = 10),
    plot.caption = element_text(hjust = 0)
  ) +
  ggtitle(
    paste0(
      "Control: mean = ", signif(mean(group1), 2), 
      "; sd = ", signif(sd(group1), 2), "; median = ", signif(median(group1), 2), 
      "; IQR = ", signif(IQR(group1), 2), "\n",
      "Muting: mean = ", signif(mean(group2), 2), 
      "; sd = ", signif(sd(group2), 2),
      "; median = ", signif(median(group2), 2), 
      "; IQR = ", signif(IQR(group2), 2)
      ))-> bar_post_likes 

plot_post_likes <- bar_post_likes + 
  plot_annotation('Plot 3. Post-Treatment Likes', 
                  theme=theme(plot.title=element_text(size = 12, 
                                                      face = "bold", 
                                                      color = "black",
                                                      hjust=0.5)))
plot_post_likes

ggsave("../Results/plot3_muted.png", height = 4, width = 8)

The y-axis (Count) indicates the number of likes to tweets related to LQ sources that are muted (=likes).

Hence, Plot 3 allows us to compare the post-treatment likes to (hypothetically) muted LQ accounts between Control and Muting groups.

2.3.1 Takeaways

  1. Overall, engagement levels are very low (only handful of participants actively engaging with LQ sources). This might resulted in minimal first-stage effect.
  2. For post-treatment home timeline exposure, we do see some meaningful differences between control vs. muting groups.

2.4 Plot 4. Pre-treatment engagement

Pre-treatment engagement between control vs. muting (for muted accounts)

# retrieve recruitment mode
w1_users |> select(user_id, vs_recruitment) -> recruitmode
# merge
df_plot_muted |>
  merge(recruitmode, by='user_id', all.x = TRUE) -> df_plot_muted_recruit

df_plot_muted_recruit |>
  filter(tag == "pre_engagement") |> 
  select(user_id, group, n, vs_recruitment) -> df_pre_engagement

df_pre_engagement |> 
  filter(group == "Control") |> 
  select(n) |> 
  as.vector() |> 
  unlist() |>
  as.integer() -> group1 

df_pre_engagement |> 
  filter(group == "Muting") |> 
  select(n) |> 
  as.vector() |> 
  unlist() |>
  as.integer() -> group2 


df_pre_engagement |>
  ggplot(aes(x = group, y = n)) +
  ggbeeswarm::geom_quasirandom(
    shape = 21, color = "white", 
    alpha = 0.9, size = 3,
    aes(fill = group)
  ) +
  geom_bar(stat = "summary", fun = mean, width = 0.7, alpha = 0.6, aes(fill = group)) +
  stat_summary(geom = "errorbar", fun.data = "mean_se", width = 0.1, linewidth = 1) +
  scale_fill_manual(values = brewer.pal(8, "Accent")[1:2]) +
  labs(x = "Group",
       y = "Count",
      caption = paste0("P = ", 
                        signif(t.test(group1, group2)$p.value, 2),
                        " (t test)", "\n",
                        "P = ", 
                        signif(ks.test(group1, group2)$p.value, 2),
                        " (Kolmogorov–Smirnov test)")) +
  theme_classic() +
  theme(
    text = element_text(size = 12, face = "bold", color = "black"),
    axis.text = element_text(color = "black"),
    legend.position = "none",
    plot.title = element_text(size = 10),
    plot.caption = element_text(hjust = 0)
  ) +
  ggtitle(
    paste0(
      "Control: mean = ", signif(mean(group1), 2), 
      "; sd = ", signif(sd(group1), 2), "; median = ", signif(median(group1), 2), 
      "; IQR = ", signif(IQR(group1), 2), "\n",
      "Muting: mean = ", signif(mean(group2), 2), 
      "; sd = ", signif(sd(group2), 2),
      "; median = ", signif(median(group2), 2), 
      "; IQR = ", signif(IQR(group2), 2)
      ))-> bar_pre_engagement

bar_pre_engagement + plot_annotation('Plot 4. Pre-Treatment Engagements', 
                        theme=theme(plot.title=element_text(size = 12, 
                                                            face = "bold", 
                                                            color = "black",
                                                            hjust=0.5)))

ggsave("../Results/plot4_muted.png", height = 4, width = 8)

2.5 Plot 5. Pre-treatment exposure

Pre-treatment exposure between control vs. muting (for muted accounts)

# Use df_plot_unmuted 
df_plot_muted_recruit |>
  filter(tag == "pre_hometimeline") |> # tag == 'post_hometimeline' filter
  select(user_id, group, n, vs_recruitment) -> df_pre_hometimeline

pre_hometimeline_final <- read_csv("pre_hometimeline_final.csv", 
    col_types = cols(user_id = col_character(), 
        target_user_id = col_character())) 
pre_hometimeline_final |> filter(type!='quoted') -> pre_hometimeline_final
aggregate(pre_hometimeline_final$created_at, 
          by=list(pre_hometimeline_final$user_id), max) -> max_agg
aggregate(pre_hometimeline_final$created_at,
          by=list(pre_hometimeline_final$user_id), min) -> min_agg
names(max_agg) = c("user_id", "max_time") 
names(min_agg) = c("user_id", "min_time")
max_agg |> merge(min_agg, by='user_id') |> 
  mutate( timeinterval= as.duration(min_time %--% max_time), 
          in_days = round(as.numeric(max_time - min_time)/86400, 5), 
          in_hours =round(as.numeric(max_time - min_time)/3600, 5)) -> merge_agg
# merge timegap variables into df_pre_hometimeline
df_pre_hometimeline |>
    merge(merge_agg, by='user_id', all.x=TRUE) |>
    mutate_at(c(7:9), ~replace(., is.na(.), 0)) -> pre_home_merged

pre_home_merged |> 
  mutate(activity_rate = n/in_days) |>
  mutate_at(c(10), ~replace(., is.nan(.), 0)) |>
  mutate_at(c(10), ~replace(., is.infinite(.), 0)) -> pre_home_merged

pre_home_merged |> 
  filter(group == "Control") |> 
  select(n) |> 
  as.vector() |> 
  unlist() |>
  as.integer() -> group1 

pre_home_merged |> 
  filter(group == "Muting") |> 
  select(n) |> 
  as.vector() |> 
  unlist() |>
  as.integer() -> group2 


pre_home_merged |>
  ggplot(aes(x = group, y = n)) +
  ggbeeswarm::geom_quasirandom(
    shape = 21, color = "white", 
    alpha = 0.9, size = 3,
    aes(fill = group)
  ) +
  geom_bar(stat = "summary", fun = mean, width = 0.7, alpha = 0.6, aes(fill = group)) +
  stat_summary(geom = "errorbar", fun.data = "mean_se", width = 0.1, linewidth = 1) +
  scale_fill_manual(values = brewer.pal(8, "Accent")[1:2]) +
  labs(x = "Group",
       y = "Count",
      caption = paste0("P = ", 
                        signif(t.test(group1, group2)$p.value, 2),
                        " (t test)", "\n",
                        "P = ", 
                        signif(ks.test(group1, group2)$p.value, 2),
                        " (Kolmogorov–Smirnov test)")) +
  theme_classic() +
  theme(
    text = element_text(size = 12, face = "bold", color = "black"),
    axis.text = element_text(color = "black"),
    legend.position = "none",
    plot.title = element_text(size = 10),
    plot.caption = element_text(hjust = 0)
  ) +
  ggtitle(
    paste0(
      "Control: mean = ", signif(mean(group1), 2), 
      "; sd = ", signif(sd(group1), 2), "; median = ", signif(median(group1), 2), 
      "; IQR = ", signif(IQR(group1), 2), "\n",
      "Muting: mean = ", signif(mean(group2), 2), 
      "; sd = ", signif(sd(group2), 2),
      "; median = ", signif(median(group2), 2), 
      "; IQR = ", signif(IQR(group2), 2)
      ))-> bar_pre_exposure

bar_pre_exposure + plot_annotation('Plot 5-1. Pre-Treatment Exposure', 
                        theme=theme(plot.title=element_text(size = 12, 
                                                            face = "bold", 
                                                            color = "black",
                                                            hjust=0.5)))

ggsave("../Results/plot5_muted.png", height = 4, width = 8)
# post_home_merged 
pre_home_merged |> 
  filter(group == "Control") |> 
  select(activity_rate) |> 
  as.vector() |> 
  unlist() |>
  as.integer() -> group1 

pre_home_merged |> 
  filter(group == "Muting") |> 
  select(activity_rate) |> 
  as.vector() |> 
  unlist() |>
  as.integer() -> group2 

pre_home_merged |>
  ggplot(aes(x = group, y = activity_rate)) +
  ggbeeswarm::geom_quasirandom(
    shape = 21, color = "white", 
    alpha = 0.9, size = 3,
    aes(fill = group)
  ) +
  geom_bar(stat = "summary", fun = mean, width = 0.7, alpha = 0.6, aes(fill = group)) +
  stat_summary(geom = "errorbar", fun.data = "mean_se", width = 0.1, linewidth = 1) +
  scale_fill_manual(values = brewer.pal(8, "Accent")[1:2]) +
  labs(x = "Group",
       y = "tweets / day",
      caption = paste0("P = ", 
                        signif(t.test(group1, group2)$p.value, 2),
                        " (t test)", "\n",
                        "P = ", 
                        signif(ks.test(group1, group2)$p.value, 2),
                        " (Kolmogorov–Smirnov test)")) +
  theme_classic() +
  theme(
    text = element_text(size = 12, face = "bold", color = "black"),
    axis.text = element_text(color = "black"),
    legend.position = "none",
    plot.title = element_text(size = 10),
    plot.caption = element_text(hjust = 0)
  ) +
  ggtitle(
    paste0(
      "Control: mean = ", signif(mean(group1), 2), 
      "; sd = ", signif(sd(group1), 2), "; median = ", signif(median(group1), 2), 
      "; IQR = ", signif(IQR(group1), 2), "\n",
      "Muting: mean = ", signif(mean(group2), 2), 
      "; sd = ", signif(sd(group2), 2),
      "; median = ", signif(median(group2), 2), 
      "; IQR = ", signif(IQR(group2), 2)
      ))-> bar_pre_hometimeline_rate 

plot_pre_exposure_rate <- bar_pre_hometimeline_rate + 
  plot_annotation('Plot 5-2. Pre-Treatment Exposure (Daily activity rate)', 
                  theme=theme(plot.title=element_text(size = 12, 
                                                      face = "bold", 
                                                      color = "black",
                                                      hjust=0.5)))
plot_pre_exposure_rate

ggsave("../Results/plot5_muted_rate.png", height = 4, width = 8)
pre_home_merged |>
  ggplot(aes(x = group, y = log10(activity_rate+1))) +
  ggbeeswarm::geom_quasirandom(
    shape = 21, color = "white", 
    alpha = 0.9, size = 3,
    aes(fill = group)
  ) +
  geom_bar(stat = "summary", fun = mean, width = 0.7, alpha = 0.6, aes(fill = group)) +
  stat_summary(geom = "errorbar", fun.data = "mean_se", width = 0.1, linewidth = 1) +
  scale_fill_manual(values = brewer.pal(8, "Accent")[1:2]) +
  labs(x = "Group",
       y = "log10((tweets/day)+1)",
      caption = paste0("P = ", 
                        signif(t.test(group1, group2)$p.value, 2),
                        " (t test)", "\n",
                        "P = ", 
                        signif(ks.test(group1, group2)$p.value, 2),
                        " (Kolmogorov–Smirnov test)")) +
  theme_classic() +
  theme(
    text = element_text(size = 12, face = "bold", color = "black"),
    axis.text = element_text(color = "black"),
    legend.position = "none",
    plot.title = element_text(size = 10),
    plot.caption = element_text(hjust = 0)
  ) +
  ggtitle(
    paste0(
      "Control: mean = ", signif(mean(group1), 2), 
      "; sd = ", signif(sd(group1), 2), "; median = ", signif(median(group1), 2), 
      "; IQR = ", signif(IQR(group1), 2), "\n",
      "Muting: mean = ", signif(mean(group2), 2), 
      "; sd = ", signif(sd(group2), 2),
      "; median = ", signif(median(group2), 2), 
      "; IQR = ", signif(IQR(group2), 2)
      )) -> bar_pre_hometimeline_rate_log

plot_pre_exposure_rate_log <- bar_pre_hometimeline_rate_log + 
  plot_annotation('Plot 5-3. Pre-Treatment Exposure (Daily activity rate; log10(y+1))', 
                  theme=theme(plot.title=element_text(size = 12, 
                                                      face = "bold", 
                                                      color = "black",
                                                      hjust=0.5)))
plot_pre_exposure_rate_log

ggsave("../Results/plot5_muted_rate_log.png", height = 4, width = 8)

2.6 Plot 6. Pre-treatment likes

Pre-treatment exposure between control vs. muting (for muted accounts)

df_plot_muted_recruit |>
  filter(tag == "pre_likes") |>  
  select(user_id, group, n, vs_recruitment) -> df_pre_likes

df_pre_likes |> 
  filter(group == "Control") |> 
  select(n) |> 
  as.vector() |> 
  unlist() |>
  as.integer() -> group1 

df_pre_likes |> 
  filter(group == "Muting") |> 
  select(n) |> 
  as.vector() |> 
  unlist() |>
  as.integer() -> group2 


df_pre_likes |>
  ggplot(aes(x = group, y = n)) +
  ggbeeswarm::geom_quasirandom(
    shape = 21, color = "white", 
    alpha = 0.9, size = 3,
    aes(fill = group)
  ) +
  geom_bar(stat = "summary", fun = mean, width = 0.7, alpha = 0.6, aes(fill = group)) +
  stat_summary(geom = "errorbar", fun.data = "mean_se", width = 0.1, linewidth = 1) +
  scale_fill_manual(values = brewer.pal(8, "Accent")[1:2]) +
  labs(x = "Group",
       y = "Count",
      caption = paste0("P = ", 
                        signif(t.test(group1, group2)$p.value, 2),
                        " (t test)", "\n",
                        "P = ", 
                        signif(ks.test(group1, group2)$p.value, 2),
                        " (Kolmogorov–Smirnov test)")) +
  theme_classic() +
  theme(
    text = element_text(size = 12, face = "bold", color = "black"),
    axis.text = element_text(color = "black"),
    legend.position = "none",
    plot.title = element_text(size = 10),
    plot.caption = element_text(hjust = 0)
  ) +
  ggtitle(
    paste0(
      "Control: mean = ", signif(mean(group1), 2), 
      "; sd = ", signif(sd(group1), 2), "; median = ", signif(median(group1), 2), 
      "; IQR = ", signif(IQR(group1), 2), "\n",
      "Muting: mean = ", signif(mean(group2), 2), 
      "; sd = ", signif(sd(group2), 2),
      "; median = ", signif(median(group2), 2), 
      "; IQR = ", signif(IQR(group2), 2)
      ))-> bar_pre_likes

bar_pre_likes + plot_annotation('Plot 6. Pre-Treatment Likes', 
                        theme=theme(plot.title=element_text(size = 12, 
                                                            face = "bold", 
                                                            color = "black",
                                                            hjust=0.5)))

ggsave("../Results/plot6_muted.png", height = 4, width = 8)

2.6.1 Takeaways

In general, no differences between control vs. muting groups in pre-treatment engagements, exposure, and likes!


2.7 Plot by Recruitment Mode

Pre-treatment Engagement

# ENGAGEMENT 
df_pre_engagement |> 
  filter(vs_recruitment == "Twitter recruitment") |> 
  select(n) |> 
  as.vector() |> 
  unlist() |>
  as.integer() -> group1

df_pre_engagement |> 
  filter(vs_recruitment == "New panelist recruited after January 1, 2024") |> 
  select(n) |> 
  as.vector() |> 
  unlist() |>
  as.integer() -> group2 

df_pre_engagement |> 
  filter(vs_recruitment == "Existing panelist") |> 
  select(n) |> 
  as.vector() |> 
  unlist() |>
  as.integer() -> group3 


df_pre_engagement |>
  ggplot(aes(x = vs_recruitment, y = n)) +
  ggbeeswarm::geom_quasirandom(
    shape = 21, color = "white", 
    alpha = 0.9, size = 3,
    aes(fill = vs_recruitment)
  ) +
  geom_bar(stat = "summary", fun = mean, width = 0.7, alpha = 0.6, 
           aes(fill = vs_recruitment)) +
  stat_summary(geom = "errorbar", fun.data = "mean_se", width = 0.1, linewidth = 1) +
  scale_fill_manual(values = brewer.pal(8, "Accent")[1:3]) +
  labs(x = "Recruitment Mode",
       y = "Count") +
  theme_classic() +
  theme(
    text = element_text(size = 12, face = "bold", color = "black"),
    axis.text = element_text(color = "black"),
    legend.position = "none",
    plot.title = element_text(size = 10),
    plot.caption = element_text(hjust = 0)
  ) +
  ggtitle(
    paste0(
      "Twitter recruitment: mean = ", signif(mean(group1), 2), 
      "; sd = ", signif(sd(group1), 2), "; median = ", signif(median(group1), 2), 
      "; IQR = ", signif(IQR(group1), 2), " (N=", length(group1), ")", "\n",
      "New panelist: mean = ", signif(mean(group2), 2), 
      "; sd = ", signif(sd(group2), 2),
      "; median = ", signif(median(group2), 2), 
      "; IQR = ", signif(IQR(group2), 2)," (N=", length(group2), ")", "\n",
      "Existing panelist: mean = ", signif(mean(group3), 2), 
      "; sd = ", signif(sd(group3), 2),
      "; median = ", signif(median(group3), 2), 
      "; IQR = ", signif(IQR(group3), 2)," (N=", length(group3), ")"
      )) -> bar_pre_engagement_recruit

bar_pre_engagement_recruit + 
  plot_annotation('Plot 7. Pre-Treatment Engagement by Recruitment Mode', 
                        theme=theme(plot.title=element_text(size = 12, 
                                                            face = "bold", 
                                                            color = "black",
                                                            hjust=0.5)))

ggsave("../Results/plot7_muted.png", height = 4, width = 8)

Pre-treatment Exposure

# Use pre_home_merged
pre_home_merged |> 
  filter(vs_recruitment == "Twitter recruitment") |> 
  select(n) |> 
  as.vector() |> 
  unlist() |>
  as.integer() -> group1

pre_home_merged |> 
  filter(vs_recruitment == "New panelist recruited after January 1, 2024") |> 
  select(n) |> 
  as.vector() |> 
  unlist() |>
  as.integer() -> group2 

pre_home_merged |> 
  filter(vs_recruitment == "Existing panelist") |> 
  select(n) |> 
  as.vector() |> 
  unlist() |>
  as.integer() -> group3 


pre_home_merged |>
  ggplot(aes(x = vs_recruitment, y = n)) +
  ggbeeswarm::geom_quasirandom(
    shape = 21, color = "white", 
    alpha = 0.9, size = 3,
    aes(fill = vs_recruitment)
  ) +
  geom_bar(stat = "summary", fun = mean, width = 0.7, alpha = 0.6, 
           aes(fill = vs_recruitment)) +
  stat_summary(geom = "errorbar", fun.data = "mean_se", width = 0.1, linewidth = 1) +
  scale_fill_manual(values = brewer.pal(8, "Accent")[1:3]) +
  labs(x = "Recruitment Mode",
       y = "Count") +
  theme_classic() +
  theme(
    text = element_text(size = 12, face = "bold", color = "black"),
    axis.text = element_text(color = "black"),
    legend.position = "none",
    plot.title = element_text(size = 10),
    plot.caption = element_text(hjust = 0)
  ) +
  ggtitle(
    paste0(
      "Twitter recruitment: mean = ", signif(mean(group1), 2), 
      "; sd = ", signif(sd(group1), 2), "; median = ", signif(median(group1), 2), 
      "; IQR = ", signif(IQR(group1), 2), " (N=", length(group1), ")", "\n",
      "New panelist: mean = ", signif(mean(group2), 2), 
      "; sd = ", signif(sd(group2), 2),
      "; median = ", signif(median(group2), 2), 
      "; IQR = ", signif(IQR(group2), 2)," (N=", length(group2), ")", "\n",
      "Existing panelist: mean = ", signif(mean(group3), 2), 
      "; sd = ", signif(sd(group3), 2),
      "; median = ", signif(median(group3), 2), 
      "; IQR = ", signif(IQR(group3), 2)," (N=", length(group3), ")"
      )) -> bar_pre_exposure_recruit

bar_pre_exposure_recruit + 
  plot_annotation('Plot 8-1. Pre-Treatment Exposure by Recruitment Mode', 
                        theme=theme(plot.title=element_text(size = 12, 
                                                            face = "bold", 
                                                            color = "black",
                                                            hjust=0.5)))

ggsave("../Results/plot8_muted.png", height = 4, width = 8)

Plot 8-2. Y-axis into activity rate

# Use pre_home_merged
pre_home_merged |> 
  filter(vs_recruitment == "Twitter recruitment") |> 
  select(activity_rate) |> 
  as.vector() |> 
  unlist() |>
  as.integer() -> group1

pre_home_merged |> 
  filter(vs_recruitment == "New panelist recruited after January 1, 2024") |> 
  select(activity_rate) |> 
  as.vector() |> 
  unlist() |>
  as.integer() -> group2 

pre_home_merged |> 
  filter(vs_recruitment == "Existing panelist") |> 
  select(activity_rate) |> 
  as.vector() |> 
  unlist() |>
  as.integer() -> group3 


pre_home_merged |>
  ggplot(aes(x = vs_recruitment, y = activity_rate)) +
  ggbeeswarm::geom_quasirandom(
    shape = 21, color = "white", 
    alpha = 0.9, size = 3,
    aes(fill = vs_recruitment)
  ) +
  geom_bar(stat = "summary", fun = mean, width = 0.7, alpha = 0.6, 
           aes(fill = vs_recruitment)) +
  stat_summary(geom = "errorbar", fun.data = "mean_se", width = 0.1, linewidth = 1) +
  scale_fill_manual(values = brewer.pal(8, "Accent")[1:3]) +
  labs(x = "Recruitment Mode",
       y = "tweet / day") +
  theme_classic() +
  theme(
    text = element_text(size = 12, face = "bold", color = "black"),
    axis.text = element_text(color = "black"),
    legend.position = "none",
    plot.title = element_text(size = 10),
    plot.caption = element_text(hjust = 0)
  ) +
  ggtitle(
    paste0(
      "Twitter recruitment: mean = ", signif(mean(group1), 2), 
      "; sd = ", signif(sd(group1), 2), "; median = ", signif(median(group1), 2), 
      "; IQR = ", signif(IQR(group1), 2), " (N=", length(group1), ")", "\n",
      "New panelist: mean = ", signif(mean(group2), 2), 
      "; sd = ", signif(sd(group2), 2),
      "; median = ", signif(median(group2), 2), 
      "; IQR = ", signif(IQR(group2), 2)," (N=", length(group2), ")", "\n",
      "Existing panelist: mean = ", signif(mean(group3), 2), 
      "; sd = ", signif(sd(group3), 2),
      "; median = ", signif(median(group3), 2), 
      "; IQR = ", signif(IQR(group3), 2)," (N=", length(group3), ")"
      )) -> bar_pre_exposure_recruit

bar_pre_exposure_recruit + 
  plot_annotation('Plot 8-2. Pre-Treatment Exposure by Recruitment Mode (Daily Activitiy Rate)', 
                        theme=theme(plot.title=element_text(size = 12, 
                                                            face = "bold", 
                                                            color = "black",
                                                            hjust=0.5)))

ggsave("../Results/plot8-2_muted.png", height = 4, width = 8)
pre_home_merged |>
  ggplot(aes(x = vs_recruitment, y = log10(activity_rate+1))) +
  ggbeeswarm::geom_quasirandom(
    shape = 21, color = "white", 
    alpha = 0.9, size = 3,
    aes(fill = vs_recruitment)
  ) +
  geom_bar(stat = "summary", fun = mean, width = 0.7, alpha = 0.6, 
           aes(fill = vs_recruitment)) +
  stat_summary(geom = "errorbar", fun.data = "mean_se", width = 0.1, linewidth = 1) +
  scale_fill_manual(values = brewer.pal(8, "Accent")[1:3]) +
  labs(x = "Recruitment Mode",
       y = "log10((tweets/day)+1)") + 
  theme_classic() +
  theme(
    text = element_text(size = 12, face = "bold", color = "black"),
    axis.text = element_text(color = "black"),
    legend.position = "none",
    plot.title = element_text(size = 10),
    plot.caption = element_text(hjust = 0)
  ) +
  ggtitle(
    paste0(
      "Twitter recruitment: mean = ", signif(mean(group1), 2), 
      "; sd = ", signif(sd(group1), 2), "; median = ", signif(median(group1), 2), 
      "; IQR = ", signif(IQR(group1), 2), " (N=", length(group1), ")", "\n",
      "New panelist: mean = ", signif(mean(group2), 2), 
      "; sd = ", signif(sd(group2), 2),
      "; median = ", signif(median(group2), 2), 
      "; IQR = ", signif(IQR(group2), 2)," (N=", length(group2), ")", "\n",
      "Existing panelist: mean = ", signif(mean(group3), 2), 
      "; sd = ", signif(sd(group3), 2),
      "; median = ", signif(median(group3), 2), 
      "; IQR = ", signif(IQR(group3), 2)," (N=", length(group3), ")"
      )) -> bar_pre_exposure_recruit_log

bar_pre_exposure_recruit_log + 
  plot_annotation('Plot 8-3. Pre-Treatment Exposure by Recruitment Mode (Daily activity rate; ; log10(y+1))', 
                        theme=theme(plot.title=element_text(size = 12, 
                                                            face = "bold", 
                                                            color = "black",
                                                            hjust=0.5)))

ggsave("../Results/plot8-3_muted.png", height = 4, width = 8)

pre-treatment likes

# LIKES 
df_pre_likes |> 
  filter(vs_recruitment == "Twitter recruitment") |> 
  select(n) |> 
  as.vector() |> 
  unlist() |>
  as.integer() -> group1 

df_pre_likes |> 
  filter(vs_recruitment == "New panelist recruited after January 1, 2024") |> 
  select(n) |> 
  as.vector() |> 
  unlist() |>
  as.integer() -> group2

df_pre_likes |> 
  filter(vs_recruitment == "Existing panelist") |> 
  select(n) |> 
  as.vector() |> 
  unlist() |>
  as.integer() -> group3


df_pre_likes |>
  ggplot(aes(x = vs_recruitment, y = n)) +
  ggbeeswarm::geom_quasirandom(
    shape = 21, color = "white", 
    alpha = 0.9, size = 3,
    aes(fill = vs_recruitment)
  ) +
  geom_bar(stat = "summary", fun = mean, width = 0.7, alpha = 0.6, 
           aes(fill = vs_recruitment)) +
  stat_summary(geom = "errorbar", fun.data = "mean_se", width = 0.1, linewidth = 1) +
  scale_fill_manual(values = brewer.pal(8, "Accent")[1:3]) +
  labs(x = "Recruitment Mode",
       y = "Count") +
  theme_classic() +
  theme(
    text = element_text(size = 12, face = "bold", color = "black"),
    axis.text = element_text(color = "black"),
    legend.position = "none",
    plot.title = element_text(size = 10),
    plot.caption = element_text(hjust = 0)
  ) +
  ggtitle(
    paste0(
      "Twitter recruitment: mean = ", signif(mean(group1), 2), 
      "; sd = ", signif(sd(group1), 2), "; median = ", signif(median(group1), 2), 
      "; IQR = ", signif(IQR(group1), 2), " (N=", length(group1), ")", "\n",
      "New panelist: mean = ", signif(mean(group2), 2), 
      "; sd = ", signif(sd(group2), 2),
      "; median = ", signif(median(group2), 2), 
      "; IQR = ", signif(IQR(group2), 2)," (N=", length(group2), ")", "\n",
      "Existing panelist: mean = ", signif(mean(group3), 2), 
      "; sd = ", signif(sd(group3), 2),
      "; median = ", signif(median(group3), 2), 
      "; IQR = ", signif(IQR(group3), 2)," (N=", length(group3), ")"
      )) -> bar_pre_likes_recruit

bar_pre_likes_recruit + plot_annotation('Plot 9. Pre-Treatment Likes by Recruitment Mode', 
                        theme=theme(plot.title=element_text(size = 12, 
                                                            face = "bold", 
                                                            color = "black",
                                                            hjust=0.5)))

ggsave("../Results/plot9_muted.png", height = 4, width = 8)

3 [Hints] Hypotheses?

Hypothesis 1: Assignment to the muting treatment (H1a) and the media literacy treatment (H1b) will reduce total engagement with low-quality news sources during the treatment period. The effect of the muting treatment will be larger (H1c). 

this_is_final_data |>
  select(user_id, tag, randomized_group, count) |> 
  group_by(user_id, tag) |>
  count() |> ungroup() -> test_for_all

# test_for_all - if there are no categories (tag), freq == 0
test_for_all |> 
  select(user_id, tag, freq) |>
  pivot_wider(names_from=tag, values_from=freq) |> 
  mutate_if(is.numeric , replace_na, replace = 0) |>
  relocate(user_id, 
           pre_hometimeline, post_hometimeline, 
           pre_engagement, post_engagement,
           pre_likes, post_likes) -> test_for_all_wider

test_for_all_wider |> 
  select(user_id, pre_hometimeline:post_likes) |> 
  pivot_longer(cols=pre_hometimeline:post_likes, 
               names_to = "tag",
               values_to = "n") -> test_for_all_longer
test_for_all |> 
  select(user_id, tag, count) |> 
  unique() |>
  merge(test_for_muted_longer, by=c("user_id", "tag"), all=TRUE) |>
  mutate_if(is.numeric , replace_na, replace = 0) -> test_for_all_longer2

test_for_all |> 
  select(user_id, tag, randomized_group) |> 
  unique() -> for_merge_all_1

for_merge_all_1 |> 
  select(user_id, randomized_group) |> 
  unique() -> for_merge_all_2

test_for_all_longer2 |> 
  merge(for_merge_all_1, 
        by = c('user_id', 'tag'), all.x = T) |> 
  select(user_id, tag, count, n) |> 
  merge(for_merge_all_2, by = "user_id") -> test_for_all_longer3

# backup
df_plot_all = test_for_all_longer3
df_plot_all |>
  filter(tag=="pre_likes" | tag=="pre_engagement") |> 
  select(!count) |> 
  pivot_wider(id_cols=c('user_id', 'randomized_group'), 
                        names_from = "tag", values_from="n") |>
  mutate_if(is.numeric , replace_na, replace = 0) |>
  mutate(pre_total_engagement = pre_likes + pre_engagement) |>
  select(user_id, randomized_group, pre_total_engagement) -> pre_total_engage 

df_plot_all |>
  filter(tag=="post_likes" | tag=="post_engagement") |> 
  select(!count) |> 
  pivot_wider(id_cols=c('user_id', 'randomized_group'), 
                        names_from = "tag", values_from="n") |>
  mutate_if(is.numeric , replace_na, replace = 0) |>
  mutate(post_total_engagement = post_likes + post_engagement) |>
  select(user_id, randomized_group, post_total_engagement) -> post_total_engage 

pre_total_engage |> 
  merge(post_total_engage, by=c("user_id", "randomized_group")) -> merge_total_engage
merge_total_engage |>
    ggplot(aes(x = randomized_group, y = pre_total_engagement)) +
    geom_violin(aes(fill = randomized_group), alpha=0.3) +
    ggbeeswarm::geom_quasirandom(
        shape = 21, color = "white", 
        alpha = 0.9, size = 3,
        aes(fill = randomized_group)
    ) +
    geom_bar(stat = "summary", fun = mean, width = 0.7, alpha = 0.6, 
             aes(fill = randomized_group)) +
    stat_summary(geom = "errorbar", fun.data = "mean_se", width = 0.1, linewidth = 1) +
    scale_fill_manual(values = brewer.pal(8, "Accent")[1:3]) +
    labs(x = "Randomized Group",
         y = "Count (sqrt scale)") +
    theme_classic() +
    theme(
        text = element_text(size = 12, face = "bold", color = "black"),
        axis.text = element_text(color = "black"),
        legend.position = "none",
        plot.title = element_text(size = 10),
        plot.caption = element_text(hjust = 0)
    ) +
    ggtitle(
        paste0(
            "Muting: mean = ", signif(mean(group1), 2), 
            "; sd = ", signif(sd(group1), 2), "; median = ", signif(median(group1), 2), 
            "; IQR = ", signif(IQR(group1), 2), " (N=", length(group1), ")", "\n",
            "Media Literacy: mean = ", signif(mean(group2), 2), 
            "; sd = ", signif(sd(group2), 2),
            "; median = ", signif(median(group2), 2), 
            "; IQR = ", signif(IQR(group2), 2)," (N=", length(group2), ")", "\n",
            "Control: mean = ", signif(mean(group3), 2), 
            "; sd = ", signif(sd(group3), 2),
            "; median = ", signif(median(group3), 2), 
            "; IQR = ", signif(IQR(group3), 2)," (N=", length(group3), ")"
        )) + scale_y_sqrt() +
  plot_annotation('Pre-Treatment Total Engagement', 
                        theme=theme(plot.title=element_text(size = 12, 
                                                            face = "bold", 
                                                            color = "black",
                                                            hjust=0.5)))

Muting, Media Literacy, Control 

→ Y: Post-Treatment Engagements+Likes with LQ sources 

Same plots but with randomized_group + combined measure of post-treatment engagements+likes 

# df_plot_all_merged - pre_total_engagement, post_total_engagement (=count)
merge_total_engage |> 
  filter(randomized_group == "muting_treatment1") |> 
  select(post_total_engagement) |> 
  as.vector() |> 
  unlist() |>
  as.integer() -> group1 

merge_total_engage |> 
  filter(randomized_group == "media_literacy") |> 
  select(post_total_engagement) |> 
  as.vector() |> 
  unlist() |>
  as.integer() -> group2

merge_total_engage |> 
  filter(randomized_group == "control") |> 
  select(post_total_engagement) |> 
  as.vector() |> 
  unlist() |>
  as.integer() -> group3

merge_total_engage |>
  ggplot(aes(x = randomized_group, y = post_total_engagement)) +
  geom_violin(aes(fill = randomized_group), alpha=0.3) +
  ggbeeswarm::geom_quasirandom(
    shape = 21, color = "white", 
    alpha = 0.9, size = 3,
    aes(fill = randomized_group)
  ) +
  geom_bar(stat = "summary", fun = mean, width = 0.7, alpha = 0.6, 
           aes(fill = randomized_group)) +
  stat_summary(geom = "errorbar", fun.data = "mean_se", width = 0.1, linewidth = 1) +
  scale_fill_manual(values = brewer.pal(8, "Accent")[1:3]) +
  labs(x = "Randomized Group",
       y = "Count") +
  theme_classic() +
  theme(
    text = element_text(size = 12, face = "bold", color = "black"),
    axis.text = element_text(color = "black"),
    legend.position = "none",
    plot.title = element_text(size = 10),
    plot.caption = element_text(hjust = 0)
  ) +
  ggtitle(
    paste0(
      "Muting: mean = ", signif(mean(group1), 2), 
      "; sd = ", signif(sd(group1), 2), "; median = ", signif(median(group1), 2), 
      "; IQR = ", signif(IQR(group1), 2), " (N=", length(group1), ")", "\n",
      "Media Literacy: mean = ", signif(mean(group2), 2), 
      "; sd = ", signif(sd(group2), 2),
      "; median = ", signif(median(group2), 2), 
      "; IQR = ", signif(IQR(group2), 2)," (N=", length(group2), ")", "\n",
      "Control: mean = ", signif(mean(group3), 2), 
      "; sd = ", signif(sd(group3), 2),
      "; median = ", signif(median(group3), 2), 
      "; IQR = ", signif(IQR(group3), 2)," (N=", length(group3), ")"
      )) -> bar_post_total_engage

bar_post_total_engage + scale_y_sqrt() +
  plot_annotation('Post-Treatment Total Engagement', 
                        theme=theme(plot.title=element_text(size = 12, 
                                                            face = "bold", 
                                                            color = "black",
                                                            hjust=0.5)))

ggsave("../Results/post_total_engagement.png", height = 4, width = 8)
merge_total_engage |> 
  mutate(randomized_group = ifelse(randomized_group=="media_literacy", "Media Literacy", ifelse(randomized_group=="control", "Control", "Muting"))) |> 
  pivot_longer(cols=c("pre_total_engagement", "post_total_engagement")) |> 
  mutate(group = factor(name, 
                        levels = c('pre_total_engagement', 
                                   'post_total_engagement'))) |> 
 grouped_ggwithinstats(x=group, y=value, 
                       grouping.var = randomized_group, 
                       type = "np",
                       p.adjust.methods = 'fdr',
                       ggtheme = theme_pubr(),
                       results.subtitle = FALSE,
                       ggplot.component =  list(
                         scale_y_continuous(limits = c(0,60)),
                         scale_color_manual(values = brewer.pal(8, "Accent")[1:2]),
                         xlab(""),
                         ylab("Count")
                         ),
                       annotation.args = plot_annotation(
                         'Pre-Treatment vs. Post-Treatment Total Engagement',
                         theme=theme(plot.title=element_text(size = 14, 
                                                            face = "bold", 
                                                            color = "black",
                                                            hjust=0.5))))  

ggsave("../Results/within_engagement.png", height = 4, width = 15)

Research question 2: Will assignment to the muting treatment affect total engagement with low-quality news sources excluded from the treatment (i.e., those that are not muted) during the treatment period?

this_is_final_data |>
  filter(muted == "no") |> 
  select(user_id, tag, randomized_group, count) |> 
  group_by(user_id, tag) |>
  count() |> ungroup() -> test_for_nonmuted

# test_for_nonmuted - if there are no categories (tag), freq == 0
test_for_nonmuted |> 
  select(user_id, tag, freq) |>
  pivot_wider(names_from=tag, values_from=freq) |> 
  mutate_if(is.numeric , replace_na, replace = 0) |>
  relocate(user_id, 
           pre_hometimeline, post_hometimeline, 
           pre_engagement, post_engagement,
           pre_likes, post_likes) -> test_for_nonmuted_wider

test_for_nonmuted_wider |> 
  select(user_id, pre_hometimeline:post_likes) |> 
  pivot_longer(cols=pre_hometimeline:post_likes, 
               names_to = "tag",
               values_to = "n") -> test_for_nonmuted_longer
test_for_nonmuted |> 
  select(user_id, tag, count) |> 
  unique() |>
  merge(test_for_muted_longer, by=c("user_id", "tag"), all=TRUE) |>
  mutate_if(is.numeric , replace_na, replace = 0) -> test_for_nonmuted_longer2

test_for_nonmuted |> 
  select(user_id, tag, randomized_group) |> unique() -> for_merge_nonmuted_1
for_merge_nonmuted_1 |> 
  select(user_id, randomized_group) |> 
  unique() -> for_merge_nonmuted_2

test_for_nonmuted_longer2 |> 
  merge(for_merge_nonmuted_1, by = c('user_id', 'tag'), all.x = T) |> 
  select(user_id, tag, count, n) |> 
  merge(for_merge_nonmuted_2, by = "user_id") -> test_for_nonmuted_longer3

# backup
df_plot_nonmuted = test_for_nonmuted_longer3

# df for visualization 
df_plot_nonmuted |>
  filter(tag=="pre_likes" | tag=="pre_engagement") |> 
  select(!count) |> 
  pivot_wider(id_cols=c('user_id', 'randomized_group'), 
                        names_from = "tag", values_from="n") |>
  mutate_if(is.numeric , replace_na, replace = 0) |>
  mutate(pre_total_engagement = pre_likes + pre_engagement) |>
  select(user_id, randomized_group, pre_total_engagement) -> pre_total_engage_nonmuted

df_plot_nonmuted |>
  filter(tag=="post_likes" | tag=="post_engagement") |> 
  select(!count) |> 
  pivot_wider(id_cols=c('user_id', 'randomized_group'), 
                        names_from = "tag", values_from="n") |>
  mutate_if(is.numeric , replace_na, replace = 0) |>
  mutate(post_total_engagement = post_likes + post_engagement) |>
  select(user_id, randomized_group, post_total_engagement) -> post_total_engage_nonmuted 

pre_total_engage_nonmuted |> 
  merge(post_total_engage_nonmuted, 
        by=c("user_id", "randomized_group")) -> merge_total_engage_nonmuted
merge_total_engage_nonmuted |> 
  filter(randomized_group == "muting_treatment1") |> 
  select(post_total_engagement) |> 
  as.vector() |> 
  unlist() |>
  as.integer() -> group1 

merge_total_engage_nonmuted |> 
  filter(randomized_group == "media_literacy") |> 
  select(post_total_engagement) |> 
  as.vector() |> 
  unlist() |>
  as.integer() -> group2

merge_total_engage_nonmuted |> 
  filter(randomized_group == "control") |> 
  select(post_total_engagement) |> 
  as.vector() |> 
  unlist() |>
  as.integer() -> group3

merge_total_engage_nonmuted |>
  ggplot(aes(x = randomized_group, y = post_total_engagement)) +
  geom_violin(aes(fill = randomized_group), alpha=0.3) +
  ggbeeswarm::geom_quasirandom(
    shape = 21, color = "white", 
    alpha = 0.9, size = 3,
    aes(fill = randomized_group)
  ) +
  geom_bar(stat = "summary", fun = mean, width = 0.7, alpha = 0.6, 
           aes(fill = randomized_group)) +
  stat_summary(geom = "errorbar", fun.data = "mean_se", width = 0.1, linewidth = 1) +
  scale_fill_manual(values = brewer.pal(8, "Accent")[1:3]) +
  labs(x = "Randomized Group",
       y = "Count") +
  theme_classic() +
  theme(
    text = element_text(size = 12, face = "bold", color = "black"),
    axis.text = element_text(color = "black"),
    legend.position = "none",
    plot.title = element_text(size = 10),
    plot.caption = element_text(hjust = 0)
  ) +
  ggtitle(
    paste0(
      "Muting: mean = ", signif(mean(group1), 2), 
      "; sd = ", signif(sd(group1), 2), "; median = ", signif(median(group1), 2), 
      "; IQR = ", signif(IQR(group1), 2), " (N=", length(group1), ")", "\n",
      "Media Literacy: mean = ", signif(mean(group2), 2), 
      "; sd = ", signif(sd(group2), 2),
      "; median = ", signif(median(group2), 2), 
      "; IQR = ", signif(IQR(group2), 2)," (N=", length(group2), ")", "\n",
      "Control: mean = ", signif(mean(group3), 2), 
      "; sd = ", signif(sd(group3), 2),
      "; median = ", signif(median(group3), 2), 
      "; IQR = ", signif(IQR(group3), 2)," (N=", length(group3), ")"
      )) -> bar_post_total_engage_nonmuted

bar_post_total_engage_nonmuted + scale_y_sqrt() +
  plot_annotation('Post-Treatment Total Engagement with Non-Muted LQ Sources', 
                        theme=theme(plot.title=element_text(size = 12, 
                                                            face = "bold", 
                                                            color = "black",
                                                            hjust=0.5)))

ggsave("../Results/post_total_engagement_nonmuted.png", height = 4, width = 8)
merge_total_engage_nonmuted |> 
  mutate(randomized_group = ifelse(randomized_group=="media_literacy", "Media Literacy", ifelse(randomized_group=="control", "Control", "Muting"))) |> 
  pivot_longer(cols=c("pre_total_engagement", "post_total_engagement")) |> 
  mutate(group = factor(name, 
                        levels = c('pre_total_engagement', 
                                   'post_total_engagement'))) |> 
 grouped_ggwithinstats(x=group, y=value, 
                       grouping.var = randomized_group, 
                       type = "np",
                       p.adjust.methods = 'fdr',
                       ggtheme = theme_pubr(),
                       results.subtitle = FALSE,
                       ggplot.component =  list(
                         scale_y_continuous(limits = c(0,60)),
                         scale_color_manual(values = brewer.pal(8, "Accent")[1:2]),
                         xlab(""),
                         ylab("Count")
                         ),
                       annotation.args = plot_annotation(
                         'Pre-Treatment vs. Post-Treatment Total Engagement with Non-Muted LQ Sources',
                         theme=theme(plot.title=element_text(size = 14, 
                                                            face = "bold", 
                                                            color = "black",
                                                            hjust=0.5))))  

ggsave("../Results/within_engagement_nonmuted.png", height = 4, width = 15)

4 Note

  1. About data collection
    • I collected post-treatment data on Mar 26, which is ~26 days after the start of Wave 2. Thus, the post-treatment data contains minimum 19 days ~ maximum 26 days post Wave 2, while pre-treatment data contains maximum 4 weeks.

      • FYI) For pre-treatment likes, the date can go way back.
    • Post-treatment data: Considering that muting took ~1 day or more, I filtered data with created_at >= Wave 2 session start time + 2 days, to make sure that I only include tweets from LQ sources after the muting job is done.