visualization

1 Scatter Plot

Ro’ee: I think it would be useful at some point to create a scatter plot with WTA in round 1 in the x-axis and round 2 in the y-axis (and also just report the correlation). It’s a bit surprising that so many people provided a different answer, we should figure out if this is related to the question wording.

1.1 Plot 1.

14 cases removed (over $100)

library(tidyverse)
library(ggplot2)
library(DT)
library(haven)
library(ggpubr)
library(cowplot)
library(ggthemes)
library(readr)
library(ggExtra)
library(scales)
wta_df <- read_csv("wta/wta_df.csv", col_types = cols(userid = col_character(), 
                                                      wta_w1 = col_integer(), 
                                                      wta_w2 = col_integer(), 
                                                      w2_compre1 = col_integer(), 
                                                      w2_compre2 = col_integer()))


plot0 <- ggplot(wta_df, mapping = aes(wta_w1, wta_w2)) +
  geom_abline(intercept = 0, slope = 1, lty = 3, color = 'black') +
  geom_hline(yintercept = 15, lty = 2, color = 'red') + 
  geom_vline(xintercept = 15, lty = 2, color = 'red') + 
  geom_point(alpha=0.6) +
  stat_cor(method = 'pearson') + 
  xlim(c(0, 100)) + 
  ylim(c(0, 100)) +
  theme_few() +
  theme(legend.position = 'bottom') +
  xlab('WTA distribution in Wave 1 (0~100)') + 
  ylab('WTA distribution in Wave 2 (0~100)')

ggMarginal(plot0, fill='gray')

1.2 Plot 2.

Group by WTA group assigned in Wave 1
- assigned_wta_w1: In Wave 1, what condition was the user assigned to
  - scale_within: user chose a number within 0~30 scale
  - scale_over30: user chose ‘over 30’ and then put the WTA amount
  - open-ended: user assigned to open-ended version (as in Wave 2)

Among those who got open-ended ver. in Wave 1,

always-over15: 33 (28.4%)
always-under15: 47 (40.5%)
favorable-shift: 20 (17.2%)
unfavorable-shift: 16 (13.8%)

cf) Among those who got scale ver. in Wave 1,

always-over15: 53 (40.8%)
always-under15: 35 (26.9%)
favorable-shift: 32 (24.6%)
unfavorable-shift: 10 (7.69%)

wta_df |>
  filter(assigned_wta_w1=='open-ended') |>
    mutate(
    category = ifelse(
      wta_w1 < 15 & wta_w2 < 15, "always-under15", 
      ifelse(wta_w1 >= 15 & wta_w2 >= 15, "always-over15", 
             ifelse(wta_w1 >=15 & wta_w2 < 15, "favorable-shift", "unfavorable-shift")))
    ) |>
    filter(!is.na(wta_w1)) -> op_wta_df

# 
randomized_group <- read_csv("randomized_group.csv", 
    col_types = cols(id = col_skip(), user_id = col_character(), 
        session_start = col_datetime(format = "%Y-%m-%d %H:%M:%S")))

names(randomized_group)[1] = "userid"

op_wta_df |>
  merge(randomized_group, by = c("userid"), all.x = T) -> op_wta_df2 

op_wta_df2 |>
  mutate(
    version = ifelse(session_start < "2024-03-01", "ver1", "ver2")
  ) -> op_wta_df2

plot1_op <- ggplot(op_wta_df, mapping = aes(wta_w1, wta_w2, color=category)) +
  geom_abline(intercept = 0, slope = 1, lty = 3, color = 'black') +
  geom_hline(yintercept = 15, lty = 2, color = 'red') + 
  geom_vline(xintercept = 15, lty = 2, color = 'red') + 
  geom_jitter(alpha=0.5) +
  stat_cor(method = 'pearson') + 
  xlim(c(0, 100)) + 
  ylim(c(0, 100)) +
  theme_few() +
  theme(legend.position = 'bottom') +
  xlab('WTA distribution in Wave 1 (0~100)') + 
  ylab('WTA distribution in Wave 2 (0~100)') +
  ggtitle('Scatter Plot (only among those with open-ended ver. in Wave 1)')


ggMarginal(plot1_op, groupColour = TRUE, groupFill = TRUE)

1.3 Plot 3.

Group by category based on WTA $
- always-under15: user chose below $15 in both waves
- always-over15: user chose $15 or over in both waves
- favorable-shift: user chose $15 or over in wave 1 but shifted below $15 in wave 2
- unfavorable-shift: user chose below $15 in wave 1 but shifted $15 or over in wave 2

wta_df |>
  mutate(
    category = ifelse(
      wta_w1 < 15 & wta_w2 < 15, "always-under15", 
      ifelse(wta_w1 >= 15 & wta_w2 >= 15, "always-over15", 
             ifelse(wta_w1 >=15 & wta_w2 < 15, "favorable-shift", "unfavorable-shift")))
    ) |>
    filter(!is.na(wta_w1)) -> wta_df


ggplot(wta_df, mapping = aes(wta_w1, wta_w2, col=category)) +
  geom_abline(intercept = 0, slope = 1, lty = 3, color = 'black') +
  geom_point(alpha=0.6) +
  geom_hline(yintercept = 15, lty = 2, color = 'red') + 
  geom_vline(xintercept = 15, lty = 2, color = 'red') + 
  theme_few() +
  xlim(c(0, 100)) +
  ylim(c(0, 100)) +
  stat_cor(method = 'pearson') + 
  xlab('WTA distribution in Wave 1 (0~100)') + 
  ylab('WTA distribution in Wave 2 (0~100)') +
  theme(legend.position='bottom') -> plot2

ggMarginal(plot2, groupColour = TRUE, groupFill = TRUE)

1.4 Plot 4.

log10 scales for both x, y axes
no filtering out those WTA > $100

ggplot(wta_df, mapping = aes(wta_w1, wta_w2, col=category)) +
  geom_abline(intercept = 0, slope = 1, lty = 3, color = 'black') +
  geom_point(alpha=0.6) +
  geom_hline(yintercept = 15, lty = 2, color = 'red') + 
  geom_vline(xintercept = 15, lty = 2, color = 'red') + 
  scale_x_log10(labels = label_log(digits = 2)) +
  scale_y_log10(labels = label_log(digits = 2)) +
  theme_few() +
  stat_cor(method = 'pearson') + 
  xlab('WTA distribution in Wave 1 (log10 scale)') + 
  ylab('WTA distribution in Wave 2 (log10 scale)') +
  theme(legend.position='bottom') -> plot3

ggMarginal(plot3, groupColour = TRUE, groupFill = TRUE)

2 Engagements & Exposure

library(tidyverse)
library(DT)
library(haven)
library(readr)

# load raw data 
df_w1 <- readRDS("~/Downloads/2023-095b_files/2023-095a_client_wave-1.rds")
df_w1_supp <- readRDS("~/Downloads/2023-095b_files/2023-095a_client_wave-1_supp.rds")
df_w2 <- readRDS("~/Downloads/2023-095b_files/2023-095b_client_wave-2.rds")


# bind wave 1 data + filter only Progress == 100 
df_w1 = rbind(df_w1, df_w1_supp)
df_w1 |>
  select(-user_id) |> 
  filter(Progress==100) -> df_w1

# invited users list (for merge)
Users_to_invite_Wave2 <- read_csv("Users_to_invite_Wave2.csv", 
    col_types = cols(user_id = col_character()))
Users_to_invite_Wave2 |> 
  select(user_id) |> 
  mutate(userid = user_id) -> for_merge

# merged with invited user list 
df_w2 |>
  merge(for_merge, by='userid') -> w2_users

# merged with invited user list 
w2_users |> 
  select(userid, user_id) |>
  merge(df_w1, by='userid') -> w1_users

# drop randomized_group == 'drop' from wave 2
w2_users |>
  filter(randomized_group != 'drop') |>
  filter(randomized_group != 'muting_treatment2_drop') |>
  select(userid, randomized_group) -> wave2_data
# 134 users without drop 

# muted_accounts
muted_accounts <- read_csv("muted_accounts.csv", 
    col_types = cols(user_id = col_character(), 
        target_user_id = col_character()))

# exposure data 
pre_hometimeline <- read_csv("pre_hometimeline.csv", 
    col_types = cols(user_id = col_character(), 
        target_user_id = col_character()))

pre_hometimeline |> 
  rename(type = match_type) |>
  mutate(tag = "pre_hometimeline") -> pre_hometimeline

post_hometimeline <- read_csv("post_hometimeline.csv", 
    col_types = cols(user_id = col_character(), 
        target_user_id = col_character()))

post_hometimeline |> 
  mutate(tag = "post_hometimeline") -> post_hometimeline

# engagement data
pre_engagement <- read_csv("pre_engagement.csv", 
    col_types = cols(user_id = col_character(), 
        target_user_id = col_character()))

pre_engagement |> 
  mutate(tag = "pre_engagement") -> pre_engagement

post_engagement <- read_csv("post_engagement.csv", 
    col_types = cols(user_id = col_character(), 
        target_user_id = col_character()))

post_engagement |> 
  mutate(tag = "post_engagement") -> post_engagement

# truncated list
updated_inventory <- read_csv("updated_inventory.csv", 
    col_types = cols_only(target_user_id = col_character())) 
reduced_inventory = updated_inventory[c(1:489),]
#reduced_inventory <- read_csv("reduced_inventory.csv", 
#    col_types = cols_only(target_user_id = col_character()))

wave2_data |> rename(user_id = userid) |> merge(pre_engagement, by='user_id') -> this_1
wave2_data |> rename(user_id = userid) |> merge(post_engagement, by='user_id') -> this_2
wave2_data |> rename(user_id = userid) |> merge(pre_hometimeline, by='user_id') -> this_3
wave2_data |> rename(user_id = userid) |> merge(post_hometimeline, by='user_id') -> this_4
rbind(this_1, this_2, this_3, this_4) -> test

reduced_inventory |> mutate(reduced = "yes") -> reduced_inventory 
muted_accounts |> mutate(muted = "yes") -> muted_accounts

test |> 
  merge(muted_accounts, by = c('user_id', 'target_user_id'), all.x = TRUE) |>
  mutate(
    muted = ifelse(is.na(muted), "no", muted)
  ) |>
  merge(reduced_inventory, by='target_user_id', all.x = TRUE) |>
  mutate(
    reduced = ifelse(is.na(reduced), "no", reduced)
  ) |>
  relocate(c(user_id, randomized_group)) -> this_is_full_data

# count_engagement.csv 
count_engagement <- read_csv("count_engagement.csv", 
    col_types = cols(user_id = col_character()))

this_is_full_data |>
  merge(count_engagement, by=c('user_id','tag') ,all.x=TRUE) -> this_is_final_data

2.1 Data Description

this_is_final_data

(user level)

user_id: Participants’ Twitter IDs
randomized_group: Randomized group - media_literacy, muting_treatment1/2, control
tag:
- pre_hometimeline/engagement: home timeline (reverse chronological) or engagement data collected before treatment (wave 2)
- post_hometimeline/engagement: home timeline or engagement data collected after treatment (wave 2) ~ till Mar 14
  - I collected post-treatment data on Mar 14, which is 2 weeks after the start of Wave 2. Thus, the data contains maximum 2 weeks ~ minimum 1 week post-treatment.
count: Number of tweets collected

(user-LQ account dyad level)

target_user_id: LQ accounts’ Twitter IDs (found in home timeline or engagement data)
type: Type of tweets related to LQ sources
- direct: Direct tweets mentioning LQ sources
- replied: Reply tweets to LQ sources
- retweeted: Any retweets of LQ tweets
- quoted: Any quote tweets of LQ tweets
muted : Whether the LQ source is muted (“yes”) or not (“no”)
reduced : Whether the LQ source is included in the reduced list (“yes”) or not (“no”)

this_is_final_data |>
  relocate(user_id, randomized_group, tag, count) |>
  datatable(filter='top')

2.2 Clarification of the task

Post-treatment engagement/exposure between control (those unmuted from the onset) vs. muting group (for 30% unmuted):
- to check there are no differences between these two groups, and
- whether they are still engaging with LQ sources that are not muted
Add the same plot as #1 but with pre-treatment engagement/exposure
- to check that those unmuted have not changed behavior after the treatment (compared to pre-treatment period)

library(Rmisc)
# Muting 30% filtered 
this_is_final_data |>
  mutate(
    group = ifelse(randomized_group=="media_literacy", "MediaLiteracy", 
                   ifelse(randomized_group=='control', "Control", "Muting")
  )) |>
  mutate(flag = ifelse(
    group == "Control", "include", # Control group all included 
    ifelse( # For Muting group, only include cases where muted == "no" 
      group == "Muting" & muted == "no", "include", "drop"
    )
  )) |> 
  filter(flag == "include") |> select(!flag) -> test_again

test_again |> 
  select(user_id, tag, randomized_group, count) |> 
  group_by(user_id, tag) |>
  count() |> ungroup() -> test_for_unmuted30 

# test_for_unmuted30 - if there are no categories (tag), freq == 0
test_for_unmuted30 |> 
  select(user_id, tag, freq) |>
  pivot_wider(names_from=tag, values_from=freq) |> 
  mutate_if(is.numeric , replace_na, replace = 0) |>
  relocate(user_id, 
           pre_hometimeline, post_hometimeline, 
           pre_engagement, post_engagement) -> test_for_unmuted30_wider

test_for_unmuted30_wider |> 
  select(user_id, pre_hometimeline:post_engagement) |> 
  pivot_longer(cols=pre_hometimeline:post_engagement, 
               names_to = "tag",
               values_to = "n") -> test_for_unmuted30_longer
test_again |> 
  select(user_id, tag, count) |> 
  unique() |>
  merge(test_for_unmuted30_longer, by=c("user_id", "tag"), all=TRUE) |>
  mutate_if(is.numeric , replace_na, replace = 0) -> test_for_unmuted30_longer2

test_again |> select(user_id, tag, randomized_group) |> unique() -> for_merge_unmuted30_1
for_merge_unmuted30_1 |> 
  select(user_id, randomized_group) |> 
  unique() -> for_merge_unmuted30_2

test_for_unmuted30_longer2 |> 
  merge(for_merge_unmuted30_1, by = c('user_id', 'tag'), all.x = T) |> 
  select(user_id, tag, count, n) |> 
  merge(for_merge_unmuted30_2, by = "user_id") |> 
  mutate(group = ifelse(randomized_group == "control" | 
                          randomized_group == "media_literacy", 
                        "Control", "Muting")) -> test_for_unmuted30_longer3

# backup
df_plot_unmuted = test_for_unmuted30_longer3

2.3 Plot 1. Post-treatment engagement

Post-treatment engagement between control vs. muting (for unmuted accounts)

library(tidyverse)
library(ggbeeswarm) # to make jitter plots  
library(RColorBrewer) # for the colors 
library(patchwork)

df_plot_unmuted |>
  mutate(rate = (n/count)*100) |> 
  mutate_if(is.numeric , replace_na, replace = 0) |>
  mutate(rate = round(rate,1)) |>
  filter(tag == 'post_engagement') |> # tag == 'post_engagement' filter
  select(user_id, group, n, rate) -> df_post_engagement

df_post_engagement |> 
  filter(group == "Control") |> 
  select(n) |> 
  as.vector() |> 
  unlist() |>
  as.integer() -> group1 

df_post_engagement |> 
  filter(group == "Muting") |> 
  select(n) |> 
  as.vector() |> 
  unlist() |>
  as.integer() -> group2 

df_post_engagement |>
  ggplot(aes(x = group, y = n)) +
  geom_bar(stat = "summary", fun = mean, width = 0.7, alpha = 0.8, aes(fill = group)) +
  stat_summary(geom = "errorbar", fun.data = "mean_se", width = 0.1, linewidth = 1) +
  scale_fill_manual(values = brewer.pal(8, "Accent")[1:2]) +
  labs(x = "Group",
       y = "Count",
       caption = paste0("P = ", 
                        signif(t.test(group1, group2)$p.value, 2),
                        " (t test)")) +
  theme_classic() +
  theme(
    text = element_text(size = 12, face = "bold", color = "black"),
    axis.text = element_text(color = "black"),
    legend.position = "none",
    plot.title = element_text(size = 10),
    plot.caption = element_text(hjust = 0)
  ) +
  ggtitle(
    paste0(
      "group1: mean = ", signif(mean(group1), 2), 
      "; sd = ", signif(sd(group1), 2), "\n",
      "group2: mean = ", signif(mean(group2), 2), 
      "; sd = ", signif(sd(group2), 2)
    )) -> bar_post_engagement

df_post_engagement |>
  ggplot(aes(x = group, y = n)) +
  geom_boxplot(width = 0.7, alpha = 0.8, aes(fill = group)) +
  scale_fill_manual(values = brewer.pal(8, "Accent")[1:2]) +
  labs(x = "Group",
       y = "Count",
       caption = paste0("P = ", 
                        signif(wilcox.test(group1, group2)$p.value, 2),
                        " (Wilcoxon rank sum test)")) +
  theme_classic() +
  theme(
    text = element_text(size = 12, face = "bold", color = "black"),
    axis.text = element_text(color = "black"),
    legend.position = "none",
    plot.title = element_text(size = 10),
    plot.caption = element_text(hjust = 0)
  ) +
  ggtitle(
    paste0(
      "group1: median = ", signif(median(group1), 2), 
      "; IQR = ", signif(IQR(group1), 2), "\n",
      "group2: median = ", signif(median(group2), 2), 
      "; IQR = ", signif(IQR(group2), 2)
    )
  ) -> box_post_engagement

df_post_engagement |>
  ggplot(aes(x = group, y = n)) +
  ggbeeswarm::geom_quasirandom(
    shape = 21, color = "white", 
    alpha = 0.8, size = 3,
    aes(fill = group)
  ) +
  scale_fill_manual(values = brewer.pal(8, "Accent")[1:2]) +
  labs(x = "Group",
       y = "Count",
        caption = paste0("P = ", 
                        signif(ks.test(group1, group2)$p.value, 2),
                        " (Kolmogorov–Smirnov test)")) +
  theme_classic() +
  theme(
    text = element_text(size = 12, face = "bold", color = "black"),
    axis.text = element_text(color = "black"),
    legend.position = "none",
    plot.title = element_text(size = 10),
    plot.caption = element_text(hjust = 0)
  ) +
  ggtitle(
    paste0(
      "group1: median = ", signif(median(group1), 2), 
      "; IQR = ", signif(IQR(group1), 2), "\n",
      "group2: median = ", signif(median(group2), 2), 
      "; IQR = ", signif(IQR(group2), 2)
    )
  ) -> dot_post_engagement


wrap_plots(
  bar_post_engagement, box_post_engagement, dot_post_engagement, nrow = 1
)  + plot_annotation('Plot 1. Post-Treatment Engagements', 
                        theme=theme(plot.title=element_text(size = 12, 
                                                            face = "bold", 
                                                            color = "black",
                                                            hjust=0.5)))

ggsave("../Results/plot1_unmuted.png", height = 5, width = 10)

2.3.1 Description (1)

The y-axis (Count) indicates the number of tweets related to LQ sources that one engaged with (=engagement). These include direct mentions of LQ accounts, replies, retweets, and quote tweets of tweets created by LQ accounts, as well as direct URLs to these LQ domains.

Group: Control includes control group and media literacy group, while Muting includes muting treatment groups. For Muting group, only engagements with those LQ accounts that are not muted are counted. Note that in pilot study, we muted 70% of the truncated list (truncated list: n=489). So, for Muting group, LQ accounts that are ‘not muted’ includes 30% of truncated list + those that are not included in the truncated list. For the main study, we would not truncate the list.

Hence, Plot 1 allows us to compare the post-treatment engagements with unmuted LQ accounts between Control and Muting groups.

For most participants, we see low levels of engagements with LQ accounts with a few outliers (active ones). Given this highly skewed distribution, and to be careful with comparison, I drew the same quantities but with different ways of visualizing them.

(left) Bar plot
- Comparison: mean
- Error bars indicate SEs of the mean
- t-test statistics (p-value presented)
(middle) Box plot
- Comparison: median
- Wilcoxon rank sum test (nonparametric)
(right) Dot plot
- Comparison: distribution
- K-S test (nonparametric)

2.4 Plot 2. Post-treatment exposure

Post-treatment exposure between control vs. muting (for unmuted accounts)

# Use df_plot_unmuted 
df_plot_unmuted |>
  filter(tag == "post_hometimeline") |> # tag == 'post_hometimeline' filter
  select(user_id, group, n) -> df_post_hometimeline

df_post_hometimeline |> 
  filter(group == "Control") |> 
  select(n) |> 
  as.vector() |> 
  unlist() |>
  as.integer() -> group1 

df_post_hometimeline |> 
  filter(group == "Muting") |> 
  select(n) |> 
  as.vector() |> 
  unlist() |>
  as.integer() -> group2 

df_post_hometimeline |>
  ggplot(aes(x = group, y = n)) +
  geom_bar(stat = "summary", fun = mean, width = 0.7, alpha = 0.8, aes(fill = group)) +
  stat_summary(geom = "errorbar", fun.data = "mean_se", width = 0.1, linewidth = 1) +
  scale_fill_manual(values = brewer.pal(8, "Accent")[1:2]) +
  labs(x = "Group",
       y = "Count",
       caption = paste0("P = ", 
                        signif(t.test(group1, group2)$p.value, 2),
                        " (t test)")) +
  theme_classic() +
  theme(
    text = element_text(size = 12, face = "bold", color = "black"),
    axis.text = element_text(color = "black"),
    legend.position = "none",
    plot.title = element_text(size = 10),
    plot.caption = element_text(hjust = 0)
  ) +
  ggtitle(
    paste0(
      "group1: mean = ", signif(mean(group1), 2), 
      "; sd = ", signif(sd(group1), 2), "\n",
      "group2: mean = ", signif(mean(group2), 2), 
      "; sd = ", signif(sd(group2), 2)
    )) -> bar_post_hometimeline 

df_post_hometimeline |>
  ggplot(aes(x = group, y = n)) +
  geom_boxplot(width = 0.7, alpha = 0.8, aes(fill = group)) +
  scale_fill_manual(values = brewer.pal(8, "Accent")[1:2]) +
  labs(x = "Group",
       y = "Count",
       caption = paste0("P = ", 
                        signif(wilcox.test(group1, group2)$p.value, 2),
                        " (Wilcoxon rank sum test)")) +
  theme_classic() +
  theme(
    text = element_text(size = 12, face = "bold", color = "black"),
    axis.text = element_text(color = "black"),
    legend.position = "none",
    plot.title = element_text(size = 10),
    plot.caption = element_text(hjust = 0)
  ) +
  ggtitle(
    paste0(
      "group1: median = ", signif(median(group1), 2), 
      "; IQR = ", signif(IQR(group1), 2), "\n",
      "group2: median = ", signif(median(group2), 2), 
      "; IQR = ", signif(IQR(group2), 2)
    )
  ) -> box_post_hometimeline

df_post_hometimeline |>
  ggplot(aes(x = group, y = n)) +
  ggbeeswarm::geom_quasirandom(
    shape = 21, color = "white", 
    alpha = 0.8, size = 3,
    aes(fill = group)
  ) +
  scale_fill_manual(values = brewer.pal(8, "Accent")[1:2]) +
  labs(x = "Group",
       y = "Count",
        caption = paste0("P = ", 
                        signif(ks.test(group1, group2)$p.value, 2),
                        " (Kolmogorov–Smirnov test)")) +
  theme_classic() +
  theme(
    text = element_text(size = 12, face = "bold", color = "black"),
    axis.text = element_text(color = "black"),
    legend.position = "none",
    plot.title = element_text(size = 10),
    plot.caption = element_text(hjust = 0)
  ) +
  ggtitle(
    paste0(
      "group1: median = ", signif(median(group1), 2), 
      "; IQR = ", signif(IQR(group1), 2), "\n",
      "group2: median = ", signif(median(group2), 2), 
      "; IQR = ", signif(IQR(group2), 2)
    )
  ) -> dot_post_hometimeline


wrap_plots(
  bar_post_hometimeline, box_post_hometimeline, dot_post_hometimeline, nrow = 1
)  -> plot2

plot2 + plot_annotation('Plot 2. Post-Treatment Exposure', 
                        theme=theme(plot.title=element_text(size = 12, 
                                                            face = "bold", 
                                                            color = "black",
                                                            hjust=0.5)))

ggsave("../Results/plot2_unmuted.png", height = 5, width = 10)

2.4.1 Description (2)

The y-axis (Count) indicates the number of LQ tweets found in one’s home timeline (=exposure).

Group: Control includes control group and media literacy group, while Muting includes muting treatment groups. For Muting group, only exposures to those LQ accounts that are not muted are counted.

Hence, Plot 2 allows us to compare the post-treatment exposures to unmuted LQ accounts between Control and Muting groups.

For more descriptions of each panel, please refer to the above descriptions. And for Plots 3 and 4, I replicated Plots 1 and 2 with pre-treatment data.

2.5 Plot 3. Pre-treatment engagement

Pre-treatment engagement between control vs. muting (for unmuted accounts)

df_plot_unmuted |>
  filter(tag == "pre_engagement") |> 
  select(user_id, group, n) -> df_pre_engagement

df_pre_engagement |> 
  filter(group == "Control") |> 
  select(n) |> 
  as.vector() |> 
  unlist() |>
  as.integer() -> group1 

df_pre_engagement |> 
  filter(group == "Muting") |> 
  select(n) |> 
  as.vector() |> 
  unlist() |>
  as.integer() -> group2 

df_pre_engagement |>
  ggplot(aes(x = group, y = n)) +
  geom_bar(stat = "summary", fun = mean, width = 0.7, alpha = 0.8, aes(fill = group)) +
  stat_summary(geom = "errorbar", fun.data = "mean_se", width = 0.1, linewidth = 1) +
  scale_fill_manual(values = brewer.pal(8, "Accent")[1:2]) +
  labs(x = "Group",
       y = "Count",
       caption = paste0("P = ", 
                        signif(t.test(group1, group2)$p.value, 2),
                        " (t test)")) +
  theme_classic() +
  theme(
    text = element_text(size = 12, face = "bold", color = "black"),
    axis.text = element_text(color = "black"),
    legend.position = "none",
    plot.title = element_text(size = 10),
    plot.caption = element_text(hjust = 0)
  ) +
  ggtitle(
    paste0(
      "group1: mean = ", signif(mean(group1), 2), 
      "; sd = ", signif(sd(group1), 2), "\n",
      "group2: mean = ", signif(mean(group2), 2), 
      "; sd = ", signif(sd(group2), 2)
    )) -> bar_pre_engagement

df_pre_engagement |>
  ggplot(aes(x = group, y = n)) +
  geom_boxplot(width = 0.7, alpha = 0.8, aes(fill = group)) +
  scale_fill_manual(values = brewer.pal(8, "Accent")[1:2]) +
  labs(x = "Group",
       y = "Count",
       caption = paste0("P = ", 
                        signif(wilcox.test(group1, group2)$p.value, 2),
                        " (Wilcoxon rank sum test)")) +
  theme_classic() +
  theme(
    text = element_text(size = 12, face = "bold", color = "black"),
    axis.text = element_text(color = "black"),
    legend.position = "none",
    plot.title = element_text(size = 10),
    plot.caption = element_text(hjust = 0)
  ) +
  ggtitle(
    paste0(
      "group1: median = ", signif(median(group1), 2), 
      "; IQR = ", signif(IQR(group1), 2), "\n",
      "group2: median = ", signif(median(group2), 2), 
      "; IQR = ", signif(IQR(group2), 2)
    )
  ) -> box_pre_engagement

df_pre_engagement |>
  ggplot(aes(x = group, y = n)) +
  ggbeeswarm::geom_quasirandom(
    shape = 21, color = "white", 
    alpha = 0.8, size = 3,
    aes(fill = group)
  ) +
  scale_fill_manual(values = brewer.pal(8, "Accent")[1:2]) +
  labs(x = "Group",
       y = "Count",
        caption = paste0("P = ", 
                        signif(ks.test(group1, group2)$p.value, 2),
                        " (Kolmogorov–Smirnov test)")) +
  theme_classic() +
  theme(
    text = element_text(size = 12, face = "bold", color = "black"),
    axis.text = element_text(color = "black"),
    legend.position = "none",
    plot.title = element_text(size = 10),
    plot.caption = element_text(hjust = 0)
  ) +
  ggtitle(
    paste0(
      "group1: median = ", signif(median(group1), 2), 
      "; IQR = ", signif(IQR(group1), 2), "\n",
      "group2: median = ", signif(median(group2), 2), 
      "; IQR = ", signif(IQR(group2), 2)
    )
  ) -> dot_pre_engagement


wrap_plots(
  bar_pre_engagement, box_pre_engagement, dot_pre_engagement, nrow = 1
)  -> plot3

plot3 + plot_annotation('Plot 3. Pre-Treatment Engagements', 
                        theme=theme(plot.title=element_text(size = 12, 
                                                            face = "bold", 
                                                            color = "black",
                                                            hjust=0.5)))

ggsave("../Results/plot3_unmuted.png", height = 5, width = 10)

2.6 Plot 4. Pre-treatment exposure

Pre-treatment exposure between control vs. muting (for unmuted accounts)

# Use df_plot_unmuted 
df_plot_unmuted |>
  filter(tag == "pre_hometimeline") |> # tag == 'post_hometimeline' filter
  select(user_id, group, n) -> df_pre_hometimeline

df_pre_hometimeline |> 
  filter(group == "Control") |> 
  select(n) |> 
  as.vector() |> 
  unlist() |>
  as.integer() -> group1 

df_pre_hometimeline |> 
  filter(group == "Muting") |> 
  select(n) |> 
  as.vector() |> 
  unlist() |>
  as.integer() -> group2 

df_pre_hometimeline |>
  ggplot(aes(x = group, y = n)) +
  geom_bar(stat = "summary", fun = mean, width = 0.7, alpha = 0.8, aes(fill = group)) +
  stat_summary(geom = "errorbar", fun.data = "mean_se", width = 0.1, linewidth = 1) +
  scale_fill_manual(values = brewer.pal(8, "Accent")[1:2]) +
  labs(x = "Group",
       y = "Count",
       caption = paste0("P = ", 
                        signif(t.test(group1, group2)$p.value, 2),
                        " (t test)")) +
  theme_classic() +
  theme(
    text = element_text(size = 12, face = "bold", color = "black"),
    axis.text = element_text(color = "black"),
    legend.position = "none",
    plot.title = element_text(size = 10),
    plot.caption = element_text(hjust = 0)
  ) +
  ggtitle(
    paste0(
      "group1: mean = ", signif(mean(group1), 2), 
      "; sd = ", signif(sd(group1), 2), "\n",
      "group2: mean = ", signif(mean(group2), 2), 
      "; sd = ", signif(sd(group2), 2)
    )) -> bar_pre_hometimeline

df_pre_hometimeline |>
  ggplot(aes(x = group, y = n)) +
  geom_boxplot(width = 0.7, alpha = 0.8, aes(fill = group)) +
  scale_fill_manual(values = brewer.pal(8, "Accent")[1:2]) +
  labs(x = "Group",
       y = "Count",
       caption = paste0("P = ", 
                        signif(wilcox.test(group1, group2)$p.value, 2),
                        " (Wilcoxon rank sum test)")) +
  theme_classic() +
  theme(
    text = element_text(size = 12, face = "bold", color = "black"),
    axis.text = element_text(color = "black"),
    legend.position = "none",
    plot.title = element_text(size = 10),
    plot.caption = element_text(hjust = 0)
  ) +
  ggtitle(
    paste0(
      "group1: median = ", signif(median(group1), 2), 
      "; IQR = ", signif(IQR(group1), 2), "\n",
      "group2: median = ", signif(median(group2), 2), 
      "; IQR = ", signif(IQR(group2), 2)
    )
  ) -> box_pre_hometimeline

df_pre_hometimeline |>
  ggplot(aes(x = group, y = n)) +
  ggbeeswarm::geom_quasirandom(
    shape = 21, color = "white", 
    alpha = 0.8, size = 3,
    aes(fill = group)
  ) +
  scale_fill_manual(values = brewer.pal(8, "Accent")[1:2]) +
  labs(x = "Group",
       y = "Count",
        caption = paste0("P = ", 
                        signif(ks.test(group1, group2)$p.value, 2),
                        " (Kolmogorov–Smirnov test)")) +
  theme_classic() +
  theme(
    text = element_text(size = 12, face = "bold", color = "black"),
    axis.text = element_text(color = "black"),
    legend.position = "none",
    plot.title = element_text(size = 10),
    plot.caption = element_text(hjust = 0)
  ) +
  ggtitle(
    paste0(
      "group1: median = ", signif(median(group1), 2), 
      "; IQR = ", signif(IQR(group1), 2), "\n",
      "group2: median = ", signif(median(group2), 2), 
      "; IQR = ", signif(IQR(group2), 2)
    )
  ) -> dot_pre_hometimeline


wrap_plots(
  bar_pre_hometimeline, box_pre_hometimeline, dot_pre_hometimeline, nrow = 1
)  -> plot4

plot4 + plot_annotation('Plot 4. Pre-Treatment Exposure', 
                        theme=theme(plot.title=element_text(size = 12, 
                                                            face = "bold", 
                                                            color = "black",
                                                            hjust=0.5)))

ggsave("../Results/plot4_unmuted.png", height = 5, width = 10)

3 Note

Post-Treatment Engagements with LQ accounts that are “not muted” - From the truncated list (n=489), we chose 70% to mute. In this case, the rest of the 30% of this truncated list (n=146) would be LQ accounts ‘not muted’. - Instead, from Plot 1 ~ Plot 4, I used the full LQ list (not truncated); in this case, the LQ accounts ‘not muted’ includes 30% + those accounts that were cutoff due to truncation.
About data collection period
- I collected post-treatment data on Mar 14, which is 2 weeks after the start of Wave 2. Thus, the post-treatment data contains maximum 2 weeks ~ minimum 1 week, while pre-treatment data contains maximum 4 weeks.
- Although data collection time spans are different between pre- and post-treatment, we have maximum tweet cap both for engagement (max 300) and home timeline (max 400) and we don’t generally go over more than 2 weeks to hit the maximum cap.