visualization2
Do Won Kim
2024-04-22
1 Data Description
library(tidyverse)
library(ggplot2)
library(DT)
library(haven)
library(ggpubr)
library(cowplot)
library(ggthemes)
library(readr)
library(ggExtra)
library(scales)
library(Rmisc)
library(ggbeeswarm) # to make jitter plots
library(RColorBrewer) # for the colors
library(patchwork)
library(ggstatsplot)
library(ggsci)
# load raw data
df_w1 <- readRDS("~/Downloads/2023-095b_files/2023-095a_client_wave-1.rds")
df_w1_supp <- readRDS("~/Downloads/2023-095b_files/2023-095a_client_wave-1_supp.rds")
df_w2 <- readRDS("~/Downloads/2023-095b_files/2023-095b_client_wave-2.rds")
# bind wave 1 data + filter only Progress == 100
df_w1 = rbind(df_w1, df_w1_supp)
df_w1 |>
select(-user_id) |>
filter(Progress==100) -> df_w1
# invited users list (for merge)
Users_to_invite_Wave2 <- read_csv("Users_to_invite_Wave2.csv",
col_types = cols(user_id = col_character()))
Users_to_invite_Wave2 |>
select(user_id) |>
mutate(userid = user_id) -> for_merge
# merged with invited user list
df_w2 |>
merge(for_merge, by='userid') -> w2_users
# merged with invited user list
w2_users |>
select(userid, user_id) |>
merge(df_w1, by='userid') -> w1_users
# drop randomized_group == 'drop' from wave 2
w2_users |>
filter(randomized_group != 'drop') |>
filter(randomized_group != 'muting_treatment2_drop') |>
select(userid, randomized_group) -> wave2_data
names(wave2_data) = c('user_id', 'randomized_group')
# 134 users without drop
# muted_accounts
muted_accounts <- read_csv("muted_accounts.csv",
col_types = cols(user_id = col_character(),
target_user_id = col_character()))
# exposure data
pre_hometimeline <- read_csv("pre_hometimeline_final.csv",
col_types = cols(user_id = col_character(),
target_user_id = col_character())) |> select(!created_at)
pre_hometimeline |>
mutate(tag = "pre_hometimeline") -> pre_hometimeline
post_hometimeline <- read_csv("post_hometimeline_final.csv",
col_types = cols(user_id = col_character(),
target_user_id = col_character())) |> select(!created_at)
post_hometimeline |>
mutate(tag = "post_hometimeline") -> post_hometimeline
# engagement data
pre_engagement <- read_csv("pre_engagement_final.csv",
col_types = cols(user_id = col_character(),
target_user_id = col_character()))
pre_engagement |>
mutate(tag = "pre_engagement") -> pre_engagement
post_engagement <- read_csv("post_engagement_final.csv",
col_types = cols(user_id = col_character(),
target_user_id = col_character()))
post_engagement |>
mutate(tag = "post_engagement") -> post_engagement
# likes data
pre_likes <- read_csv("pre_likes_final.csv",
col_types = cols(user_id = col_character(),
target_user_id = col_character()))
names(pre_likes)[3] = "type"
pre_likes |>
mutate(tag = "pre_likes") -> pre_likes
post_likes <- read_csv("post_likes_final.csv",
col_types = cols(user_id = col_character(),
target_user_id = col_character()))
names(post_likes)[3] = "type"
post_likes |>
mutate(tag = "post_likes") -> post_likes
# truncated list
updated_inventory <- read_csv("updated_inventory.csv",
col_types = cols_only(target_user_id = col_character()))
reduced_inventory = updated_inventory[c(1:489),]
wave2_data |> merge(pre_engagement, by='user_id') -> this_1
wave2_data |> merge(post_engagement, by='user_id') -> this_2
wave2_data |> merge(pre_hometimeline, by='user_id') -> this_3
wave2_data |> merge(post_hometimeline, by='user_id') -> this_4
wave2_data |> merge(pre_likes, by='user_id') -> this_5
wave2_data |> merge(post_likes, by='user_id') -> this_6
rbind(this_1, this_2, this_3, this_4, this_5, this_6) -> test
reduced_inventory |> mutate(reduced = "yes") -> reduced_inventory
muted_accounts |> mutate(muted = "yes") -> muted_accounts
test |>
merge(muted_accounts, by = c('user_id', 'target_user_id'), all.x = TRUE) |>
mutate(
muted = ifelse(is.na(muted), "no", muted)
) |>
merge(reduced_inventory, by='target_user_id', all.x = TRUE) |>
mutate(
reduced = ifelse(is.na(reduced), "no", reduced)
) |>
relocate(c(user_id, randomized_group)) -> this_is_full_data
# count_engagement.csv
count_engagement <- read_csv("count_engagement.csv",
col_types = cols(user_id = col_character()))
this_is_full_data |>
merge(count_engagement, by=c('user_id','tag') ,all.x=TRUE) -> this_is_final_data
this_is_final_data |>
filter(user_id!="1165632418449186822" & user_id !='1590859787029213186' & user_id != '975899686774165504' & user_id != '375334044' & user_id != '1354968197078900743') -> this_is_final_data
Data: this_is_final_data
N=128 (control: 46, media_literacy: 40, muting_treatment1: 41, muting_treatment2: 1)
Let’s remove those accounts that blocked the study account or are locked/suspended (we could not collect post-treatment data).
✅ N=123 (control: 45, media_literacy: 38, muting_treatment1: 40)
(user level)
user_id
: Participants’ Twitter IDsrandomized_group
: Randomized group -media_literacy
,muting_treatment1/2
,control
tag
:pre_hometimeline/engagement
: home timeline (reverse chronological) or engagement data collected before treatment (wave 2)post_hometimeline/engagement
: home timeline or engagement data collected after treatment (wave 2) ~ till Mar 14- I collected post-treatment data on Mar 14, which is 2 weeks after the start of Wave 2. Thus, the data contains maximum 2 weeks ~ minimum 1 week post-treatment.
count
: Number of tweets collected
(user-LQ account dyad level)
target_user_id
: LQ accounts’ Twitter IDs (found in home timeline or engagement data)type
: Type of tweets related to LQ sourcesdirect
: Direct tweets mentioning LQ sources; Direct tweets produced by LQ sources for likesindirect
: (only for likes) likes of tweets that mention LQ sources (@) or their URLsreplied
: Reply tweets to LQ sourcesretweeted
: Any retweets of LQ tweetsquoted
: Any quote tweets of LQ tweets
muted
: Whether the LQ source is muted (“yes”) or not (“no”)reduced
: Whether the LQ source is included in the reduced list (“yes”) or not (“no”)
1.0.0.1 Filtering out quoted from exposure data
When we manually checked, it turned out API returned those tweets of LQ sources that are quoted by third-party accounts but we don’t see these tweets in home timelines. Hence, I removed all the type == “quoted” cases from home timeline tweets.
this_is_final_data |>
filter(!c(tag == "post_hometimeline" & type=="quoted")) |>
filter(!c(tag == "pre_hometimeline" & type=="quoted")) -> this_is_final_data
2 Task 1. First-Stage Check
For the mute group, post-treatment engagements/exposure to “muted” accounts vs. For the control group, post-treatment engagements/exposure to hypothetically “muted” accounts (accounts that would have been muted if they were assigned to the muting group)
- For
Muting
group: Post-treatment engagements with/exposure to/likes of “muted” sources - For
Control
group (control + media_literacy): Post-treatment engagements with/exposure to/likes of “hypothetically muted” sources
(This removed those users who have engagements/exposures exclusively to the LQ accounts that are “not” muted either in real or hypothetically, resulting in 94 unique users for the task 1.)
FYI) For muted accounts that you follow:
Replies and mentions by the muted account will still appear in your Notifications tab.
posts from a muted account – posted before the account was muted – will be removed from your Home timeline.
When you click or tap into a conversation, replies from muted accounts will be visible.
this_is_final_data |>
mutate(
group = ifelse(randomized_group=="media_literacy" |
randomized_group=='control', "Control", "Muting")
) |>
mutate(flag = ifelse(
group == "Control" & muted == "yes", "include",
# For Control group, only include cases where muted == "yes"
ifelse(
group == "Muting" & muted == "yes", "include",
# For Muting group, only include cases where muted == "yes"
"drop"
# The rest of the cases (muted == "no"), drop
)
)) |>
filter(flag == "include") |> select(!flag) -> test_again
test_again |>
select(user_id, tag, randomized_group, count) |>
group_by(user_id, tag) |>
count() |> ungroup() -> test_for_muted
# test_for_muted - if there are no categories (tag), freq == 0
test_for_muted |>
select(user_id, tag, freq) |>
pivot_wider(names_from=tag, values_from=freq) |>
mutate_if(is.numeric , replace_na, replace = 0) |>
relocate(user_id,
pre_hometimeline, post_hometimeline,
pre_engagement, post_engagement,
pre_likes, post_likes) -> test_for_muted_wider
test_for_muted_wider |>
select(user_id, pre_hometimeline:post_likes) |>
pivot_longer(cols=pre_hometimeline:post_likes,
names_to = "tag",
values_to = "n") -> test_for_muted_longer
test_again |>
select(user_id, tag, count) |>
unique() |>
merge(test_for_muted_longer, by=c("user_id", "tag"), all=TRUE) |>
mutate_if(is.numeric , replace_na, replace = 0) -> test_for_muted_longer2
test_again |> select(user_id, tag, randomized_group) |> unique() -> for_merge_muted_1
for_merge_muted_1 |>
select(user_id, randomized_group) |>
unique() -> for_merge_muted_2
test_for_muted_longer2 |>
merge(for_merge_muted_1, by = c('user_id', 'tag'), all.x = T) |>
select(user_id, tag, count, n) |>
merge(for_merge_muted_2, by = "user_id") |>
mutate(group = ifelse(randomized_group == "control" |
randomized_group == "media_literacy",
"Control", "Muting")) -> test_for_muted_longer3
# backup
df_plot_muted = test_for_muted_longer3
2.1 Plot 1. Post-treatment engagement
Post-treatment engagement (tweets, re-tweets, quotes, replies) with
(hypothetically) muted accounts between control
vs. muting
df_plot_muted |>
mutate(rate = (n/count)*100) |>
mutate_if(is.numeric , replace_na, replace = 0) |>
mutate(rate = round(rate,1)) |>
filter(tag == 'post_engagement') |> # tag == 'post_engagement' filter
select(user_id, group, n, rate) -> df_post_engagement
df_post_engagement |>
filter(group == "Control") |>
select(n) |>
as.vector() |>
unlist() |>
as.integer() -> group1
df_post_engagement |>
filter(group == "Muting") |>
select(n) |>
as.vector() |>
unlist() |>
as.integer() -> group2
df_post_engagement |>
ggplot(aes(x = group, y = n)) +
ggbeeswarm::geom_quasirandom(
shape = 21, color = "white",
alpha = 0.9, size = 3,
aes(fill = group)
) +
geom_bar(stat = "summary", fun = mean, width = 0.7, alpha = 0.6, aes(fill = group)) +
stat_summary(geom = "errorbar", fun.data = "mean_se", width = 0.1, linewidth = 1) +
scale_fill_manual(values = brewer.pal(8, "Accent")[1:2]) +
scale_y_continuous(n.breaks=15) +
labs(x = "Group",
y = "Count",
caption = paste0("P = ",
signif(t.test(group1, group2)$p.value, 2),
" (t test)", "\n",
"P = ",
signif(ks.test(group1, group2)$p.value, 2),
" (Kolmogorov–Smirnov test)")) +
theme_classic() +
theme(
text = element_text(size = 12, face = "bold", color = "black"),
axis.text = element_text(color = "black"),
legend.position = "none",
plot.title = element_text(size = 10),
plot.caption = element_text(hjust = 0)
) +
ggtitle(
paste0(
"Control: mean = ", signif(mean(group1), 2),
"; sd = ", signif(sd(group1), 2), "; median = ", signif(median(group1), 2),
"; IQR = ", signif(IQR(group1), 2), "\n",
"Muting: mean = ", signif(mean(group2), 2),
"; sd = ", signif(sd(group2), 2),
"; median = ", signif(median(group2), 2),
"; IQR = ", signif(IQR(group2), 2)
)) -> bar_post_engagement
df_post_engagement |>
ggplot(aes(x = group, y = n)) +
geom_bar(stat = "summary", fun = mean, width = 0.7, alpha = 0.8, aes(fill = group)) +
stat_summary(geom = "errorbar", fun.data = "mean_se", width = 0.1, linewidth = 1) +
ggbeeswarm::geom_quasirandom(
shape = 21, color = "white",
alpha = 0.8, size = 3,
aes(fill = group)) +
scale_fill_manual(values = brewer.pal(8, "Accent")[1:2]) +
scale_y_sqrt(n.breaks=15) +
labs(x = "Group",
y = "Count (sqrt scale)",
caption = paste0("P = ",
signif(t.test(group1, group2)$p.value, 2),
" (t test)", "\n",
"P = ",
signif(ks.test(group1, group2)$p.value, 2),
" (Kolmogorov–Smirnov test)")) +
theme_classic() +
theme(
text = element_text(size = 12, face = "bold", color = "black"),
axis.text = element_text(color = "black"),
legend.position = "none",
plot.title = element_text(size = 10),
plot.caption = element_text(hjust = 0)
) +
ggtitle(
paste0(
"Control: mean = ", signif(mean(group1), 2),
"; sd = ", signif(sd(group1), 2), "; median = ", signif(median(group1), 2),
"; IQR = ", signif(IQR(group1), 2), "\n",
"Muting: mean = ", signif(mean(group2), 2),
"; sd = ", signif(sd(group2), 2),
"; median = ", signif(median(group2), 2),
"; IQR = ", signif(IQR(group2), 2)
)) -> sqrt_post_engagement
plot_post_engage <- bar_post_engagement +
plot_annotation('Plot 1. Post-Treatment Engagements',
theme=theme(plot.title=element_text(size = 12,
face = "bold",
color = "black",
hjust=0.5)))
plot_post_engage
The y-axis (Count
) indicates the number of tweets
related to LQ sources that one engaged with (=engagement). These include
direct mentions of LQ accounts, replies, retweets, and quote tweets of
tweets created by LQ accounts, as well as direct URLs to these LQ
domains.
Group: Control
includes control group and media literacy
group, while Muting
includes muting treatment groups. For
Muting
group, only engagements with those LQ accounts that
are muted are counted. Note that in pilot study, we muted 70%
of the truncated list (truncated list: n=489).
Hence, Plot 1 allows us to compare the post-treatment engagements
with (hypothetically) muted LQ accounts between Control
and
Muting
groups.
For most participants, we see low levels of engagements with LQ accounts with a few outliers (active ones). Given this highly skewed distribution, and to be careful with comparison, I overlayed the same quantities but with different ways of visualizing them.
Bar plot
Comparison: mean
Error bars indicate SEs of the mean
t-test statistics (p-value presented)
Dot plot
Comparison: distribution
K-S test (nonparametric)
2.1.0.1 Another approach of visualizing engagements
Ro’ee: “[…] due to outliers and the many zeros, another approach is to create a binary variable of whether someone engaged with any low-quality content, and then compare that share engaging with low quality content across treatments. […] To focus on the extensive margin (any activity) instead of the intensive margin (how much activity) […]”
df_post_engagement |>
mutate(
engaged = ifelse(n==0, "NO", "YES")
) |>
select(engaged, group) |>
group_by(group, engaged) |> count()
## engaged group freq
## 1 NO Control 61
## 2 NO Muting 21
## 3 YES Control 5
## 4 YES Muting 3
Only 5 out of 66 in Control (8%=5/66) + 3 out of 24 Muting group (12%) have non-zero post-treatment engagement.
2.2 Plot 2. Post-treatment exposure
Post-treatment exposure between control
vs. muting
(for muted accounts)
df_plot_muted |>
filter(tag == "post_hometimeline") |> # tag == 'post_hometimeline' filter
select(user_id, group, n) -> df_post_hometimeline
df_post_hometimeline |>
filter(group == "Control") |>
select(n) |>
as.vector() |>
unlist() |>
as.integer() -> group1
df_post_hometimeline |>
filter(group == "Muting") |>
select(n) |>
as.vector() |>
unlist() |>
as.integer() -> group2
df_post_hometimeline |>
ggplot(aes(x = group, y = n)) +
ggbeeswarm::geom_quasirandom(
shape = 21, color = "white",
alpha = 0.9, size = 3,
aes(fill = group)
) +
geom_bar(stat = "summary", fun = mean, width = 0.7, alpha = 0.6, aes(fill = group)) +
stat_summary(geom = "errorbar", fun.data = "mean_se", width = 0.1, linewidth = 1) +
scale_fill_manual(values = brewer.pal(8, "Accent")[1:2]) +
labs(x = "Group",
y = "Count",
caption = paste0("P = ",
signif(t.test(group1, group2)$p.value, 2),
" (t test)", "\n",
"P = ",
signif(ks.test(group1, group2)$p.value, 2),
" (Kolmogorov–Smirnov test)")) +
theme_classic() +
theme(
text = element_text(size = 12, face = "bold", color = "black"),
axis.text = element_text(color = "black"),
legend.position = "none",
plot.title = element_text(size = 10),
plot.caption = element_text(hjust = 0)
) +
ggtitle(
paste0(
"Control: mean = ", signif(mean(group1), 2),
"; sd = ", signif(sd(group1), 2), "; median = ", signif(median(group1), 2),
"; IQR = ", signif(IQR(group1), 2), "\n",
"Muting: mean = ", signif(mean(group2), 2),
"; sd = ", signif(sd(group2), 2),
"; median = ", signif(median(group2), 2),
"; IQR = ", signif(IQR(group2), 2)
))-> bar_post_hometimeline
plot_post_exposure <- bar_post_hometimeline +
plot_annotation('Plot 2-1. Post-Treatment Exposure',
theme=theme(plot.title=element_text(size = 12,
face = "bold",
color = "black",
hjust=0.5)))
plot_post_exposure
The y-axis (Count
) indicates the number of tweets from
LQ sources (that are muted) found in one’s home timeline
(=exposure).
Hence, Plot 2 allows us to compare the post-treatment exposures to
(hypothetically) muted LQ accounts between Control
and
Muting
groups.
2.2.1 Y-axis: Count → Activity Rate
Since we pull reverse chronological home timeline (exposure data) once, how about calculating activity rates, instead of using count?
- Activity Rate = (# of muted LQ tweets) / (timestamp of the last appeared LQ tweet - timestamp of the first appeared LQ tweet in the collected data)
# exposure data
post_hometimeline_final <- read_csv("post_hometimeline_final.csv",
col_types = cols(user_id = col_character(),
target_user_id = col_character()))
post_hometimeline_final |> filter(type!="quoted") -> post_hometimeline_final
aggregate(post_hometimeline_final$created_at,
by=list(post_hometimeline_final$user_id), max) -> max_agg
aggregate(post_hometimeline_final$created_at,
by=list(post_hometimeline_final$user_id), min) -> min_agg
names(max_agg) = c("user_id", "max_time")
names(min_agg) = c("user_id", "min_time")
max_agg |> merge(min_agg, by='user_id') |>
mutate( timeinterval= as.duration(min_time %--% max_time),
in_days = round(as.numeric(max_time - min_time)/86400, 5),
in_hours =round(as.numeric(max_time - min_time)/3600, 5)) -> merge_agg
# merge timegap variables into df_post_hometimeline
df_post_hometimeline |>
merge(merge_agg, by='user_id', all.x=TRUE) |>
mutate_at(c(6:8), ~replace(., is.na(.), 0)) -> post_home_merged
post_home_merged |>
mutate(activity_rate = n/in_days) |>
mutate_at(c(9), ~replace(., is.nan(.), 0)) |>
mutate_at(c(9), ~replace(., is.infinite(.), 0)) -> post_home_merged
Plot 2-2. Post-Treatment Exposure (Daily activity rate)
- tweets / day
# post_home_merged
post_home_merged |>
filter(group == "Control") |>
select(activity_rate) |>
as.vector() |>
unlist() |>
as.integer() -> group1
post_home_merged |>
filter(group == "Muting") |>
select(activity_rate) |>
as.vector() |>
unlist() |>
as.integer() -> group2
post_home_merged |>
ggplot(aes(x = group, y = activity_rate)) +
ggbeeswarm::geom_quasirandom(
shape = 21, color = "white",
alpha = 0.9, size = 3,
aes(fill = group)
) +
geom_bar(stat = "summary", fun = mean, width = 0.7, alpha = 0.6, aes(fill = group)) +
stat_summary(geom = "errorbar", fun.data = "mean_se", width = 0.1, linewidth = 1) +
scale_fill_manual(values = brewer.pal(8, "Accent")[1:2]) +
labs(x = "Group",
y = "tweets / day",
caption = paste0("P = ",
signif(t.test(group1, group2)$p.value, 2),
" (t test)", "\n",
"P = ",
signif(ks.test(group1, group2)$p.value, 2),
" (Kolmogorov–Smirnov test)")) +
theme_classic() +
theme(
text = element_text(size = 12, face = "bold", color = "black"),
axis.text = element_text(color = "black"),
legend.position = "none",
plot.title = element_text(size = 10),
plot.caption = element_text(hjust = 0)
) +
ggtitle(
paste0(
"Control: mean = ", signif(mean(group1), 2),
"; sd = ", signif(sd(group1), 2), "; median = ", signif(median(group1), 2),
"; IQR = ", signif(IQR(group1), 2), "\n",
"Muting: mean = ", signif(mean(group2), 2),
"; sd = ", signif(sd(group2), 2),
"; median = ", signif(median(group2), 2),
"; IQR = ", signif(IQR(group2), 2)
))-> bar_post_hometimeline_rate
plot_post_exposure_rate <- bar_post_hometimeline_rate +
plot_annotation('Plot 2-2. Post-Treatment Exposure (Daily activity rate)',
theme=theme(plot.title=element_text(size = 12,
face = "bold",
color = "black",
hjust=0.5)))
plot_post_exposure_rate
Setting the y-axis into log10 scale to better understand the distribution:
- log10(1+y) due to many zeros!
post_home_merged |>
ggplot(aes(x = group, y = log10(activity_rate+1))) +
ggbeeswarm::geom_quasirandom(
shape = 21, color = "white",
alpha = 0.9, size = 3,
aes(fill = group)
) +
geom_bar(stat = "summary", fun = mean, width = 0.7, alpha = 0.6, aes(fill = group)) +
stat_summary(geom = "errorbar", fun.data = "mean_se", width = 0.1, linewidth = 1) +
scale_fill_manual(values = brewer.pal(8, "Accent")[1:2]) +
labs(x = "Group",
y = "log10((tweets/day)+1)",
caption = paste0("P = ",
signif(t.test(group1, group2)$p.value, 2),
" (t test)", "\n",
"P = ",
signif(ks.test(group1, group2)$p.value, 2),
" (Kolmogorov–Smirnov test)")) +
theme_classic() +
theme(
text = element_text(size = 12, face = "bold", color = "black"),
axis.text = element_text(color = "black"),
legend.position = "none",
plot.title = element_text(size = 10),
plot.caption = element_text(hjust = 0)
) +
ggtitle(
paste0(
"Control: mean = ", signif(mean(group1), 2),
"; sd = ", signif(sd(group1), 2), "; median = ", signif(median(group1), 2),
"; IQR = ", signif(IQR(group1), 2), "\n",
"Muting: mean = ", signif(mean(group2), 2),
"; sd = ", signif(sd(group2), 2),
"; median = ", signif(median(group2), 2),
"; IQR = ", signif(IQR(group2), 2)
)) -> bar_post_hometimeline_rate_log
plot_post_exposure_rate_log <- bar_post_hometimeline_rate_log +
plot_annotation('Plot 2-3. Post-Treatment Exposure (Daily activity rate; log10(y+1))',
theme=theme(plot.title=element_text(size = 12,
face = "bold",
color = "black",
hjust=0.5)))
plot_post_exposure_rate_log
2.3 Plot 3. Post-treatment Likes
df_plot_muted |>
filter(tag == "post_likes") |> # tag == 'post_likes' filter
select(user_id, group, n) -> df_post_likes
df_post_likes |>
filter(group == "Control") |>
select(n) |>
as.vector() |>
unlist() |>
as.integer() -> group1
df_post_likes |>
filter(group == "Muting") |>
select(n) |>
as.vector() |>
unlist() |>
as.integer() -> group2
df_post_likes |>
ggplot(aes(x = group, y = n)) +
ggbeeswarm::geom_quasirandom(
shape = 21, color = "white",
alpha = 0.9, size = 3,
aes(fill = group)
) +
geom_bar(stat = "summary", fun = mean, width = 0.7, alpha = 0.6, aes(fill = group)) +
stat_summary(geom = "errorbar", fun.data = "mean_se", width = 0.1, linewidth = 1) +
scale_fill_manual(values = brewer.pal(8, "Accent")[1:2]) +
labs(x = "Group",
y = "Count",
caption = paste0("P = ",
signif(t.test(group1, group2)$p.value, 2),
" (t test)", "\n",
"P = ",
signif(ks.test(group1, group2)$p.value, 2),
" (Kolmogorov–Smirnov test)")) +
theme_classic() +
theme(
text = element_text(size = 12, face = "bold", color = "black"),
axis.text = element_text(color = "black"),
legend.position = "none",
plot.title = element_text(size = 10),
plot.caption = element_text(hjust = 0)
) +
ggtitle(
paste0(
"Control: mean = ", signif(mean(group1), 2),
"; sd = ", signif(sd(group1), 2), "; median = ", signif(median(group1), 2),
"; IQR = ", signif(IQR(group1), 2), "\n",
"Muting: mean = ", signif(mean(group2), 2),
"; sd = ", signif(sd(group2), 2),
"; median = ", signif(median(group2), 2),
"; IQR = ", signif(IQR(group2), 2)
))-> bar_post_likes
plot_post_likes <- bar_post_likes +
plot_annotation('Plot 3. Post-Treatment Likes',
theme=theme(plot.title=element_text(size = 12,
face = "bold",
color = "black",
hjust=0.5)))
plot_post_likes
The y-axis (Count
) indicates the number of likes to
tweets related to LQ sources that are muted (=likes).
Hence, Plot 3 allows us to compare the post-treatment likes to
(hypothetically) muted LQ accounts between Control
and
Muting
groups.
2.3.1 Takeaways
- Overall, engagement levels are very low (only handful of participants actively engaging with LQ sources). This might resulted in minimal first-stage effect.
- For post-treatment home timeline exposure, we do see some meaningful differences between control vs. muting groups.
2.4 Plot 4. Pre-treatment engagement
Pre-treatment engagement between control
vs. muting
(for muted accounts)
# retrieve recruitment mode
w1_users |> select(user_id, vs_recruitment) -> recruitmode
# merge
df_plot_muted |>
merge(recruitmode, by='user_id', all.x = TRUE) -> df_plot_muted_recruit
df_plot_muted_recruit |>
filter(tag == "pre_engagement") |>
select(user_id, group, n, vs_recruitment) -> df_pre_engagement
df_pre_engagement |>
filter(group == "Control") |>
select(n) |>
as.vector() |>
unlist() |>
as.integer() -> group1
df_pre_engagement |>
filter(group == "Muting") |>
select(n) |>
as.vector() |>
unlist() |>
as.integer() -> group2
df_pre_engagement |>
ggplot(aes(x = group, y = n)) +
ggbeeswarm::geom_quasirandom(
shape = 21, color = "white",
alpha = 0.9, size = 3,
aes(fill = group)
) +
geom_bar(stat = "summary", fun = mean, width = 0.7, alpha = 0.6, aes(fill = group)) +
stat_summary(geom = "errorbar", fun.data = "mean_se", width = 0.1, linewidth = 1) +
scale_fill_manual(values = brewer.pal(8, "Accent")[1:2]) +
labs(x = "Group",
y = "Count",
caption = paste0("P = ",
signif(t.test(group1, group2)$p.value, 2),
" (t test)", "\n",
"P = ",
signif(ks.test(group1, group2)$p.value, 2),
" (Kolmogorov–Smirnov test)")) +
theme_classic() +
theme(
text = element_text(size = 12, face = "bold", color = "black"),
axis.text = element_text(color = "black"),
legend.position = "none",
plot.title = element_text(size = 10),
plot.caption = element_text(hjust = 0)
) +
ggtitle(
paste0(
"Control: mean = ", signif(mean(group1), 2),
"; sd = ", signif(sd(group1), 2), "; median = ", signif(median(group1), 2),
"; IQR = ", signif(IQR(group1), 2), "\n",
"Muting: mean = ", signif(mean(group2), 2),
"; sd = ", signif(sd(group2), 2),
"; median = ", signif(median(group2), 2),
"; IQR = ", signif(IQR(group2), 2)
))-> bar_pre_engagement
bar_pre_engagement + plot_annotation('Plot 4. Pre-Treatment Engagements',
theme=theme(plot.title=element_text(size = 12,
face = "bold",
color = "black",
hjust=0.5)))
2.5 Plot 5. Pre-treatment exposure
Pre-treatment exposure between control
vs. muting
(for muted accounts)
# Use df_plot_unmuted
df_plot_muted_recruit |>
filter(tag == "pre_hometimeline") |> # tag == 'post_hometimeline' filter
select(user_id, group, n, vs_recruitment) -> df_pre_hometimeline
pre_hometimeline_final <- read_csv("pre_hometimeline_final.csv",
col_types = cols(user_id = col_character(),
target_user_id = col_character()))
pre_hometimeline_final |> filter(type!='quoted') -> pre_hometimeline_final
aggregate(pre_hometimeline_final$created_at,
by=list(pre_hometimeline_final$user_id), max) -> max_agg
aggregate(pre_hometimeline_final$created_at,
by=list(pre_hometimeline_final$user_id), min) -> min_agg
names(max_agg) = c("user_id", "max_time")
names(min_agg) = c("user_id", "min_time")
max_agg |> merge(min_agg, by='user_id') |>
mutate( timeinterval= as.duration(min_time %--% max_time),
in_days = round(as.numeric(max_time - min_time)/86400, 5),
in_hours =round(as.numeric(max_time - min_time)/3600, 5)) -> merge_agg
# merge timegap variables into df_pre_hometimeline
df_pre_hometimeline |>
merge(merge_agg, by='user_id', all.x=TRUE) |>
mutate_at(c(7:9), ~replace(., is.na(.), 0)) -> pre_home_merged
pre_home_merged |>
mutate(activity_rate = n/in_days) |>
mutate_at(c(10), ~replace(., is.nan(.), 0)) |>
mutate_at(c(10), ~replace(., is.infinite(.), 0)) -> pre_home_merged
pre_home_merged |>
filter(group == "Control") |>
select(n) |>
as.vector() |>
unlist() |>
as.integer() -> group1
pre_home_merged |>
filter(group == "Muting") |>
select(n) |>
as.vector() |>
unlist() |>
as.integer() -> group2
pre_home_merged |>
ggplot(aes(x = group, y = n)) +
ggbeeswarm::geom_quasirandom(
shape = 21, color = "white",
alpha = 0.9, size = 3,
aes(fill = group)
) +
geom_bar(stat = "summary", fun = mean, width = 0.7, alpha = 0.6, aes(fill = group)) +
stat_summary(geom = "errorbar", fun.data = "mean_se", width = 0.1, linewidth = 1) +
scale_fill_manual(values = brewer.pal(8, "Accent")[1:2]) +
labs(x = "Group",
y = "Count",
caption = paste0("P = ",
signif(t.test(group1, group2)$p.value, 2),
" (t test)", "\n",
"P = ",
signif(ks.test(group1, group2)$p.value, 2),
" (Kolmogorov–Smirnov test)")) +
theme_classic() +
theme(
text = element_text(size = 12, face = "bold", color = "black"),
axis.text = element_text(color = "black"),
legend.position = "none",
plot.title = element_text(size = 10),
plot.caption = element_text(hjust = 0)
) +
ggtitle(
paste0(
"Control: mean = ", signif(mean(group1), 2),
"; sd = ", signif(sd(group1), 2), "; median = ", signif(median(group1), 2),
"; IQR = ", signif(IQR(group1), 2), "\n",
"Muting: mean = ", signif(mean(group2), 2),
"; sd = ", signif(sd(group2), 2),
"; median = ", signif(median(group2), 2),
"; IQR = ", signif(IQR(group2), 2)
))-> bar_pre_exposure
bar_pre_exposure + plot_annotation('Plot 5-1. Pre-Treatment Exposure',
theme=theme(plot.title=element_text(size = 12,
face = "bold",
color = "black",
hjust=0.5)))
# post_home_merged
pre_home_merged |>
filter(group == "Control") |>
select(activity_rate) |>
as.vector() |>
unlist() |>
as.integer() -> group1
pre_home_merged |>
filter(group == "Muting") |>
select(activity_rate) |>
as.vector() |>
unlist() |>
as.integer() -> group2
pre_home_merged |>
ggplot(aes(x = group, y = activity_rate)) +
ggbeeswarm::geom_quasirandom(
shape = 21, color = "white",
alpha = 0.9, size = 3,
aes(fill = group)
) +
geom_bar(stat = "summary", fun = mean, width = 0.7, alpha = 0.6, aes(fill = group)) +
stat_summary(geom = "errorbar", fun.data = "mean_se", width = 0.1, linewidth = 1) +
scale_fill_manual(values = brewer.pal(8, "Accent")[1:2]) +
labs(x = "Group",
y = "tweets / day",
caption = paste0("P = ",
signif(t.test(group1, group2)$p.value, 2),
" (t test)", "\n",
"P = ",
signif(ks.test(group1, group2)$p.value, 2),
" (Kolmogorov–Smirnov test)")) +
theme_classic() +
theme(
text = element_text(size = 12, face = "bold", color = "black"),
axis.text = element_text(color = "black"),
legend.position = "none",
plot.title = element_text(size = 10),
plot.caption = element_text(hjust = 0)
) +
ggtitle(
paste0(
"Control: mean = ", signif(mean(group1), 2),
"; sd = ", signif(sd(group1), 2), "; median = ", signif(median(group1), 2),
"; IQR = ", signif(IQR(group1), 2), "\n",
"Muting: mean = ", signif(mean(group2), 2),
"; sd = ", signif(sd(group2), 2),
"; median = ", signif(median(group2), 2),
"; IQR = ", signif(IQR(group2), 2)
))-> bar_pre_hometimeline_rate
plot_pre_exposure_rate <- bar_pre_hometimeline_rate +
plot_annotation('Plot 5-2. Pre-Treatment Exposure (Daily activity rate)',
theme=theme(plot.title=element_text(size = 12,
face = "bold",
color = "black",
hjust=0.5)))
plot_pre_exposure_rate
pre_home_merged |>
ggplot(aes(x = group, y = log10(activity_rate+1))) +
ggbeeswarm::geom_quasirandom(
shape = 21, color = "white",
alpha = 0.9, size = 3,
aes(fill = group)
) +
geom_bar(stat = "summary", fun = mean, width = 0.7, alpha = 0.6, aes(fill = group)) +
stat_summary(geom = "errorbar", fun.data = "mean_se", width = 0.1, linewidth = 1) +
scale_fill_manual(values = brewer.pal(8, "Accent")[1:2]) +
labs(x = "Group",
y = "log10((tweets/day)+1)",
caption = paste0("P = ",
signif(t.test(group1, group2)$p.value, 2),
" (t test)", "\n",
"P = ",
signif(ks.test(group1, group2)$p.value, 2),
" (Kolmogorov–Smirnov test)")) +
theme_classic() +
theme(
text = element_text(size = 12, face = "bold", color = "black"),
axis.text = element_text(color = "black"),
legend.position = "none",
plot.title = element_text(size = 10),
plot.caption = element_text(hjust = 0)
) +
ggtitle(
paste0(
"Control: mean = ", signif(mean(group1), 2),
"; sd = ", signif(sd(group1), 2), "; median = ", signif(median(group1), 2),
"; IQR = ", signif(IQR(group1), 2), "\n",
"Muting: mean = ", signif(mean(group2), 2),
"; sd = ", signif(sd(group2), 2),
"; median = ", signif(median(group2), 2),
"; IQR = ", signif(IQR(group2), 2)
)) -> bar_pre_hometimeline_rate_log
plot_pre_exposure_rate_log <- bar_pre_hometimeline_rate_log +
plot_annotation('Plot 5-3. Pre-Treatment Exposure (Daily activity rate; log10(y+1))',
theme=theme(plot.title=element_text(size = 12,
face = "bold",
color = "black",
hjust=0.5)))
plot_pre_exposure_rate_log
2.6 Plot 6. Pre-treatment likes
Pre-treatment exposure between control
vs. muting
(for muted accounts)
df_plot_muted_recruit |>
filter(tag == "pre_likes") |>
select(user_id, group, n, vs_recruitment) -> df_pre_likes
df_pre_likes |>
filter(group == "Control") |>
select(n) |>
as.vector() |>
unlist() |>
as.integer() -> group1
df_pre_likes |>
filter(group == "Muting") |>
select(n) |>
as.vector() |>
unlist() |>
as.integer() -> group2
df_pre_likes |>
ggplot(aes(x = group, y = n)) +
ggbeeswarm::geom_quasirandom(
shape = 21, color = "white",
alpha = 0.9, size = 3,
aes(fill = group)
) +
geom_bar(stat = "summary", fun = mean, width = 0.7, alpha = 0.6, aes(fill = group)) +
stat_summary(geom = "errorbar", fun.data = "mean_se", width = 0.1, linewidth = 1) +
scale_fill_manual(values = brewer.pal(8, "Accent")[1:2]) +
labs(x = "Group",
y = "Count",
caption = paste0("P = ",
signif(t.test(group1, group2)$p.value, 2),
" (t test)", "\n",
"P = ",
signif(ks.test(group1, group2)$p.value, 2),
" (Kolmogorov–Smirnov test)")) +
theme_classic() +
theme(
text = element_text(size = 12, face = "bold", color = "black"),
axis.text = element_text(color = "black"),
legend.position = "none",
plot.title = element_text(size = 10),
plot.caption = element_text(hjust = 0)
) +
ggtitle(
paste0(
"Control: mean = ", signif(mean(group1), 2),
"; sd = ", signif(sd(group1), 2), "; median = ", signif(median(group1), 2),
"; IQR = ", signif(IQR(group1), 2), "\n",
"Muting: mean = ", signif(mean(group2), 2),
"; sd = ", signif(sd(group2), 2),
"; median = ", signif(median(group2), 2),
"; IQR = ", signif(IQR(group2), 2)
))-> bar_pre_likes
bar_pre_likes + plot_annotation('Plot 6. Pre-Treatment Likes',
theme=theme(plot.title=element_text(size = 12,
face = "bold",
color = "black",
hjust=0.5)))
2.6.1 Takeaways
In general, no differences between control vs. muting groups in pre-treatment engagements, exposure, and likes!
2.7 Plot by Recruitment Mode
Pre-treatment Engagement
# ENGAGEMENT
df_pre_engagement |>
filter(vs_recruitment == "Twitter recruitment") |>
select(n) |>
as.vector() |>
unlist() |>
as.integer() -> group1
df_pre_engagement |>
filter(vs_recruitment == "New panelist recruited after January 1, 2024") |>
select(n) |>
as.vector() |>
unlist() |>
as.integer() -> group2
df_pre_engagement |>
filter(vs_recruitment == "Existing panelist") |>
select(n) |>
as.vector() |>
unlist() |>
as.integer() -> group3
df_pre_engagement |>
ggplot(aes(x = vs_recruitment, y = n)) +
ggbeeswarm::geom_quasirandom(
shape = 21, color = "white",
alpha = 0.9, size = 3,
aes(fill = vs_recruitment)
) +
geom_bar(stat = "summary", fun = mean, width = 0.7, alpha = 0.6,
aes(fill = vs_recruitment)) +
stat_summary(geom = "errorbar", fun.data = "mean_se", width = 0.1, linewidth = 1) +
scale_fill_manual(values = brewer.pal(8, "Accent")[1:3]) +
labs(x = "Recruitment Mode",
y = "Count") +
theme_classic() +
theme(
text = element_text(size = 12, face = "bold", color = "black"),
axis.text = element_text(color = "black"),
legend.position = "none",
plot.title = element_text(size = 10),
plot.caption = element_text(hjust = 0)
) +
ggtitle(
paste0(
"Twitter recruitment: mean = ", signif(mean(group1), 2),
"; sd = ", signif(sd(group1), 2), "; median = ", signif(median(group1), 2),
"; IQR = ", signif(IQR(group1), 2), " (N=", length(group1), ")", "\n",
"New panelist: mean = ", signif(mean(group2), 2),
"; sd = ", signif(sd(group2), 2),
"; median = ", signif(median(group2), 2),
"; IQR = ", signif(IQR(group2), 2)," (N=", length(group2), ")", "\n",
"Existing panelist: mean = ", signif(mean(group3), 2),
"; sd = ", signif(sd(group3), 2),
"; median = ", signif(median(group3), 2),
"; IQR = ", signif(IQR(group3), 2)," (N=", length(group3), ")"
)) -> bar_pre_engagement_recruit
bar_pre_engagement_recruit +
plot_annotation('Plot 7. Pre-Treatment Engagement by Recruitment Mode',
theme=theme(plot.title=element_text(size = 12,
face = "bold",
color = "black",
hjust=0.5)))
Pre-treatment Exposure
# Use pre_home_merged
pre_home_merged |>
filter(vs_recruitment == "Twitter recruitment") |>
select(n) |>
as.vector() |>
unlist() |>
as.integer() -> group1
pre_home_merged |>
filter(vs_recruitment == "New panelist recruited after January 1, 2024") |>
select(n) |>
as.vector() |>
unlist() |>
as.integer() -> group2
pre_home_merged |>
filter(vs_recruitment == "Existing panelist") |>
select(n) |>
as.vector() |>
unlist() |>
as.integer() -> group3
pre_home_merged |>
ggplot(aes(x = vs_recruitment, y = n)) +
ggbeeswarm::geom_quasirandom(
shape = 21, color = "white",
alpha = 0.9, size = 3,
aes(fill = vs_recruitment)
) +
geom_bar(stat = "summary", fun = mean, width = 0.7, alpha = 0.6,
aes(fill = vs_recruitment)) +
stat_summary(geom = "errorbar", fun.data = "mean_se", width = 0.1, linewidth = 1) +
scale_fill_manual(values = brewer.pal(8, "Accent")[1:3]) +
labs(x = "Recruitment Mode",
y = "Count") +
theme_classic() +
theme(
text = element_text(size = 12, face = "bold", color = "black"),
axis.text = element_text(color = "black"),
legend.position = "none",
plot.title = element_text(size = 10),
plot.caption = element_text(hjust = 0)
) +
ggtitle(
paste0(
"Twitter recruitment: mean = ", signif(mean(group1), 2),
"; sd = ", signif(sd(group1), 2), "; median = ", signif(median(group1), 2),
"; IQR = ", signif(IQR(group1), 2), " (N=", length(group1), ")", "\n",
"New panelist: mean = ", signif(mean(group2), 2),
"; sd = ", signif(sd(group2), 2),
"; median = ", signif(median(group2), 2),
"; IQR = ", signif(IQR(group2), 2)," (N=", length(group2), ")", "\n",
"Existing panelist: mean = ", signif(mean(group3), 2),
"; sd = ", signif(sd(group3), 2),
"; median = ", signif(median(group3), 2),
"; IQR = ", signif(IQR(group3), 2)," (N=", length(group3), ")"
)) -> bar_pre_exposure_recruit
bar_pre_exposure_recruit +
plot_annotation('Plot 8-1. Pre-Treatment Exposure by Recruitment Mode',
theme=theme(plot.title=element_text(size = 12,
face = "bold",
color = "black",
hjust=0.5)))
Plot 8-2. Y-axis into activity rate
# Use pre_home_merged
pre_home_merged |>
filter(vs_recruitment == "Twitter recruitment") |>
select(activity_rate) |>
as.vector() |>
unlist() |>
as.integer() -> group1
pre_home_merged |>
filter(vs_recruitment == "New panelist recruited after January 1, 2024") |>
select(activity_rate) |>
as.vector() |>
unlist() |>
as.integer() -> group2
pre_home_merged |>
filter(vs_recruitment == "Existing panelist") |>
select(activity_rate) |>
as.vector() |>
unlist() |>
as.integer() -> group3
pre_home_merged |>
ggplot(aes(x = vs_recruitment, y = activity_rate)) +
ggbeeswarm::geom_quasirandom(
shape = 21, color = "white",
alpha = 0.9, size = 3,
aes(fill = vs_recruitment)
) +
geom_bar(stat = "summary", fun = mean, width = 0.7, alpha = 0.6,
aes(fill = vs_recruitment)) +
stat_summary(geom = "errorbar", fun.data = "mean_se", width = 0.1, linewidth = 1) +
scale_fill_manual(values = brewer.pal(8, "Accent")[1:3]) +
labs(x = "Recruitment Mode",
y = "tweet / day") +
theme_classic() +
theme(
text = element_text(size = 12, face = "bold", color = "black"),
axis.text = element_text(color = "black"),
legend.position = "none",
plot.title = element_text(size = 10),
plot.caption = element_text(hjust = 0)
) +
ggtitle(
paste0(
"Twitter recruitment: mean = ", signif(mean(group1), 2),
"; sd = ", signif(sd(group1), 2), "; median = ", signif(median(group1), 2),
"; IQR = ", signif(IQR(group1), 2), " (N=", length(group1), ")", "\n",
"New panelist: mean = ", signif(mean(group2), 2),
"; sd = ", signif(sd(group2), 2),
"; median = ", signif(median(group2), 2),
"; IQR = ", signif(IQR(group2), 2)," (N=", length(group2), ")", "\n",
"Existing panelist: mean = ", signif(mean(group3), 2),
"; sd = ", signif(sd(group3), 2),
"; median = ", signif(median(group3), 2),
"; IQR = ", signif(IQR(group3), 2)," (N=", length(group3), ")"
)) -> bar_pre_exposure_recruit
bar_pre_exposure_recruit +
plot_annotation('Plot 8-2. Pre-Treatment Exposure by Recruitment Mode (Daily Activitiy Rate)',
theme=theme(plot.title=element_text(size = 12,
face = "bold",
color = "black",
hjust=0.5)))
pre_home_merged |>
ggplot(aes(x = vs_recruitment, y = log10(activity_rate+1))) +
ggbeeswarm::geom_quasirandom(
shape = 21, color = "white",
alpha = 0.9, size = 3,
aes(fill = vs_recruitment)
) +
geom_bar(stat = "summary", fun = mean, width = 0.7, alpha = 0.6,
aes(fill = vs_recruitment)) +
stat_summary(geom = "errorbar", fun.data = "mean_se", width = 0.1, linewidth = 1) +
scale_fill_manual(values = brewer.pal(8, "Accent")[1:3]) +
labs(x = "Recruitment Mode",
y = "log10((tweets/day)+1)") +
theme_classic() +
theme(
text = element_text(size = 12, face = "bold", color = "black"),
axis.text = element_text(color = "black"),
legend.position = "none",
plot.title = element_text(size = 10),
plot.caption = element_text(hjust = 0)
) +
ggtitle(
paste0(
"Twitter recruitment: mean = ", signif(mean(group1), 2),
"; sd = ", signif(sd(group1), 2), "; median = ", signif(median(group1), 2),
"; IQR = ", signif(IQR(group1), 2), " (N=", length(group1), ")", "\n",
"New panelist: mean = ", signif(mean(group2), 2),
"; sd = ", signif(sd(group2), 2),
"; median = ", signif(median(group2), 2),
"; IQR = ", signif(IQR(group2), 2)," (N=", length(group2), ")", "\n",
"Existing panelist: mean = ", signif(mean(group3), 2),
"; sd = ", signif(sd(group3), 2),
"; median = ", signif(median(group3), 2),
"; IQR = ", signif(IQR(group3), 2)," (N=", length(group3), ")"
)) -> bar_pre_exposure_recruit_log
bar_pre_exposure_recruit_log +
plot_annotation('Plot 8-3. Pre-Treatment Exposure by Recruitment Mode (Daily activity rate; ; log10(y+1))',
theme=theme(plot.title=element_text(size = 12,
face = "bold",
color = "black",
hjust=0.5)))
pre-treatment likes
# LIKES
df_pre_likes |>
filter(vs_recruitment == "Twitter recruitment") |>
select(n) |>
as.vector() |>
unlist() |>
as.integer() -> group1
df_pre_likes |>
filter(vs_recruitment == "New panelist recruited after January 1, 2024") |>
select(n) |>
as.vector() |>
unlist() |>
as.integer() -> group2
df_pre_likes |>
filter(vs_recruitment == "Existing panelist") |>
select(n) |>
as.vector() |>
unlist() |>
as.integer() -> group3
df_pre_likes |>
ggplot(aes(x = vs_recruitment, y = n)) +
ggbeeswarm::geom_quasirandom(
shape = 21, color = "white",
alpha = 0.9, size = 3,
aes(fill = vs_recruitment)
) +
geom_bar(stat = "summary", fun = mean, width = 0.7, alpha = 0.6,
aes(fill = vs_recruitment)) +
stat_summary(geom = "errorbar", fun.data = "mean_se", width = 0.1, linewidth = 1) +
scale_fill_manual(values = brewer.pal(8, "Accent")[1:3]) +
labs(x = "Recruitment Mode",
y = "Count") +
theme_classic() +
theme(
text = element_text(size = 12, face = "bold", color = "black"),
axis.text = element_text(color = "black"),
legend.position = "none",
plot.title = element_text(size = 10),
plot.caption = element_text(hjust = 0)
) +
ggtitle(
paste0(
"Twitter recruitment: mean = ", signif(mean(group1), 2),
"; sd = ", signif(sd(group1), 2), "; median = ", signif(median(group1), 2),
"; IQR = ", signif(IQR(group1), 2), " (N=", length(group1), ")", "\n",
"New panelist: mean = ", signif(mean(group2), 2),
"; sd = ", signif(sd(group2), 2),
"; median = ", signif(median(group2), 2),
"; IQR = ", signif(IQR(group2), 2)," (N=", length(group2), ")", "\n",
"Existing panelist: mean = ", signif(mean(group3), 2),
"; sd = ", signif(sd(group3), 2),
"; median = ", signif(median(group3), 2),
"; IQR = ", signif(IQR(group3), 2)," (N=", length(group3), ")"
)) -> bar_pre_likes_recruit
bar_pre_likes_recruit + plot_annotation('Plot 9. Pre-Treatment Likes by Recruitment Mode',
theme=theme(plot.title=element_text(size = 12,
face = "bold",
color = "black",
hjust=0.5)))
3 [Hints] Hypotheses?
Hypothesis 1: Assignment to the muting treatment (H1a) and the media literacy treatment (H1b) will reduce total engagement with low-quality news sources during the treatment period. The effect of the muting treatment will be larger (H1c).
this_is_final_data |>
select(user_id, tag, randomized_group, count) |>
group_by(user_id, tag) |>
count() |> ungroup() -> test_for_all
# test_for_all - if there are no categories (tag), freq == 0
test_for_all |>
select(user_id, tag, freq) |>
pivot_wider(names_from=tag, values_from=freq) |>
mutate_if(is.numeric , replace_na, replace = 0) |>
relocate(user_id,
pre_hometimeline, post_hometimeline,
pre_engagement, post_engagement,
pre_likes, post_likes) -> test_for_all_wider
test_for_all_wider |>
select(user_id, pre_hometimeline:post_likes) |>
pivot_longer(cols=pre_hometimeline:post_likes,
names_to = "tag",
values_to = "n") -> test_for_all_longer
test_for_all |>
select(user_id, tag, count) |>
unique() |>
merge(test_for_muted_longer, by=c("user_id", "tag"), all=TRUE) |>
mutate_if(is.numeric , replace_na, replace = 0) -> test_for_all_longer2
test_for_all |>
select(user_id, tag, randomized_group) |>
unique() -> for_merge_all_1
for_merge_all_1 |>
select(user_id, randomized_group) |>
unique() -> for_merge_all_2
test_for_all_longer2 |>
merge(for_merge_all_1,
by = c('user_id', 'tag'), all.x = T) |>
select(user_id, tag, count, n) |>
merge(for_merge_all_2, by = "user_id") -> test_for_all_longer3
# backup
df_plot_all = test_for_all_longer3
df_plot_all |>
filter(tag=="pre_likes" | tag=="pre_engagement") |>
select(!count) |>
pivot_wider(id_cols=c('user_id', 'randomized_group'),
names_from = "tag", values_from="n") |>
mutate_if(is.numeric , replace_na, replace = 0) |>
mutate(pre_total_engagement = pre_likes + pre_engagement) |>
select(user_id, randomized_group, pre_total_engagement) -> pre_total_engage
df_plot_all |>
filter(tag=="post_likes" | tag=="post_engagement") |>
select(!count) |>
pivot_wider(id_cols=c('user_id', 'randomized_group'),
names_from = "tag", values_from="n") |>
mutate_if(is.numeric , replace_na, replace = 0) |>
mutate(post_total_engagement = post_likes + post_engagement) |>
select(user_id, randomized_group, post_total_engagement) -> post_total_engage
pre_total_engage |>
merge(post_total_engage, by=c("user_id", "randomized_group")) -> merge_total_engage
merge_total_engage |>
ggplot(aes(x = randomized_group, y = pre_total_engagement)) +
geom_violin(aes(fill = randomized_group), alpha=0.3) +
ggbeeswarm::geom_quasirandom(
shape = 21, color = "white",
alpha = 0.9, size = 3,
aes(fill = randomized_group)
) +
geom_bar(stat = "summary", fun = mean, width = 0.7, alpha = 0.6,
aes(fill = randomized_group)) +
stat_summary(geom = "errorbar", fun.data = "mean_se", width = 0.1, linewidth = 1) +
scale_fill_manual(values = brewer.pal(8, "Accent")[1:3]) +
labs(x = "Randomized Group",
y = "Count (sqrt scale)") +
theme_classic() +
theme(
text = element_text(size = 12, face = "bold", color = "black"),
axis.text = element_text(color = "black"),
legend.position = "none",
plot.title = element_text(size = 10),
plot.caption = element_text(hjust = 0)
) +
ggtitle(
paste0(
"Muting: mean = ", signif(mean(group1), 2),
"; sd = ", signif(sd(group1), 2), "; median = ", signif(median(group1), 2),
"; IQR = ", signif(IQR(group1), 2), " (N=", length(group1), ")", "\n",
"Media Literacy: mean = ", signif(mean(group2), 2),
"; sd = ", signif(sd(group2), 2),
"; median = ", signif(median(group2), 2),
"; IQR = ", signif(IQR(group2), 2)," (N=", length(group2), ")", "\n",
"Control: mean = ", signif(mean(group3), 2),
"; sd = ", signif(sd(group3), 2),
"; median = ", signif(median(group3), 2),
"; IQR = ", signif(IQR(group3), 2)," (N=", length(group3), ")"
)) + scale_y_sqrt() +
plot_annotation('Pre-Treatment Total Engagement',
theme=theme(plot.title=element_text(size = 12,
face = "bold",
color = "black",
hjust=0.5)))
Muting, Media Literacy, Control
→ Y: Post-Treatment Engagements+Likes with LQ sources
Same plots but with randomized_group + combined measure of post-treatment engagements+likes
# df_plot_all_merged - pre_total_engagement, post_total_engagement (=count)
merge_total_engage |>
filter(randomized_group == "muting_treatment1") |>
select(post_total_engagement) |>
as.vector() |>
unlist() |>
as.integer() -> group1
merge_total_engage |>
filter(randomized_group == "media_literacy") |>
select(post_total_engagement) |>
as.vector() |>
unlist() |>
as.integer() -> group2
merge_total_engage |>
filter(randomized_group == "control") |>
select(post_total_engagement) |>
as.vector() |>
unlist() |>
as.integer() -> group3
merge_total_engage |>
ggplot(aes(x = randomized_group, y = post_total_engagement)) +
geom_violin(aes(fill = randomized_group), alpha=0.3) +
ggbeeswarm::geom_quasirandom(
shape = 21, color = "white",
alpha = 0.9, size = 3,
aes(fill = randomized_group)
) +
geom_bar(stat = "summary", fun = mean, width = 0.7, alpha = 0.6,
aes(fill = randomized_group)) +
stat_summary(geom = "errorbar", fun.data = "mean_se", width = 0.1, linewidth = 1) +
scale_fill_manual(values = brewer.pal(8, "Accent")[1:3]) +
labs(x = "Randomized Group",
y = "Count") +
theme_classic() +
theme(
text = element_text(size = 12, face = "bold", color = "black"),
axis.text = element_text(color = "black"),
legend.position = "none",
plot.title = element_text(size = 10),
plot.caption = element_text(hjust = 0)
) +
ggtitle(
paste0(
"Muting: mean = ", signif(mean(group1), 2),
"; sd = ", signif(sd(group1), 2), "; median = ", signif(median(group1), 2),
"; IQR = ", signif(IQR(group1), 2), " (N=", length(group1), ")", "\n",
"Media Literacy: mean = ", signif(mean(group2), 2),
"; sd = ", signif(sd(group2), 2),
"; median = ", signif(median(group2), 2),
"; IQR = ", signif(IQR(group2), 2)," (N=", length(group2), ")", "\n",
"Control: mean = ", signif(mean(group3), 2),
"; sd = ", signif(sd(group3), 2),
"; median = ", signif(median(group3), 2),
"; IQR = ", signif(IQR(group3), 2)," (N=", length(group3), ")"
)) -> bar_post_total_engage
bar_post_total_engage + scale_y_sqrt() +
plot_annotation('Post-Treatment Total Engagement',
theme=theme(plot.title=element_text(size = 12,
face = "bold",
color = "black",
hjust=0.5)))
merge_total_engage |>
mutate(randomized_group = ifelse(randomized_group=="media_literacy", "Media Literacy", ifelse(randomized_group=="control", "Control", "Muting"))) |>
pivot_longer(cols=c("pre_total_engagement", "post_total_engagement")) |>
mutate(group = factor(name,
levels = c('pre_total_engagement',
'post_total_engagement'))) |>
grouped_ggwithinstats(x=group, y=value,
grouping.var = randomized_group,
type = "np",
p.adjust.methods = 'fdr',
ggtheme = theme_pubr(),
results.subtitle = FALSE,
ggplot.component = list(
scale_y_continuous(limits = c(0,60)),
scale_color_manual(values = brewer.pal(8, "Accent")[1:2]),
xlab(""),
ylab("Count")
),
annotation.args = plot_annotation(
'Pre-Treatment vs. Post-Treatment Total Engagement',
theme=theme(plot.title=element_text(size = 14,
face = "bold",
color = "black",
hjust=0.5))))
Research question 2: Will assignment to the muting treatment affect total engagement with low-quality news sources excluded from the treatment (i.e., those that are not muted) during the treatment period?
this_is_final_data |>
filter(muted == "no") |>
select(user_id, tag, randomized_group, count) |>
group_by(user_id, tag) |>
count() |> ungroup() -> test_for_nonmuted
# test_for_nonmuted - if there are no categories (tag), freq == 0
test_for_nonmuted |>
select(user_id, tag, freq) |>
pivot_wider(names_from=tag, values_from=freq) |>
mutate_if(is.numeric , replace_na, replace = 0) |>
relocate(user_id,
pre_hometimeline, post_hometimeline,
pre_engagement, post_engagement,
pre_likes, post_likes) -> test_for_nonmuted_wider
test_for_nonmuted_wider |>
select(user_id, pre_hometimeline:post_likes) |>
pivot_longer(cols=pre_hometimeline:post_likes,
names_to = "tag",
values_to = "n") -> test_for_nonmuted_longer
test_for_nonmuted |>
select(user_id, tag, count) |>
unique() |>
merge(test_for_muted_longer, by=c("user_id", "tag"), all=TRUE) |>
mutate_if(is.numeric , replace_na, replace = 0) -> test_for_nonmuted_longer2
test_for_nonmuted |>
select(user_id, tag, randomized_group) |> unique() -> for_merge_nonmuted_1
for_merge_nonmuted_1 |>
select(user_id, randomized_group) |>
unique() -> for_merge_nonmuted_2
test_for_nonmuted_longer2 |>
merge(for_merge_nonmuted_1, by = c('user_id', 'tag'), all.x = T) |>
select(user_id, tag, count, n) |>
merge(for_merge_nonmuted_2, by = "user_id") -> test_for_nonmuted_longer3
# backup
df_plot_nonmuted = test_for_nonmuted_longer3
# df for visualization
df_plot_nonmuted |>
filter(tag=="pre_likes" | tag=="pre_engagement") |>
select(!count) |>
pivot_wider(id_cols=c('user_id', 'randomized_group'),
names_from = "tag", values_from="n") |>
mutate_if(is.numeric , replace_na, replace = 0) |>
mutate(pre_total_engagement = pre_likes + pre_engagement) |>
select(user_id, randomized_group, pre_total_engagement) -> pre_total_engage_nonmuted
df_plot_nonmuted |>
filter(tag=="post_likes" | tag=="post_engagement") |>
select(!count) |>
pivot_wider(id_cols=c('user_id', 'randomized_group'),
names_from = "tag", values_from="n") |>
mutate_if(is.numeric , replace_na, replace = 0) |>
mutate(post_total_engagement = post_likes + post_engagement) |>
select(user_id, randomized_group, post_total_engagement) -> post_total_engage_nonmuted
pre_total_engage_nonmuted |>
merge(post_total_engage_nonmuted,
by=c("user_id", "randomized_group")) -> merge_total_engage_nonmuted
merge_total_engage_nonmuted |>
filter(randomized_group == "muting_treatment1") |>
select(post_total_engagement) |>
as.vector() |>
unlist() |>
as.integer() -> group1
merge_total_engage_nonmuted |>
filter(randomized_group == "media_literacy") |>
select(post_total_engagement) |>
as.vector() |>
unlist() |>
as.integer() -> group2
merge_total_engage_nonmuted |>
filter(randomized_group == "control") |>
select(post_total_engagement) |>
as.vector() |>
unlist() |>
as.integer() -> group3
merge_total_engage_nonmuted |>
ggplot(aes(x = randomized_group, y = post_total_engagement)) +
geom_violin(aes(fill = randomized_group), alpha=0.3) +
ggbeeswarm::geom_quasirandom(
shape = 21, color = "white",
alpha = 0.9, size = 3,
aes(fill = randomized_group)
) +
geom_bar(stat = "summary", fun = mean, width = 0.7, alpha = 0.6,
aes(fill = randomized_group)) +
stat_summary(geom = "errorbar", fun.data = "mean_se", width = 0.1, linewidth = 1) +
scale_fill_manual(values = brewer.pal(8, "Accent")[1:3]) +
labs(x = "Randomized Group",
y = "Count") +
theme_classic() +
theme(
text = element_text(size = 12, face = "bold", color = "black"),
axis.text = element_text(color = "black"),
legend.position = "none",
plot.title = element_text(size = 10),
plot.caption = element_text(hjust = 0)
) +
ggtitle(
paste0(
"Muting: mean = ", signif(mean(group1), 2),
"; sd = ", signif(sd(group1), 2), "; median = ", signif(median(group1), 2),
"; IQR = ", signif(IQR(group1), 2), " (N=", length(group1), ")", "\n",
"Media Literacy: mean = ", signif(mean(group2), 2),
"; sd = ", signif(sd(group2), 2),
"; median = ", signif(median(group2), 2),
"; IQR = ", signif(IQR(group2), 2)," (N=", length(group2), ")", "\n",
"Control: mean = ", signif(mean(group3), 2),
"; sd = ", signif(sd(group3), 2),
"; median = ", signif(median(group3), 2),
"; IQR = ", signif(IQR(group3), 2)," (N=", length(group3), ")"
)) -> bar_post_total_engage_nonmuted
bar_post_total_engage_nonmuted + scale_y_sqrt() +
plot_annotation('Post-Treatment Total Engagement with Non-Muted LQ Sources',
theme=theme(plot.title=element_text(size = 12,
face = "bold",
color = "black",
hjust=0.5)))
merge_total_engage_nonmuted |>
mutate(randomized_group = ifelse(randomized_group=="media_literacy", "Media Literacy", ifelse(randomized_group=="control", "Control", "Muting"))) |>
pivot_longer(cols=c("pre_total_engagement", "post_total_engagement")) |>
mutate(group = factor(name,
levels = c('pre_total_engagement',
'post_total_engagement'))) |>
grouped_ggwithinstats(x=group, y=value,
grouping.var = randomized_group,
type = "np",
p.adjust.methods = 'fdr',
ggtheme = theme_pubr(),
results.subtitle = FALSE,
ggplot.component = list(
scale_y_continuous(limits = c(0,60)),
scale_color_manual(values = brewer.pal(8, "Accent")[1:2]),
xlab(""),
ylab("Count")
),
annotation.args = plot_annotation(
'Pre-Treatment vs. Post-Treatment Total Engagement with Non-Muted LQ Sources',
theme=theme(plot.title=element_text(size = 14,
face = "bold",
color = "black",
hjust=0.5))))
4 Note
- About data collection
I collected post-treatment data on Mar 26, which is ~26 days after the start of Wave 2. Thus, the post-treatment data contains minimum 19 days ~ maximum 26 days post Wave 2, while pre-treatment data contains maximum 4 weeks.
- FYI) For pre-treatment likes, the date can go way back.
Post-treatment data: Considering that muting took ~1 day or more, I filtered data with created_at >= Wave 2 session start time + 2 days, to make sure that I only include tweets from LQ sources after the muting job is done.