visualization
Do Won Kim
2024-03-12
1 Scatter Plot
Ro’ee: I think it would be useful at some point to create a scatter plot with WTA in round 1 in the x-axis and round 2 in the y-axis (and also just report the correlation). It’s a bit surprising that so many people provided a different answer, we should figure out if this is related to the question wording.
1.1 Plot 1.
- 14 cases with a WTA over $100 are removed (both axes are limited to 0~100)
library(tidyverse)
library(ggplot2)
library(DT)
library(haven)
library(ggpubr)
library(cowplot)
library(ggthemes)
library(readr)
library(ggExtra)
library(scales)
# Load the Wave 1 / Wave 2 WTA responses.
# Only the columns listed below get an explicit type; every other column in
# the file is left to readr's type guessing.
wta_df <- read_csv(
  file = "wta/wta_df.csv",
  col_types = cols(
    userid = col_character(),
    wta_w1 = col_integer(),
    wta_w2 = col_integer(),
    w2_compre1 = col_integer(),
    w2_compre2 = col_integer()
  )
)
# Plot 1: Wave 1 WTA (x) against Wave 2 WTA (y) for all respondents.
plot0 <- ggplot(data = wta_df, mapping = aes(x = wta_w1, y = wta_w2)) +
  # Dotted y = x reference line: points on it gave the same answer in both waves.
  geom_abline(slope = 1, intercept = 0, linetype = 3, colour = "black") +
  # Dashed red guides at 15 on each axis.
  geom_hline(yintercept = 15, linetype = 2, colour = "red") +
  geom_vline(xintercept = 15, linetype = 2, colour = "red") +
  geom_point(alpha = 0.6) +
  # Pearson correlation label.
  stat_cor(method = "pearson") +
  # Restrict both axes to 0-100; rows outside this range are dropped from the
  # plot (and from the layers computed on it).
  lims(x = c(0, 100), y = c(0, 100)) +
  labs(
    x = "WTA distribution in Wave 1 (0~100)",
    y = "WTA distribution in Wave 2 (0~100)"
  ) +
  theme_few() +
  theme(legend.position = "bottom")

# Attach marginal distribution plots (ggMarginal's default type) to both axes.
ggMarginal(plot0, fill = "gray")
1.2 Plot 2.
Group by the WTA group assigned in Wave 1.
- assigned_wta_w1: the condition the user was assigned to in Wave 1
  - scale_within: user chose a number within the 0~30 scale
  - scale_over30: user chose ‘over 30’ and then entered the WTA amount
  - open-ended: user was assigned to the open-ended version (as in Wave 2)
# Plot 2: same scatter as before, coloured by the Wave 1 assignment group.
plot1 <- ggplot(
  data = wta_df,
  mapping = aes(x = wta_w1, y = wta_w2, colour = assigned_wta_w1)
) +
  # Dotted y = x reference line: identical answers in both waves.
  geom_abline(slope = 1, intercept = 0, linetype = 3, colour = "black") +
  # Dashed red guides at 15 on each axis.
  geom_hline(yintercept = 15, linetype = 2, colour = "red") +
  geom_vline(xintercept = 15, linetype = 2, colour = "red") +
  geom_point(alpha = 0.6) +
  # Pearson correlation label; the colour mapping is inherited by this layer.
  stat_cor(method = "pearson") +
  # Restrict both axes to 0-100; rows outside this range are dropped.
  lims(x = c(0, 100), y = c(0, 100)) +
  labs(
    title = "Scatter Plot",
    x = "WTA distribution in Wave 1 (0~100)",
    y = "WTA distribution in Wave 2 (0~100)"
  ) +
  theme_few() +
  theme(legend.position = "bottom")

# Marginal distribution plots, outlined and filled per colour group.
ggMarginal(plot1, groupColour = TRUE, groupFill = TRUE)
1.3 Plot 3.
Group by category, based on WTA ($):
- always-under15: user chose below $15 in both waves
- always-over15: user chose $15 or over in both waves
- favorable-shift: user chose $15 or over in wave 1 but shifted below $15 in wave 2
- unfavorable-shift: user chose below $15 in wave 1 but shifted to $15 or over in wave 2
# Keep respondents with a Wave 1 WTA and label each one by which side of 15
# their answer fell on in each wave. Rows whose Wave 2 WTA is missing match
# none of the conditions and therefore get an NA category.
wta_df <- wta_df |>
  filter(!is.na(wta_w1)) |>
  mutate(
    category = case_when(
      wta_w1 < 15 & wta_w2 < 15 ~ "always-under15",
      wta_w1 >= 15 & wta_w2 >= 15 ~ "always-over15",
      wta_w1 >= 15 & wta_w2 < 15 ~ "favorable-shift",
      wta_w1 < 15 & wta_w2 >= 15 ~ "unfavorable-shift"
    )
  )
# Plot 3: scatter coloured by the under/over-15 shift category.
plot2 <- ggplot(
  data = wta_df,
  mapping = aes(x = wta_w1, y = wta_w2, colour = category)
) +
  # Dotted y = x reference line: identical answers in both waves.
  geom_abline(slope = 1, intercept = 0, linetype = 3, colour = "black") +
  geom_point(alpha = 0.6) +
  # Dashed red guides at 15 on each axis (drawn on top of the points).
  geom_hline(yintercept = 15, linetype = 2, colour = "red") +
  geom_vline(xintercept = 15, linetype = 2, colour = "red") +
  # Pearson correlation label; the colour mapping is inherited by this layer.
  stat_cor(method = "pearson") +
  # Restrict both axes to 0-100; rows outside this range are dropped.
  lims(x = c(0, 100), y = c(0, 100)) +
  labs(
    x = "WTA distribution in Wave 1 (0~100)",
    y = "WTA distribution in Wave 2 (0~100)"
  ) +
  theme_few() +
  theme(legend.position = "bottom")

# Marginal distribution plots, outlined and filled per colour group.
ggMarginal(plot2, groupColour = TRUE, groupFill = TRUE)
1.4 Plot 4.
- Log10 scales on both the x and y axes
- Cases with WTA > $100 are not filtered out
# Plot 4: category-coloured scatter on log10 axes, with no 0-100 axis limits.
# NOTE(review): a WTA of 0 has no finite log10 value, so any such rows would
# be dropped from this plot -- confirm whether the data contain zeros.
plot3 <- ggplot(
  data = wta_df,
  mapping = aes(x = wta_w1, y = wta_w2, colour = category)
) +
  # Dotted reference line with slope 1 and intercept 0.
  geom_abline(slope = 1, intercept = 0, linetype = 3, colour = "black") +
  geom_point(alpha = 0.6) +
  # Dashed red guides at 15 on each axis (drawn on top of the points).
  geom_hline(yintercept = 15, linetype = 2, colour = "red") +
  geom_vline(xintercept = 15, linetype = 2, colour = "red") +
  # Pearson correlation label; the colour mapping is inherited by this layer.
  stat_cor(method = "pearson") +
  # Log10 axes with labels formatted by scales::label_log().
  scale_x_log10(labels = label_log(digits = 2)) +
  scale_y_log10(labels = label_log(digits = 2)) +
  labs(
    x = "WTA distribution in Wave 1 (log10 scale)",
    y = "WTA distribution in Wave 2 (log10 scale)"
  ) +
  theme_few() +
  theme(legend.position = "bottom")

# Marginal distribution plots, outlined and filled per colour group.
ggMarginal(plot3, groupColour = TRUE, groupFill = TRUE)
2 Other plots or exploratory analyses based on Wave 1 & 2 responses?
- Any ideas?