get_network()
is a helper function to extract topic correlation networks
as tidygraph objects and add labels and topic proportions.
The stm model for computing the correlation network.
The method for determining edges. Can be either 'simple'
or 'huge'
.
The correlation cutoff criterion for method = 'simple'
. Defaults to 0.05
.
An optional vector of topic labels. Must include a label for each topic of the model.
Remove isolated nodes without any edges from the network. Defaults to FALSE
.
Returns tidygraph network of topic correlations.
library(stm)
library(ggraph)
#> Warning: package 'ggraph' was built under R version 4.2.3
library(quanteda)
# prepare data
# Build a corpus from the open-ended responses and keep the raw text
# as a document variable.
data <- corpus(gadarian, text_field = 'open.ended.response')
docvars(data)$text <- as.character(data)

# Tokenize, stem, drop English stopwords, and keep only terms that
# occur at least twice in the resulting document-feature matrix.
data <- data |>
  tokens(remove_punct = TRUE) |>
  tokens_wordstem() |>
  tokens_remove(stopwords('english')) |>
  dfm() |>
  dfm_trim(min_termfreq = 2)

# Convert to the documents / vocab / meta structure used by the model fit.
out <- convert(data, to = 'stm')
# fit model
# Ten-topic model with prevalence depending on treatment and a smooth
# term of pid_rep. A single EM iteration is used only to reduce
# computation time for the example.
gadarian_10 <- stm(
  documents = out$documents,
  vocab = out$vocab,
  K = 10,
  prevalence = ~ treatment + s(pid_rep),
  data = out$meta,
  max.em.its = 1,
  verbose = FALSE
)
if (FALSE) {
  # The guard above keeps this part of the example from being executed.

  # extract network
  # One label per topic is supplied; isolated nodes are removed (cutiso).
  stm_corrs <- get_network(
    model = gadarian_10,
    method = 'simple',
    cutoff = 0.001,
    cutiso = TRUE,
    labels = paste('Topic', seq_len(10))
  )

  # plot network
  # Edge width is mapped to `weight`, node label size to `props`.
  ggraph(stm_corrs, layout = 'auto') +
    geom_edge_link(
      aes(edge_width = weight),
      edge_colour = '#377eb8',
      label_colour = '#fc8d62'
    ) +
    geom_node_point(colour = 'black', size = 4) +
    geom_node_label(
      aes(label = name, size = props),
      alpha = 0.85,
      colour = 'black',
      repel = TRUE
    ) +
    scale_size(labels = scales::percent, range = c(2, 10)) +
    scale_edge_width(range = c(1, 3)) +
    labs(edge_width = 'Topic Correlation', size = 'Topic Proportion') +
    theme_graph()
}