`get_network()` is a helper function that extracts topic correlation networks
as tidygraph objects and adds topic labels and topic proportions to the nodes.
| Argument | Description |
|---|---|
| `model` | The stm model for computing the correlation network. |
| `method` | The method for determining edges. Can be either `'simple'` or `'huge'`. |
| `cutoff` | The correlation cutoff criterion for `method = 'simple'`. Defaults to `0.05`. |
| `labels` | An optional vector of topic labels. Must include a label for each topic of the model. |
| `cutiso` | Remove isolated nodes without any edges from the network. Defaults to `FALSE`. |
Returns a tidygraph network of topic correlations.
# Example: fit a small stm model on the `gadarian` survey data, extract its
# topic correlation network with get_network(), and plot it with ggraph.
library(stm)
library(ggraph)
library(quanteda)

# prepare data
# NOTE(review): recent quanteda releases deprecate passing a corpus and the
# `stem` / `remove` / `remove_punct` arguments directly to dfm() -- confirm
# against the installed quanteda version.
data <- corpus(gadarian, text_field = 'open.ended.response')
docvars(data)$text <- as.character(data)
data <- dfm(
  data,
  stem = TRUE,
  remove = stopwords('english'),
  remove_punct = TRUE
)
out <- convert(data, to = 'stm')

# fit model
gadarian_10 <- stm(
  documents = out$documents,
  vocab = out$vocab,
  data = out$meta,
  prevalence = ~ treatment + s(pid_rep),
  K = 10,
  max.em.its = 1, # reduce computation time for example
  verbose = FALSE
)

# extract network
stm_corrs <- get_network(
  model = gadarian_10,
  method = 'simple',
  labels = paste('Topic', 1:10),
  cutoff = 0.001,
  cutiso = TRUE
)

# \dontrun{
# plot network
ggraph(stm_corrs, layout = 'fr') +
  geom_edge_link(
    aes(edge_width = weight),
    label_colour = '#fc8d62',
    edge_colour = '#377eb8'
  ) +
  geom_node_point(size = 4, colour = 'black') +
  geom_node_label(
    aes(label = name, size = props),
    colour = 'black',
    repel = TRUE,
    alpha = 0.85
  ) +
  scale_size(range = c(2, 10), labels = scales::percent) +
  labs(size = 'Topic Proportion', edge_width = 'Topic Correlation') +
  scale_edge_width(range = c(1, 3)) +
  theme_graph()
# }