Extract topic correlation network — get_network

get_network() is a helper function to extract topic correlation networks as tidygraph objects and add labels and topic proportions.

Arguments

model: The stm model for computing the correlation network.
method: The method for determining edges. Can be either 'simple' or 'huge'.
cutoff: The correlation cutoff criterion used when method = 'simple'; correlations below this value are not treated as edges. Defaults to 0.05.
labels: An optional vector of topic labels. Must include a label for each topic of the model.
cutiso: Remove isolated nodes without any edges from the network. Defaults to FALSE.

Value

Returns a tidygraph network of topic correlations, with topic labels and topic proportions attached to the nodes.

Examples



# Example: fit a small stm topic model, extract its topic correlation
# network with get_network(), and plot that network with ggraph.
library(stm)
library(ggraph)
#> Warning: package 'ggraph' was built under R version 4.2.3
library(quanteda)

# Number of topics. Shared by the model fit (K) and the label vector so
# that get_network() always receives exactly one label per topic.
n_topics <- 10

# prepare data
# Build a corpus from the open-ended responses and keep the raw text as a
# document variable.
data <- corpus(gadarian, text_field = 'open.ended.response')
docvars(data)$text <- as.character(data)

# Tokenise, stem, drop English stopwords, build the document-feature
# matrix, and keep only terms that occur at least twice.
# NOTE(review): stopwords are removed *after* stemming, so stopwords whose
# stem differs from the listed form may survive -- confirm this is intended
# before reordering, since it changes the fitted vocabulary.
data <- tokens(data, remove_punct = TRUE) |>
  tokens_wordstem() |>
  tokens_remove(stopwords('english')) |>
  dfm() |>
  dfm_trim(min_termfreq = 2)

# Convert to the documents / vocab / meta list consumed by stm() below.
out <- convert(data, to = 'stm')

# fit model
gadarian_10 <- stm(
  documents = out$documents,
  vocab = out$vocab,
  data = out$meta,
  prevalence = ~ treatment + s(pid_rep),
  K = n_topics,
  max.em.its = 1, # reduce computation time for example
  verbose = FALSE
)

# The block below is never executed (guarded by `if (FALSE)`); it shows the
# intended usage without running it when the example is evaluated.
if (FALSE) {
  # extract network
  stm_corrs <- get_network(
    model = gadarian_10,
    method = 'simple',
    labels = paste('Topic', seq_len(n_topics)),
    cutoff = 0.001,
    cutiso = TRUE
  )

  # plot network
  # Edge width encodes the correlation (`weight`), label size encodes the
  # topic proportion (`props`). No edge label is mapped, so no label
  # styling is passed to geom_edge_link().
  ggraph(stm_corrs, layout = 'auto') +
    geom_edge_link(
      aes(edge_width = weight),
      edge_colour = '#377eb8'
    ) +
    geom_node_point(size = 4, colour = 'black') +
    geom_node_label(
      aes(label = name, size = props),
      colour = 'black', repel = TRUE, alpha = 0.85
    ) +
    scale_size(range = c(2, 10), labels = scales::percent) +
    labs(size = 'Topic Proportion', edge_width = 'Topic Correlation') +
    scale_edge_width(range = c(1, 3)) +
    theme_graph()
}