Keep any *_model argument as NULL to build that component with default parameters (see the corresponding make_* function for the available parameters). However, accepting the defaults for every model is not advisable; tune each model to your dataset and the business question you are answering.
Usage
bt_compile_model(
  ...,
  embedding_model = NULL,
  reduction_model = NULL,
  clustering_model = NULL,
  vectoriser_model = NULL,
  ctfidf_model = NULL
)
Arguments
- ...
Additional arguments sent to bertopic.BERTopic()
- embedding_model
Model for creating embeddings (Python object)
- reduction_model
Model for reducing embeddings' dimensions (Python object)
- clustering_model
Model for clustering (Python object)
- vectoriser_model
Model for vectorising input for topic representations (Python object)
- ctfidf_model
Model for performing class-based TF-IDF (c-TF-IDF) (Python object)
Examples
if (FALSE) {
# model using all default parameters
model <- bt_compile_model()
# model with modular components already generated
# define embedding and reduction modules and pass to bt_compile_model
embedder <- bt_make_embedder_st("all-mpnet-base-v2")
reducer <- bt_make_reducer_umap(n_components = 10L, n_neighbours = 20L)
model <- bt_compile_model(embedding_model = embedder, reduction_model = reducer)
# Perform document embedding and reduction outside the bertopic model,
# then pass empty models to bt_compile_model
embedder <- bt_make_embedder_st("all-mpnet-base-v2") # embedder
# docs is a character vector of documents to model
embeddings <- bt_do_embedding(embedder, docs, accelerator = NULL) # embeddings
reducer <- bt_make_reducer_umap(n_components = 10L, n_neighbours = 20L) # reducer
reduced_embeddings <- bt_do_reducing(reducer, embeddings) # reduced embeddings
# skip the embedding and reduction steps by passing empty models
model <- bt_compile_model(
  embedding_model = bt_empty_embedder(),
  reduction_model = bt_empty_reducer()
)
}
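Because `...` is forwarded to `bertopic.BERTopic()`, Python-side constructor arguments can be supplied by name. A minimal sketch, assuming the `nr_topics` and `top_n_words` parameters exist in your installed bertopic version (they are parameters of the Python bertopic API, not of this package; check `bertopic.BERTopic()`'s signature before relying on them):

```r
if (FALSE) {
# pass bertopic.BERTopic() arguments through `...`
# (use the L suffix so reticulate sends Python integers)
model <- bt_compile_model(
  embedding_model = bt_make_embedder_st("all-mpnet-base-v2"),
  nr_topics = 15L,    # merge topics down to roughly 15
  top_n_words = 10L   # words kept per topic representation
)
}
```

Arguments passed this way are handed to the Python constructor unchanged, so any misspelled name will raise an error from bertopic itself rather than from R.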