basic_dataset
=============

.. py:module:: topmost.data.basic_dataset


Module Contents
---------------

.. autoapisummary::

   topmost.data.basic_dataset.DocEmbedModel
   topmost.data.basic_dataset.RawDataset
   topmost.data.basic_dataset.BasicDataset


.. py:class:: DocEmbedModel(model: Union[str, callable] = 'all-MiniLM-L6-v2', device: str = 'cpu', verbose: bool = False)

   .. py:attribute:: verbose
      :value: False


   .. py:method:: encode(docs: List[str], convert_to_tensor: bool = False)


.. py:class:: RawDataset(docs, preprocess=None, batch_size=200, device='cpu', as_tensor=True, contextual_embed=False, pretrained_WE=False, doc_embed_model='all-MiniLM-L6-v2', embed_model_device=None, verbose=False)

   .. py:attribute:: train_data


   .. py:attribute:: train_texts


   .. py:attribute:: vocab


   .. py:attribute:: vocab_size


.. py:class:: BasicDataset(dataset_dir, batch_size=200, read_labels=False, as_tensor=True, contextual_embed=False, doc_embed_model='all-MiniLM-L6-v2', device='cpu')

   .. py:attribute:: vocab_size
      :value: 0


   .. py:method:: load_data(path, read_labels)