data

Package Contents

download_dataset(dataset_name[, cache_path])

class BasicDataset(dataset_dir, batch_size=200, read_labels=False, as_tensor=True, contextual_embed=False, doc_embed_model='all-MiniLM-L6-v2', device='cpu')
    vocab_size = 0
    load_data(path, read_labels)
class RawDataset(docs, preprocess=None, batch_size=200, device='cpu', as_tensor=True, contextual_embed=False, pretrained_WE=False, doc_embed_model='all-MiniLM-L6-v2', embed_model_device=None, verbose=False)
    train_data
    train_texts
    vocab
    vocab_size
class CrosslingualDataset(dataset_dir, lang1, lang2, dict_path, device='cpu', batch_size=200, as_tensor=True)
    batch_size = 200
    train_size_en = 0
    train_size_cn = 0
    vocab_size_en = 0
    vocab_size_cn = 0
    pretrained_WE_en
    pretrained_WE_cn
    Map_en2cn
    Map_cn2en
    move_to_device(bow, device)
    read_data(dataset_dir, lang)
    parse_dictionary(dict_path)
    get_Map(trans_matrix, bow)
class DynamicDataset(dataset_dir, batch_size=200, read_labels=False, device='cpu', as_tensor=True)
    vocab_size = 0
    train_size
    num_times
    train_time_wordfreq
    load_data(path, read_labels)
    get_time_wordfreq(bow, times)
download_dataset(dataset_name, cache_path='~/.topmost')