NLL interpretation tool sets

Interpret a Hugging Face tokenizer

class InterpEmbeddingsTokenizer[source]

InterpEmbeddingsTokenizer(embedding_matrix, tokenizer) :: InterpEmbeddings

interp = InterpEmbeddings(embedding_matrix, vocab_dict)

interp.search("computer")

Visualize the embeddings with TensorBoard:

interp.visualize_in_tb()

from transformers import AutoTokenizer, AutoModel

PRETRAINED = "albert-base-v2"

model = AutoModel.from_pretrained(PRETRAINED)

tokenizer = AutoTokenizer.from_pretrained(PRETRAINED)
tokenizer.special_tokens_map
{'bos_token': '[CLS]',
 'eos_token': '[SEP]',
 'unk_token': '<unk>',
 'sep_token': '[SEP]',
 'pad_token': '<pad>',
 'cls_token': '[CLS]',
 'mask_token': '[MASK]'}
model.embeddings
AlbertEmbeddings(
  (word_embeddings): Embedding(30000, 128, padding_idx=0)
  (position_embeddings): Embedding(512, 128)
  (token_type_embeddings): Embedding(2, 128)
  (LayerNorm): LayerNorm((128,), eps=1e-12, elementwise_affine=True)
  (dropout): Dropout(p=0, inplace=False)
)
embedding_matrix = model.embeddings.word_embeddings.weight.data.numpy()
embedding_matrix.shape
(30000, 128)
tokenizer.special_tokens_map.values
<function dict.values>
interp = InterpEmbeddingsTokenizer(
    embedding_matrix,tokenizer=tokenizer)
interp.search("wife")