-
Notifications
You must be signed in to change notification settings - Fork 0
/
Word2VecVisualization.py
44 lines (33 loc) · 1.41 KB
/
Word2VecVisualization.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
# Export a trained gensim Word2Vec model so its embeddings can be explored in the TensorBoard projector.
import os
import numpy as np
from gensim.models import Word2Vec
import tensorflow as tf
from tensorflow.contrib.tensorboard.plugins import projector
# --- Configuration ----------------------------------------------------------
# Root directory that holds the trained gensim model; all projector output is
# written to the "projector" sub-directory beneath it.
DATA_PATH = os.path.expanduser("~/NLP_Data/")
# Width of each row of the embedding matrix.
# NOTE(review): this has to equal the vector size the loaded model was trained
# with, otherwise filling the matrix below fails with a shape mismatch --
# confirm against the model.
DIMENSIONS = 300
# Output locations (same runtime values the script has always used).
PROJECTOR_DIR = DATA_PATH + "./projector"
METADATA_PATH = PROJECTOR_DIR + "/metadata.tsv"

# --- Load the trained gensim model ------------------------------------------
model = Word2Vec.load(DATA_PATH + "word2vec_model")

# --- Export vectors and labels ----------------------------------------------
# open() does not create missing parent directories, so make sure the output
# directory exists before the metadata file is written.
if not os.path.isdir(PROJECTOR_DIR):
    os.makedirs(PROJECTOR_DIR)

# One matrix row per vocabulary word; the metadata file lists the words in the
# same order so that row i of the matrix is labelled by line i of the file.
vocabulary = model.wv.index2word
w2v_10K = np.zeros((len(vocabulary), DIMENSIONS))

# Binary mode: each label is written as UTF-8 bytes followed by a bytes
# newline. Mixing encoded bytes with a text "\n" raises TypeError on Python 3.
with open(METADATA_PATH, "wb") as file_metadata:
    for i, word in enumerate(vocabulary):
        # Look vectors up through model.wv, the same accessor used for the
        # vocabulary list above, rather than indexing the model object.
        w2v_10K[i] = model.wv[word]
        file_metadata.write(word.encode("utf-8") + b"\n")

# --- Define the model without training --------------------------------------
sess = tf.InteractiveSession()

# The embedding is stored as a non-trainable variable pinned to the CPU; the
# variable name must match `embed.tensor_name` set further down.
with tf.device("/cpu:0"):
    embedding = tf.Variable(w2v_10K, trainable=False, name='lyrics_embeddings')

tf.global_variables_initializer().run()

saver = tf.train.Saver()
writer = tf.summary.FileWriter(PROJECTOR_DIR, sess.graph)

# --- Register the embedding with the projector ------------------------------
config = projector.ProjectorConfig()
embed = config.embeddings.add()
embed.tensor_name = 'lyrics_embeddings'
embed.metadata_path = METADATA_PATH

# Hand the configuration to the projector plugin through the summary writer.
projector.visualize_embeddings(writer, config)

# Save the variable to a checkpoint inside the projector directory.
saver.save(sess, PROJECTOR_DIR + '/lyrics_model.ckpt', global_step=10000)

# Flush any pending events and release the session now that everything the
# projector needs has been written.
writer.close()
sess.close()

# To view the embedding, start TensorBoard with --logdir pointing at the
# projector directory under DATA_PATH, e.g.
#   tensorboard --logdir ~/NLP_Data/projector
# and open localhost:6006.