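"""test_rnn_updated.py

Evaluate a trained speaker-change RNN on the test split: load the two
sentence encoders and the classifier head, embed each sentence pair with
pre-trained word vectors, and report evaluation scores for the predicted
[SAME] / [CHANGE] boundaries.
"""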
import torch
from nltk.tokenize import word_tokenize
from torch.utils import data

import config
from dataset_updated import Dataset
from rnn import SpeakerClassifier  # noqa: F401 -- the class must be importable for torch.load to unpickle the saved models
from utils import (generate_batch_vectors, get_boundary_mapping, load_model,
                   print_evaluation_score, read_data_from_csv)

use_attention = config.USE_ATTENTION

# CUDA settings: fall back to CPU when CUDA is unavailable
use_cuda = torch.cuda.is_available()
device = torch.device(config.DEVICE if use_cuda else 'cpu')

if __name__ == '__main__':
    # Evaluation data
    predicted = []
    actual = []

    # Load trained models and data
    print("Loading models...")
    if use_attention and config.EQUALIZE_CLASS_COUNTS:
        print("\tUsing attention!")
        print("\tEqualized class counts!")
        model1 = torch.load(config.RNN_EQ_ATTENTION_MODEL1, map_location=device)
        model2 = torch.load(config.RNN_EQ_ATTENTION_MODEL2, map_location=device)
        classifier = torch.load(config.RNN_EQ_ATTENTION_CLASSIFIER, map_location=device)
    elif use_attention and not config.EQUALIZE_CLASS_COUNTS:
        print("\tUsing attention!")
        model1 = torch.load(config.RNN_ATTENTION_MODEL1, map_location=device)
        model2 = torch.load(config.RNN_ATTENTION_MODEL2, map_location=device)
        classifier = torch.load(config.RNN_ATTENTION_CLASSIFIER, map_location=device)
    elif not use_attention and config.EQUALIZE_CLASS_COUNTS:
        print("\tEqualized class counts!")
        model1 = torch.load(config.RNN_EQ_MODEL1, map_location=device)
        model2 = torch.load(config.RNN_EQ_MODEL2, map_location=device)
        classifier = torch.load(config.RNN_EQ_CLASSIFIER, map_location=device)
    else:
        model1 = torch.load(config.RNN_MODEL1, map_location=device)
        model2 = torch.load(config.RNN_MODEL2, map_location=device)
        classifier = torch.load(config.RNN_CLASSIFIER, map_location=device)
print("Loading pre-trained embeddings...")
w2v_model = load_model(config.PATH_TO_PRETRAINED_EMBEDDINGS)
print("Loading testing data...")
test_data = read_data_from_csv(
filename=config.CSV_FILENAME_TEST,
train=False,
equalize=False
)
print("\tTotal length of test data: {}".format(len(test_data)))
print("\tNumber of SAME records: {}".format(len([a for a in test_data if a['boundary'] == '[SAME]'])))
print("\tNumber of CHANGE records: {}".format(len([a for a in test_data if a['boundary'] == '[CHANGE]'])))
print("Creating data generator...")
test_set = Dataset(test_data)
test_generator = data.DataLoader(
dataset=test_set,
drop_last=True,
batch_size=1,
shuffle=False)

    # Move models to the target device
    classifier = classifier.to(device)
    model1 = model1.to(device)
    model2 = model2.to(device)

    # Switch models to eval mode (disables dropout etc.)
    model1.eval()
    model2.eval()
    classifier.eval()

    print("Evaluating model...")
    index = 0
    with torch.no_grad():  # no gradients needed during evaluation
        for sent1, sent2, boundary in test_generator:
            if index % 30000 == 0 and index > 0:
                print("\t{}/{} records processed!".format(index, len(test_data)))

            # If the model was trained using attention, seq_len for both
            # sentences needs to be the same, so pad to the longer one
            if use_attention:
                max_sent_len = max(
                    max([len(word_tokenize(a)) for a in sent1]),
                    max([len(word_tokenize(a)) for a in sent2]))
                sent1_vectors = generate_batch_vectors(sent1, w2v_model, max_sent_len=max_sent_len)
                sent2_vectors = generate_batch_vectors(sent2, w2v_model, max_sent_len=max_sent_len)
            # Model without attention
            else:
                sent1_vectors = generate_batch_vectors(sent1, w2v_model)
                sent2_vectors = generate_batch_vectors(sent2, w2v_model)

            boundary = get_boundary_mapping(boundary)

            # Move the batch to the target device
            sent1_vectors = sent1_vectors.to(device)
            sent2_vectors = sent2_vectors.to(device)
            boundary = torch.Tensor(boundary).to(device)

            # Forward pass through both sentence encoders
            output1, hidden1 = model1(sent1_vectors)
            output2, hidden2 = model2(sent2_vectors)

            # The attention classifier consumes both full output sequences;
            # the plain classifier consumes the concatenated final hidden states
            if use_attention:
                output = classifier(output1, output2)
            else:
                hidden1 = hidden1.squeeze(dim=0)
                hidden2 = hidden2.squeeze(dim=0)
                combined_hidden = torch.cat([hidden1, hidden2], dim=1)
                output = classifier(combined_hidden)
            output = output.squeeze(dim=1)

            pred = int(output.item() >= 0.5)
            y_true = boundary.item()
            predicted.append(pred)
            actual.append(y_true)
            index += 1

    print_evaluation_score(actual, predicted)
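
# A minimal sketch of running this script, assuming config.py points at the
# trained model files, the pre-trained embeddings, and the test CSV:
#   $ python test_rnn_updated.py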