-
Notifications
You must be signed in to change notification settings - Fork 1
/
model.py
103 lines (85 loc) · 3.95 KB
/
model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import os
import numpy as np
import torch
from torch.utils.data import DataLoader
import clip
from download_dataset import download_pet_images, update_labels, get_labels, get_pet_classes
from sklearn.linear_model import LogisticRegression
import pickle
from my_utils import device, clip_model, clip_preprocess
def clip_zero_shot(image_input, k=5):
    """Rank pet classes for an image with CLIP zero-shot text matching.

    One text prompt is built per class name returned by ``get_pet_classes()``.
    The image and the prompts are encoded with ``clip_model``, both sets of
    features are unit-normalized, and the cosine similarities (scaled by 100)
    are turned into probabilities with a softmax over the classes.

    Args:
        image_input: Input accepted by ``clip_model.encode_image``. Only the
            first row of the resulting batch is ranked.
            NOTE(review): this function does not move it to ``device``;
            presumably the caller already did -- confirm.
        k: Number of top classes to return. Values larger than the number of
            classes are clamped instead of raising inside ``topk``.

    Returns:
        A ``(values, indices)`` pair as returned by ``Tensor.topk``: the
        highest probabilities and their positions in the class list.
    """
    pet_classes = get_pet_classes()

    # clip.tokenize accepts a list of strings, so all prompts are tokenized
    # in a single call before being moved to the model's device.
    prompts = [f"a photo of a {c}, a type of pet." for c in pet_classes]
    text_inputs = clip.tokenize(prompts).to(device)

    # Inference only: no gradients are needed for the encoders.
    with torch.no_grad():
        image_features = clip_model.encode_image(image_input)
        text_features = clip_model.encode_text(text_inputs)

    # Unit-normalize so the matrix product below is a cosine similarity.
    image_features /= image_features.norm(dim=-1, keepdim=True)
    text_features /= text_features.norm(dim=-1, keepdim=True)

    similarity = 100.0 * image_features @ text_features.T
    p_class_given_image = similarity.softmax(dim=-1)  # P(class | image)

    # topk raises if asked for more entries than exist, so cap k.
    num_classes = p_class_given_image.shape[-1]
    values, indices = p_class_given_image[0].topk(min(k, num_classes))
    return values, indices
def get_features(data_set, encoder = clip_model):
    """Encode every image of a dataset and collect features and labels.

    The dataset is read in order (no shuffling) in batches of 64. Each image
    batch is moved to ``device`` and passed through ``encoder.encode_image``
    with gradients disabled.

    Args:
        data_set: Dataset whose batches unpack into ``(images, labels)``
            tensors when wrapped in a ``DataLoader``.
        encoder: Object exposing ``encode_image``; defaults to ``clip_model``.

    Returns:
        A ``(image_features, labels)`` tuple of NumPy arrays, each the
        concatenation of the per-batch results in dataset order.
    """
    loader = DataLoader(data_set, batch_size=64, shuffle=False)

    feature_chunks, label_chunks = [], []
    for batch_images, batch_labels in loader:
        # Inference only, so skip gradient tracking while encoding.
        with torch.no_grad():
            encoded = encoder.encode_image(batch_images.to(device))

        # Bring the features back to host memory before converting to NumPy.
        feature_chunks.append(encoded.cpu().numpy())
        label_chunks.append(batch_labels.numpy())

    return np.concatenate(feature_chunks), np.concatenate(label_chunks)
def train_logistic_regression(transform=clip_preprocess):
    """Fit a logistic-regression classifier on image features and save it.

    The training split is obtained from ``download_pet_images``, relabelled
    with ``update_labels`` using the frame from ``get_labels(is_train=True)``,
    and encoded with ``get_features``. The fitted classifier is pickled to
    ``model/clip_linear_probe.pkl`` (relative to the current working
    directory).

    Args:
        transform: Image transform forwarded to ``download_pet_images`` as
            ``img_transform``; defaults to ``clip_preprocess``.

    Returns:
        The fitted ``LogisticRegression`` instance.
    """
    # The second value returned by download_pet_images is not used here.
    pet_train_trans, _ = download_pet_images(img_transform=transform)
    labels_df = get_labels(is_train=True)
    pet_train_final = update_labels(pet_train_trans, labels_df)

    train_features, train_labels = get_features(pet_train_final)

    clf = LogisticRegression(random_state=0, max_iter=500)
    clf.fit(train_features, train_labels)

    # exist_ok=True creates the folder only when missing, without the
    # check-then-create race of a separate os.path.exists() test.
    folder_path = "model"
    os.makedirs(folder_path, exist_ok=True)

    model_path = os.path.join(folder_path, 'clip_linear_probe.pkl')
    with open(model_path, 'wb') as file:
        pickle.dump(clf, file)

    return clf
def linear_probe(image_input, k=5):
    """Rank classes for an image with the saved logistic-regression probe.

    The classifier is loaded from ``model/clip_linear_probe.pkl``; when that
    file does not exist it is trained (and saved) via
    ``train_logistic_regression()``. The image is encoded with ``clip_model``
    and the classifier's class probabilities for the first row are ranked.

    Args:
        image_input: Input accepted by ``clip_model.encode_image``. Only the
            first row of the resulting batch is ranked.
        k: Maximum number of top classes to return.

    Returns:
        A ``(values, indices)`` tuple: ``values`` is a list of the highest
        probabilities in descending order and ``indices`` is a NumPy array of
        the matching column positions in ``predict_proba``'s output.
        NOTE(review): those positions index ``clf.classes_``; they equal the
        label values only if training labels are 0..N-1 -- confirm.
    """
    # Same location train_logistic_regression() writes to.
    model_path = os.path.join('model', 'clip_linear_probe.pkl')

    try:
        with open(model_path, 'rb') as file:
            # NOTE: unpickling can execute arbitrary code; this file must
            # only ever come from a trusted source (our own training run).
            clf = pickle.load(file)
    except FileNotFoundError:
        clf = train_logistic_regression()

    # Inference only: no gradients are needed for the encoder.
    with torch.no_grad():
        image_features = clip_model.encode_image(image_input)

    # scikit-learn works on host-memory arrays; a tensor living on a CUDA
    # device cannot be converted implicitly, so move it to the CPU first
    # (mirrors what get_features does for the training data).
    features_np = image_features.cpu().numpy()

    y_probabilities = clf.predict_proba(features_np)
    indices = np.argsort(y_probabilities[0])[::-1][:k]
    values = y_probabilities[0][indices].tolist()
    return values, indices
def get_yolo5(model_type='m'):
    """Load a pretrained YOLOv5 model through ``torch.hub``.

    Before loading, ``torch.hub``'s private ``_validate_not_a_forked_repo``
    hook is replaced with a stub that always returns ``True``. The
    replacement is module-wide and stays in place after this call.

    Args:
        model_type: Suffix appended to ``'yolov5'`` to form the hub entry
            point name (the default ``'m'`` gives ``'yolov5m'``).

    Returns:
        The object returned by ``torch.hub.load`` for the
        ``ultralytics/yolov5`` repository.
    """
    def _accept_any_repo(a, b, c):
        # Stand-in for the hub's fork validation: report every repo as fine.
        return True

    torch.hub._validate_not_a_forked_repo = _accept_any_repo

    entry_point = f'yolov5{model_type}'
    return torch.hub.load('ultralytics/yolov5', entry_point, pretrained=True)
def get_preds(img):
    """Run YOLOv5 on one image and return its detections as a NumPy array.

    Args:
        img: Image passed to the model as a single-element list.

    Returns:
        NumPy array converted from the first entry of the result's ``xyxy``
        list. NOTE(review): presumably one row per detection in
        (x1, y1, x2, y2, confidence, class) order -- confirm against the
        YOLOv5 documentation.
    """
    # NOTE: the model is rebuilt on every call via get_yolo5().
    model = get_yolo5()
    detections = model([img]).xyxy[0]
    # Tensor.numpy() only works for host-memory tensors; .cpu() is a no-op on
    # CPU and makes this safe when the model runs on a GPU.
    return detections.cpu().numpy()