How can I test it on an arbitrary RGB image? #10

Open

aligoglos opened this issue Aug 8, 2020 · 5 comments

Comments

@aligoglos

I've tried to write demo code, but I'm stuck on how to interpret the output of the network:

import argparse
import math
import os
import cv2
import numpy as np
import torch
import torchvision
import torchvision.transforms as transforms
from config import cfg
from config import update_config
from core.inference import get_final_preds
from utils.vis import save_debug_images
import glob
from models.pose_hrnet import get_pose_net

def parse_args():
	parser = argparse.ArgumentParser(description='Train keypoints network')
	# general
	parser.add_argument('--cfg',
						help='experiment configure file name',
						default='experiments/coco/hrnet/w48_384x288_adam_lr1e-3.yaml',
						type=str)

	parser.add_argument('opts',
						help="Modify config options using the command-line",
						default=None,
						nargs=argparse.REMAINDER)

	parser.add_argument('--modelDir',
						help='model directory',
						type=str,
						default='')
	parser.add_argument('--logDir',
						help='log directory',
						type=str,
						default='')
	parser.add_argument('--dataDir',
						help='data directory',
						type=str,
						default='./Inputs/')
	parser.add_argument('--prevModelDir',
						help='prev Model directory',
						type=str,
						default='')

	args = parser.parse_args()
	return args

def save_images(img, joints_pred, name, nrow=8, padding=2):
	# draw the predicted joints on the image and save it to Results/
	nmaps = 1
	xmaps = min(nrow, nmaps)
	ymaps = int(math.ceil(float(nmaps) / xmaps))
	height = int(img.shape[0] + padding)
	width = int(img.shape[1] + padding)
	k = 0
	for y in range(ymaps):
		for x in range(xmaps):
			if k >= nmaps:
				break
			joints = joints_pred[k]
			for joint in joints:
				joint[0] = x * width + padding + joint[0]
				joint[1] = y * height + padding + joint[1]
				cv2.circle(img, (int(joint[0]), int(joint[1])), 2, [255, 0, 0], 2)
			k = k + 1
	cv2.imwrite(f"Results/{name}", img)

def main():
	normalize = transforms.Normalize(
			mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
		)
	transform = transforms.Compose([
		transforms.ToTensor(),
		normalize,
	])
	args = parse_args()
	update_config(cfg, args)
	image_size = np.array(cfg.MODEL.IMAGE_SIZE)

	model = get_pose_net(
		cfg, is_train=False
	)

	if cfg.TEST.MODEL_FILE:
		model.load_state_dict(torch.load(cfg.TEST.MODEL_FILE), strict=False)
	else:
		# fall back to a checkpoint under the configured output directory
		model_state_file = os.path.join(
			cfg.OUTPUT_DIR, 'final_state.pth'
		)
		model.load_state_dict(torch.load(model_state_file))

	model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda()
	
	img_path_l = sorted(glob.glob('./Inputs' + '/*'))
	with torch.no_grad():
		for path in img_path_l:
			name  = path.split('/')[-1]
			image = cv2.imread(path)
			image = cv2.resize(image, (384, 288))
			input = transform(image).unsqueeze(0)
			#print(input.shape)
			outputs = model(input)
			if isinstance(outputs, list):
				output = outputs[-1]
			else:
				output = outputs
			print(f"{name} : {output.shape}")
	

if __name__ == '__main__':
	main()

I don't know what to set for scale and center in get_final_preds.

@hbin-ac
Contributor

hbin-ac commented Aug 17, 2020

> I don't know what to set for scale and center in get_final_preds.

cv2.resize hides the scale and center.
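
To make that concrete, here is a minimal sketch of what the repo's pipeline does instead of a plain resize: compute a center and scale, warp the crop with get_affine_transform, and pass the same center/scale to get_final_preds (both functions are from this repo; the preprocess helper and the choice to treat the whole image as the box are only illustrative assumptions):

import cv2
import numpy as np
from utils.transforms import get_affine_transform  # this repo
from core.inference import get_final_preds         # this repo

def preprocess(image, cfg):
	# center of the "box" (here: the whole image) and its size in 200 px units
	h, w = image.shape[:2]
	center = np.array([w / 2.0, h / 2.0], dtype=np.float32)
	scale = np.array([w / 200.0, h / 200.0], dtype=np.float32)
	image_size = np.array(cfg.MODEL.IMAGE_SIZE)  # (width, height), e.g. [288, 384]
	trans = get_affine_transform(center, scale, 0, image_size)
	warped = cv2.warpAffine(
		image, trans, (int(image_size[0]), int(image_size[1])),
		flags=cv2.INTER_LINEAR)
	return warped, center, scale

# after the usual ToTensor/Normalize and the forward pass (batch size 1):
# preds, maxvals = get_final_preds(
#     cfg, output.clone().cpu().numpy(),
#     np.asarray([center]), np.asarray([scale]))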

@aligoglos
Author

What should I do?

@q5390498

q5390498 commented Sep 5, 2020

Did you solve this problem?

@Kuekua

Kuekua commented Mar 11, 2021

I found the following code in the mmpose project; maybe it can solve this problem:

import numpy as np


def _ceil_to_multiples_of(x, base=64):
    """mmpose helper: round x up to the nearest multiple of `base`."""
    return int(np.ceil(x / base)) * base


def _get_multi_scale_size(image, input_size, current_scale, min_scale):
    """Get the size for multi-scale training
    Args:
        image: Input image.
        input_size (int): Size of the image input.
        current_scale (float): Scale factor.
        min_scale (float): Minimal scale.

    Returns:
        tuple: A tuple containing multi-scale sizes.

        - (w_resized, h_resized) (tuple(int)): resized width/height
        - center (np.ndarray): image center
        - scale (np.ndarray): scales wrt width/height
    """
    h, w, _ = image.shape

    center = np.array([round(w / 2.0), round(h / 2.0)])

    # calculate the size for min_scale
    min_input_size = _ceil_to_multiples_of(min_scale * input_size, 64)
    if w < h:
        w_resized = int(min_input_size * current_scale / min_scale)
        h_resized = int(
            _ceil_to_multiples_of(min_input_size / w * h, 64) * current_scale /
            min_scale)
        scale_w = w / 200.0
        scale_h = h_resized / w_resized * w / 200.0
    else:
        h_resized = int(min_input_size * current_scale / min_scale)
        w_resized = int(
            _ceil_to_multiples_of(min_input_size / h * w, 64) * current_scale /
            min_scale)
        scale_h = h / 200.0
        scale_w = w_resized / h_resized * h / 200.0

    return (w_resized, h_resized), center, np.array([scale_w, scale_h])
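
A rough usage sketch, assuming a single scale (input_size=512 is a placeholder, and get_final_preds is this repo's heatmap decoder):

(w_resized, h_resized), center, scale = _get_multi_scale_size(image, 512, 1.0, 1.0)
resized = cv2.resize(image, (w_resized, h_resized))
# ... forward pass on `resized`, then decode with the same center/scale:
# preds, maxvals = get_final_preds(cfg, heatmaps, np.asarray([center]), np.asarray([scale]))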

@mlantern

mlantern commented Jul 8, 2022

I found the following code in this project; maybe it can solve this problem:

import numpy as np


def box_to_center_scale(box, model_image_width, model_image_height):
    """convert a box to center,scale information required for pose transformation
    Parameters
    ----------
    box : list of tuple
        list of length 2 with two tuples of floats representing
        bottom left and top right corner of a box
    model_image_width : int
    model_image_height : int
    Returns
    -------
    (numpy array, numpy array)
        Two numpy arrays, coordinates for the center of the box and the scale of the box
    """
    center = np.zeros((2), dtype=np.float32)
    bottom_left_corner = box[0]
    top_right_corner = box[1]
    box_width = top_right_corner[0] - bottom_left_corner[0]
    box_height = top_right_corner[1] - bottom_left_corner[1]
    bottom_left_x = bottom_left_corner[0]
    bottom_left_y = bottom_left_corner[1]
    center[0] = bottom_left_x + box_width * 0.5
    center[1] = bottom_left_y + box_height * 0.5

    aspect_ratio = model_image_width * 1.0 / model_image_height
    pixel_std = 200

    if box_width > aspect_ratio * box_height:
        box_height = box_width * 1.0 / aspect_ratio
    elif box_width < aspect_ratio * box_height:
        box_width = box_height * aspect_ratio
    scale = np.array(
        [box_width * 1.0 / pixel_std, box_height * 1.0 / pixel_std],
        dtype=np.float32)
    if center[0] != -1:
        scale = scale * 1.25

    return center, scale
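
A hypothetical usage with a person box from any detector (the box coordinates and the 288x384 input size are placeholders; get_affine_transform and get_final_preds are this repo's functions):

box = [(100.0, 200.0), (300.0, 600.0)]               # two opposite corners of the person box
center, scale = box_to_center_scale(box, 288, 384)   # model input width/height
trans = get_affine_transform(center, scale, 0, np.array([288, 384]))
crop = cv2.warpAffine(image, trans, (288, 384), flags=cv2.INTER_LINEAR)
# ... run the model on `crop` (after the usual ToTensor/Normalize), then:
# preds, maxvals = get_final_preds(cfg, heatmaps, np.asarray([center]), np.asarray([scale]))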
