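"""Data pipeline for YOLO training: a YOLODataset that builds multi-scale
anchor targets (with optional YOLOv5-style mosaic augmentation) and a
YOLODataModule that wraps train/val/test splits for PyTorch Lightning."""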
import os
import random

import numpy as np
import pandas as pd
import pytorch_lightning as pl
import torch
from PIL import Image, ImageFile
from torch.utils.data import DataLoader, Dataset

from . import config, transforms
from .utils import cells_to_bboxes
from .utils import iou_width_height as iou
from .utils import non_max_suppression as nms
from .utils import plot_image, xyxy2xywhn, xywhn2xyxy

ImageFile.LOAD_TRUNCATED_IMAGES = True


class YOLODataset(Dataset):
    def __init__(
        self,
        csv_file,
        img_dir,
        label_dir,
        anchors,
        image_size=416,
        S=[13, 26, 52],
        C=20,
        transform=None,
        mosaic_percentage=0.67,
    ):
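        """Dataset that yields an image plus anchor-target tensors for 3 scales.

        Args (inferred from how the fields are used below):
            csv_file: CSV whose rows pair an image filename with a label filename.
            img_dir, label_dir: directories those filenames live in.
            anchors: three lists (one per scale) of (w, h) pairs normalized to
                the image size; flattened into a single (9, 2) tensor.
            image_size: side length of the square network input.
            S: grid sizes of the three prediction scales.
            C: number of classes.
            transform: albumentations-style callable taking image= and bboxes=
                keywords and returning a dict with "image" and "bboxes".
            mosaic_percentage: probability of building a 4-image mosaic instead
                of loading a single image.
        """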
        self.annotations = pd.read_csv(csv_file)
        self.img_dir = img_dir
        self.label_dir = label_dir
        self.image_size = image_size
        self.mosaic_border = [image_size // 2, image_size // 2]
        self.transform = transform
        self.S = S
        self.anchors = torch.tensor(
            anchors[0] + anchors[1] + anchors[2]
        )  # for all 3 scales
        self.num_anchors = self.anchors.shape[0]
        self.num_anchors_per_scale = self.num_anchors // 3
        self.C = C
        self.ignore_iou_thresh = 0.5
        self.mosaic_percentage = mosaic_percentage

    def __len__(self):
        return len(self.annotations)

    def load_mosaic(self, index):
        # YOLOv5-style 4-mosaic loader: tiles this image plus 3 random images
        # into one 2s x 2s canvas around a randomly sampled center
        labels4 = []
        s = self.image_size
        yc, xc = (
            int(random.uniform(x, 2 * s - x)) for x in self.mosaic_border
        )  # mosaic center, sampled in [s/2, 3s/2] on each axis
        indices = [index] + random.choices(
            range(len(self)), k=3
        )  # 3 additional random image indices
        random.shuffle(indices)
        for i, index in enumerate(indices):
            # Load image and labels
            label_path = os.path.join(self.label_dir, self.annotations.iloc[index, 1])
            bboxes = np.roll(
                np.loadtxt(fname=label_path, delimiter=" ", ndmin=2), 4, axis=1
            ).tolist()
            img_path = os.path.join(self.img_dir, self.annotations.iloc[index, 0])
            img = np.array(Image.open(img_path).convert("RGB"))
            h, w = img.shape[0], img.shape[1]
            labels = np.array(bboxes)

            # Place img in img4
            if i == 0:  # top left
                img4 = np.full(
                    (s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8
                )  # base image with 4 tiles
                x1a, y1a, x2a, y2a = (
                    max(xc - w, 0),
                    max(yc - h, 0),
                    xc,
                    yc,
                )  # xmin, ymin, xmax, ymax (large image)
                x1b, y1b, x2b, y2b = (
                    w - (x2a - x1a),
                    h - (y2a - y1a),
                    w,
                    h,
                )  # xmin, ymin, xmax, ymax (small image)
            elif i == 1:  # top right
                x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
                x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
            elif i == 2:  # bottom left
                x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
                x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
            elif i == 3:  # bottom right
                x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
                x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)

            img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]  # img4[ymin:ymax, xmin:xmax]
            padw = x1a - x1b
            padh = y1a - y1b

            # Labels: normalized xywh -> pixel xyxy in mosaic coordinates
            if labels.size:
                labels[:, :-1] = xywhn2xyxy(labels[:, :-1], w, h, padw, padh)
            labels4.append(labels)

        # Concat labels, clip to the mosaic canvas, convert back to normalized xywh
        labels4 = np.concatenate(labels4, 0)
        np.clip(labels4[:, :-1], 0, 2 * s, out=labels4[:, :-1])
        labels4[:, :-1] = xyxy2xywhn(labels4[:, :-1], 2 * s, 2 * s)
        labels4[:, :-1] = np.clip(labels4[:, :-1], 0, 1)
        # Drop boxes whose width or height collapsed to zero after clipping
        labels4 = labels4[labels4[:, 2] > 0]
        labels4 = labels4[labels4[:, 3] > 0]
        return img4, labels4

    def load_single_img(self, index):
        label_path = os.path.join(self.label_dir, self.annotations.iloc[index, 1])
        bboxes = np.roll(
            np.loadtxt(fname=label_path, delimiter=" ", ndmin=2), 4, axis=1
        ).tolist()
        img_path = os.path.join(self.img_dir, self.annotations.iloc[index, 0])
        image = np.array(Image.open(img_path).convert("RGB"))
        return image, bboxes
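
    # Note on label files: both load_mosaic and load_single_img expect
    # YOLO-style annotations, one object per row:
    #
    #     class x_center y_center width height
    #
    # with coordinates normalized to [0, 1]. The np.roll(..., 4, axis=1) call
    # rotates each row to [x, y, w, h, class] so the class id sits in the last
    # column, which is the layout __getitem__ expects.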

    def __getitem__(self, index):
        if random.random() < self.mosaic_percentage:
            image, bboxes = self.load_mosaic(index)
        else:
            image, bboxes = self.load_single_img(index)

        if self.transform:
            augmentations = self.transform(image=image, bboxes=bboxes)
            image = augmentations["image"]
            bboxes = augmentations["bboxes"]

        # One target tensor per scale, e.g. (3, 13, 13, 6), (3, 26, 26, 6),
        # (3, 52, 52, 6), where 6 = [obj, x, y, w, h, class] for each anchor box
        targets = [torch.zeros((self.num_anchors // 3, S, S, 6)) for S in self.S]
        for box in bboxes:
            # Rank all 9 anchors by shape similarity (IoU of widths/heights)
            iou_anchors = iou(torch.tensor(box[2:4]), self.anchors)
            anchor_indices = iou_anchors.argsort(descending=True, dim=0)
            x, y, width, height, class_label = box
            has_anchor = [False] * 3  # each scale should get exactly one anchor
            for anchor_idx in anchor_indices:
                scale_idx = anchor_idx // self.num_anchors_per_scale
                anchor_on_scale = anchor_idx % self.num_anchors_per_scale
                S = self.S[scale_idx]
                # Which cell the box center falls in; clamp guards x == 1.0 or y == 1.0
                i, j = min(int(S * y), S - 1), min(int(S * x), S - 1)
                anchor_taken = targets[scale_idx][anchor_on_scale, i, j, 0]
                if not anchor_taken and not has_anchor[scale_idx]:
                    targets[scale_idx][anchor_on_scale, i, j, 0] = 1
                    x_cell, y_cell = S * x - j, S * y - i  # both in [0, 1]
                    width_cell, height_cell = (
                        width * S,
                        height * S,
                    )  # can be greater than 1 since it's relative to the cell
                    box_coordinates = torch.tensor(
                        [x_cell, y_cell, width_cell, height_cell]
                    )
                    targets[scale_idx][anchor_on_scale, i, j, 1:5] = box_coordinates
                    targets[scale_idx][anchor_on_scale, i, j, 5] = int(class_label)
                    has_anchor[scale_idx] = True
                elif (
                    not anchor_taken
                    and iou_anchors[anchor_idx] > self.ignore_iou_thresh
                ):
                    # Similar enough to the assigned anchor: mark as ignored so
                    # the loss does not penalize this prediction
                    targets[scale_idx][anchor_on_scale, i, j, 0] = -1
        return image, tuple(targets)
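

# Minimal usage sketch for YOLODataset on its own (paths and the CSV layout are
# assumptions mirroring test() below; each CSV row pairs an image filename with
# a label filename):
#
#   dataset = YOLODataset(
#       "path/to/annotations.csv",          # hypothetical paths
#       "path/to/images",
#       "path/to/labels",
#       anchors=config.ANCHORS,
#       S=[13, 26, 52],
#       transform=transforms.test_transforms,
#   )
#   image, targets = dataset[0]             # targets: one tensor per scale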


class YOLODataModule(pl.LightningDataModule):
    def __init__(self, train_csv_path, test_csv_path):
        super().__init__()
        self.train_csv_path = train_csv_path
        self.test_csv_path = test_csv_path
        self.train_dataset = None
        self.eval_dataset = None
        self.test_dataset = None

    def setup(self, stage=None):
        self.train_dataset = YOLODataset(
            self.train_csv_path,
            transform=transforms.train_transforms,
            S=[
                config.IMAGE_SIZE // 32,
                config.IMAGE_SIZE // 16,
                config.IMAGE_SIZE // 8,
            ],
            img_dir=config.IMG_DIR,
            label_dir=config.LABEL_DIR,
            anchors=config.ANCHORS,
            mosaic_percentage=config.TRAIN_MOSAIC_PERCENTAGE,
        )
        # Validation reuses the training CSV but with eval transforms and no
        # mosaic, so metrics are computed on undistorted images
        self.eval_dataset = YOLODataset(
            self.train_csv_path,
            transform=transforms.test_transforms,
            S=[
                config.IMAGE_SIZE // 32,
                config.IMAGE_SIZE // 16,
                config.IMAGE_SIZE // 8,
            ],
            img_dir=config.IMG_DIR,
            label_dir=config.LABEL_DIR,
            anchors=config.ANCHORS,
            mosaic_percentage=0.0,
        )
        self.test_dataset = YOLODataset(
            self.test_csv_path,
            transform=transforms.test_transforms,
            S=[
                config.IMAGE_SIZE // 32,
                config.IMAGE_SIZE // 16,
                config.IMAGE_SIZE // 8,
            ],
            img_dir=config.IMG_DIR,
            label_dir=config.LABEL_DIR,
            anchors=config.ANCHORS,
            mosaic_percentage=config.TEST_MOSAIC_PERCENTAGE,
        )

    def train_dataloader(self):
        return DataLoader(
            dataset=self.train_dataset,
            batch_size=config.BATCH_SIZE,
            shuffle=True,
            num_workers=config.NUM_WORKERS,
            pin_memory=config.PIN_MEMORY,
            drop_last=False,
        )

    def val_dataloader(self):
        return DataLoader(
            dataset=self.eval_dataset,
            batch_size=config.BATCH_SIZE,
            shuffle=False,
            num_workers=config.NUM_WORKERS,
            pin_memory=config.PIN_MEMORY,
            drop_last=False,
        )

    def test_dataloader(self):
        return DataLoader(
            dataset=self.test_dataset,
            batch_size=config.BATCH_SIZE,
            shuffle=False,
            num_workers=config.NUM_WORKERS,
            pin_memory=config.PIN_MEMORY,
            drop_last=False,
        )
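

# Hedged wiring sketch: how this DataModule would plug into a Lightning
# Trainer. YOLOLightningModule is a hypothetical model class (not defined in
# this file), and the CSV paths are placeholders:
#
#   datamodule = YOLODataModule(
#       train_csv_path="path/to/train.csv",
#       test_csv_path="path/to/test.csv",
#   )
#   model = YOLOLightningModule()
#   trainer = pl.Trainer(max_epochs=10, accelerator="auto")
#   trainer.fit(model, datamodule=datamodule)
#   trainer.test(model, datamodule=datamodule)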


def test():
    anchors = config.ANCHORS
    transform = transforms.test_transforms
    dataset = YOLODataset(
        "../data/PASCAL_VOC/2examples.csv",
        "../data/PASCAL_VOC/images",
        "../data/PASCAL_VOC/labels",
        S=[13, 26, 52],
        anchors=anchors,
        transform=transform,
    )
    S = [13, 26, 52]
    # Rescale the normalized anchors to grid units at each scale
    scaled_anchors = torch.tensor(anchors) * (
        torch.tensor(S).unsqueeze(1).unsqueeze(1).repeat(1, 3, 2)
    )
    loader = DataLoader(dataset=dataset, batch_size=1, shuffle=True)
    for x, y in loader:
        boxes = []
        for i in range(len(y)):  # one set of targets per scale
            anchor = scaled_anchors[i]
            print(anchor.shape)
            print(y[i].shape)
            boxes += cells_to_bboxes(
                y[i], is_preds=False, S=y[i].shape[2], anchors=anchor
            )[0]
        # with iou_threshold=1, nms mostly just filters by the objectness threshold
        boxes = nms(boxes, iou_threshold=1, threshold=0.7, box_format="midpoint")
        print(boxes)
        plot_image(x[0].permute(1, 2, 0).to("cpu"), boxes)


if __name__ == "__main__":
    test()