MASKD
[Getting Started Notebook] MASKD Challenge
This is baseline code to get you started with the challenge.
This dataset and notebook correspond to the MASKD Challenge being held on AIcrowd.
Authors: Gauransh Kumar, Shraddhaa Mohan, Rohit Midha
Downloads and Installations
# download the MaskRCNN repository
!git clone https://github.com/matterport/Mask_RCNN.git
# install
!cd Mask_RCNN; python setup.py install
# install pycocotools
!pip uninstall -q pycocotools -y
!pip install -q git+https://github.com/waleedka/coco.git#subdirectory=PythonAPI
!pip install numpy==1.17.5
!pip uninstall pycocotools -y
!pip install pycocotools --no-binary pycocotools
!pip install 'h5py==2.10.0' --force-reinstall
Restart Runtime here!
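After restarting, it is worth confirming that the pinned versions actually took effect. A quick sanity check (the expected versions simply mirror the pins above):
# Sanity check: confirm the pinned versions survived the runtime restart.
import numpy
import h5py
print("numpy:", numpy.__version__)  # expect 1.17.5, per the pin above
print("h5py:", h5py.__version__)    # expect 2.10.0, per the pin above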
# install a stable keras version
!pip install -q keras==2.2.5
!cd Mask_RCNN; wget -q https://github.com/matterport/Mask_RCNN/releases/download/v2.0/mask_rcnn_coco.h5
import sys
!{sys.executable} -m pip install aicrowd-cli
%load_ext aicrowd.magic
%aicrowd login
Download Dataset
# Download the datasets
%aicrowd ds dl -c maskd
!unzip -q train_images.zip
!unzip -q val_images.zip
!unzip -q test_images.zip
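As a quick sanity check, we can count the extracted images (assuming the archives unpack into train_images/, val_images/, and test_images/ in the current directory, as the zip names suggest):
# Sanity check: count the extracted files in each split.
import os
for split_dir in ["train_images", "val_images", "test_images"]:
    print(split_dir, ":", len(os.listdir(split_dir)), "files")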
Imports
%tensorflow_version 1.x
import os
import sys
import itertools
import math
import logging
import json
import re
import random
import matplotlib
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import matplotlib.lines as lines
import imgaug.augmenters as iaa
from matplotlib.patches import Polygon
from collections import OrderedDict
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
from pycocotools import mask as maskUtils
# Root directory of the project
ROOT_DIR = os.path.abspath("/content/Mask_RCNN")
# Import Mask RCNN
sys.path.append(ROOT_DIR) # To find local version of the library
from mrcnn import utils
from mrcnn import visualize
from mrcnn.visualize import display_images
import mrcnn.model as modellib
from mrcnn.model import log
from mrcnn.config import Config
%matplotlib inline
import tensorflow
print(tensorflow.__version__)
import keras
print(keras.__version__)
Exploratory Data Analysis
Let us inspect the data first before moving on to building the model.
## Create Mask Dataset Class
class MaskDataset(utils.Dataset):
    def load_dataset(self, dataset_dir, dtype, return_coco=True):
        """Loads the Mask dataset
        Params:
            - dataset_dir : root directory of the dataset (can point to the train/val folder)
            - dtype : specifies "train" or "val"
            - return_coco : Boolean which signals whether the loaded COCO object
                            should also be returned
        """
self.dtype = dtype
if self.dtype=="train":
annotation_path = os.path.join(dataset_dir, "train.json")
image_dir = os.path.join(dataset_dir, "train_images")
elif self.dtype=="val":
annotation_path = os.path.join(dataset_dir, "val.json")
image_dir = os.path.join(dataset_dir, "val_images")
print("Annotation Path ", annotation_path)
print("Image Dir ", image_dir)
assert os.path.exists(annotation_path) and os.path.exists(image_dir)
self.coco = COCO(annotation_path)
self.image_dir = image_dir
        # Load all classes (mask and no_mask in this dataset)
classIds = self.coco.getCatIds()
# Load all images
image_ids = list(self.coco.imgs.keys())
# register classes
for _class_id in classIds:
self.add_class("mask-detection", _class_id, self.coco.loadCats(_class_id)[0]["name"])
# Register Images
for _img_id in image_ids:
assert(os.path.exists(os.path.join(image_dir, self.coco.imgs[_img_id]['file_name'])))
self.add_image(
"mask-detection", image_id=_img_id,
path=os.path.join(image_dir, self.coco.imgs[_img_id]['file_name']),
width=self.coco.imgs[_img_id]["width"],
height=self.coco.imgs[_img_id]["height"],
annotations=self.coco.loadAnns(self.coco.getAnnIds(
imgIds=[_img_id],
catIds=classIds,
iscrowd=None)))
if return_coco:
return self.coco
def load_mask(self, image_id):
""" Loads instance mask for a given image
This function converts mask from the coco format to a
a bitmap [height, width, instance]
Params:
- image_id : reference id for a given image
Returns:
masks : A bool array of shape [height, width, instances] with
one mask per instance
class_ids : a 1D array of classIds of the corresponding instance masks
"""
image_info = self.image_info[image_id]
assert image_info["source"] == "mask-detection"
instance_masks = []
class_ids = []
annotations = self.image_info[image_id]["annotations"]
# Build mask of shape [height, width, instance_count] and list
# of class IDs that correspond to each channel of the mask.
for annotation in annotations:
class_id = self.map_source_class_id(
"mask-detection.{}".format(annotation['category_id']))
if class_id:
m = self.annToMask(annotation, image_info["height"],
image_info["width"])
# Some objects are so small that they're less than 1 pixel area
# and end up rounded out. Skip those objects.
if m.max() < 1:
continue
                # Ignore the notion of "is_crowd" as specified in the COCO format,
                # as we do not have that annotation in the current version of the dataset.
instance_masks.append(m)
class_ids.append(class_id)
# Pack instance masks into an array
if class_ids:
mask = np.stack(instance_masks, axis=2)
class_ids = np.array(class_ids, dtype=np.int32)
return mask, class_ids
else:
# Call super class to return an empty mask
return super(MaskDataset, self).load_mask(image_id)
def image_reference(self, image_id):
"""Return a reference for a particular image
Ideally you this function is supposed to return a URL
but in this case, we will simply return the image_id
"""
return "mask-detection::{}".format(image_id)
# The following two functions are from pycocotools with a few changes.
def annToRLE(self, ann, height, width):
"""
Convert annotation which can be polygons, uncompressed RLE to RLE.
:return: binary mask (numpy 2D array)
"""
segm = ann['segmentation']
if isinstance(segm, list):
# polygon -- a single object might consist of multiple parts
# we merge all parts into one mask rle code
rles = maskUtils.frPyObjects(segm, height, width)
rle = maskUtils.merge(rles)
elif isinstance(segm['counts'], list):
# uncompressed RLE
rle = maskUtils.frPyObjects(segm, height, width)
else:
# rle
rle = ann['segmentation']
return rle
def annToMask(self, ann, height, width):
"""
Convert annotation which can be polygons, uncompressed RLE, or RLE to binary mask.
:return: binary mask (numpy 2D array)
"""
rle = self.annToRLE(ann, height, width)
m = maskUtils.decode(rle)
return m
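To see what annToRLE and annToMask do under the hood, here is a toy sketch that calls pycocotools directly; the polygon and canvas size are made up purely for illustration:
# Toy example: rasterize a hypothetical polygon annotation into a binary mask,
# mirroring the annToRLE -> annToMask pipeline above.
from pycocotools import mask as maskUtils
height, width = 4, 4                   # made-up canvas size
segm = [[1, 1, 3, 1, 3, 3, 1, 3]]      # one square polygon, flattened [x, y, x, y, ...]
rles = maskUtils.frPyObjects(segm, height, width)
rle = maskUtils.merge(rles)            # merge multi-part polygons into a single RLE
m = maskUtils.decode(rle)              # binary mask of shape (height, width)
print(m)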
The Mask_RCNN repository works on configs. For training, we create a config as shown below.
class MaskConfig(Config):
"""Configuration for training on data in MS COCO format.
Derives from the base Config class and overrides values specific
to the COCO dataset. Edit here to find optimum parameters
"""
# Give the configuration a recognizable name
NAME = "mask-detection"
# We use a GPU with 12GB memory, which can fit two images.
# Adjust down if you use a smaller GPU.
IMAGES_PER_GPU = 2
# Comment to train on 8 GPUs (default is 1)
GPU_COUNT = 1
BACKBONE = 'resnet50'
# Number of classes (including background)
    NUM_CLASSES = 3  # 1 background + 2 classes (mask/no_mask)
    STEPS_PER_EPOCH = 150
    VALIDATION_STEPS = 50
    MAX_GT_INSTANCES = 35
    LEARNING_RATE = 0.01
    IMAGE_MAX_DIM = 256
    IMAGE_MIN_DIM = 256
    MINI_MASK_SHAPE = (128, 128)
# Mask Dataset
config = MaskConfig()
DATASET_DIR = "/content/"
config.display()
dataset = MaskDataset()
dataset.load_dataset(DATASET_DIR, "train")
# Must call before using the dataset
dataset.prepare()
print("[INFO] Image Count: {}".format(len(dataset.image_ids)))
print("[INFO] Class Count: {}".format(dataset.num_classes))
for i, info in enumerate(dataset.class_info):
print("{:3}. {:50}".format(i, info['name']))
Samples
Load and display some sample images and masks.
image_ids = np.random.choice(dataset.image_ids, 4)
for image_id in image_ids:
print("[INFO] Image ID: {}".format(image_id))
image = dataset.load_image(image_id)
mask, class_ids = dataset.load_mask(image_id)
visualize.display_top_masks(image, mask, class_ids, dataset.class_names)
Bounding Boxes
Rather than using the bounding box coordinates provided by the source dataset, we compute bounding boxes from the masks instead. This lets us handle bounding boxes consistently regardless of the source, and it also makes it easier to resize, rotate, or crop images: we simply regenerate the bounding boxes from the updated masks, rather than computing a separate bounding-box transformation for each type of image transformation.
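For intuition, here is a minimal numpy sketch of the idea behind utils.extract_bboxes for a single instance mask; bbox_from_mask is a hypothetical helper, not the library's implementation:
# Minimal sketch: derive a [y1, x1, y2, x2] box from one boolean instance mask.
import numpy as np

def bbox_from_mask(m):
    ys, xs = np.where(m)  # row/column indices of all mask pixels
    if ys.size == 0:
        return np.zeros(4, dtype=np.int32)  # empty mask -> empty box
    # y2/x2 are exclusive, matching the Mask R-CNN box convention.
    return np.array([ys.min(), xs.min(), ys.max() + 1, xs.max() + 1], dtype=np.int32)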
# Load random image and mask.
image_id = np.random.choice(dataset.image_ids, 1)[0]
image = dataset.load_image(image_id)
mask, class_ids = dataset.load_mask(image_id)
print("[INFO] Image Shape: {} \tClass ID : {}".format(mask.shape, class_ids))
# Compute Bounding box
bbox = utils.extract_bboxes(mask)
# Display image and additional stats
print("[INFO] Image ID: {} \tDataset Reference: {}".format(image_id, dataset.image_reference(image_id)))
log("[INFO] Image", image)
log("[INFO] Mask", mask)
log("[INFO] Class IDs", class_ids)
log("[INFO] BBOX", bbox)
# Display image and instances
visualize.display_instances(image, bbox, mask, class_ids, dataset.class_names)