Object Detection

Solution for submission 156658

A detailed solution for submission 156658, submitted to the Object Detection challenge.

BanKhv
%load_ext aicrowd.magic
%aicrowd login

!rm -rf data
!mkdir data
%aicrowd ds dl -c object-detection -o data

!unzip data/train.zip -d data/train > /dev/null
!unzip data/test.zip -d data/test > /dev/null

!pip install pyyaml==5.1
!pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu102/torch1.9/index.html
In [1]:
# check pytorch installation: 
import torch, torchvision
print(torch.__version__, torch.cuda.is_available())
assert torch.__version__.startswith("1.9")
1.9.0+cu102 True
!pip install google-colab
In [2]:
# Detection
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()

# import some common libraries
import numpy as np
import os, json, cv2, random
from glob import glob
from PIL import Image
from natsort import natsorted
from tqdm.notebook import tqdm
#from google.colab.patches import cv2_imshow

# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.data import build_detection_test_loader
from detectron2.engine import DefaultTrainer
from detectron2.data.datasets import register_coco_instances
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog
register_coco_instances("train", {}, "data/xtrain.json", "data/train") register_coco_instances('valid', {}, "data/valid.json", "data/train") vehicle_metadata = MetadataCatalog.get("train") dataset_dicts = DatasetCatalog.get("train") valid_metadata = MetadataCatalog.get("valid") valid_dicts = DatasetCatalog.get("valid")
In [3]:
register_coco_instances("train", {}, "data/train.json", "data/train")

vehicle_metadata = MetadataCatalog.get("train")
dataset_dicts    = DatasetCatalog.get("train")
WARNING [09/16 18:49:33 d2.data.datasets.coco]: 
Category ids in annotations are not in [1, #categories]! We'll apply a mapping for you.

[09/16 18:49:33 d2.data.datasets.coco]: Loaded 3000 images in COCO format from data/train.json
dataset_dicts
In [4]:
vehicle_metadata
Out[4]:
namespace(name='train',
          json_file='data/train.json',
          image_root='data/train',
          evaluator_type='coco',
          thing_classes=['bicycle', 'motorcycle', 'passenger_car', 'person'],
          thing_dataset_id_to_contiguous_id={0: 0, 1: 1, 2: 2, 3: 3})
valid_metadata

import random

for d in random.sample(dataset_dicts, 3):
    img = cv2.imread(d["file_name"])
    visualizer = Visualizer(img[:, :, ::-1], metadata = vehicle_metadata, scale = 0.5)
    vis = visualizer.draw_dataset_dict(d)
    cv2_imshow(vis.get_image()[:, :, ::-1])

https://github.com/facebookresearch/detectron2/tree/main/configs/COCO-Detection

Average Precision

cfg = get_cfg()
cfg
In [5]:
cfg = get_cfg()

# Setting up model architecture & weights
cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"))
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml") 

#name_detect = 'faster_rcnn_R_101_C4_3x.yaml'
#cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/"    + name_detect))
#cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-Detection/" + name_detect) 

# Datasets
cfg.DATASETS.TRAIN = ("train",)
#cfg.DATASETS.TEST  = ('valid',)
cfg.DATASETS.TEST  = ()

# Other parameters & hyperparameters
cfg.DATALOADER.NUM_WORKERS = 2
cfg.SOLVER.IMS_PER_BATCH   = 4
cfg.SOLVER.BASE_LR         = 0.00025  

#cfg.SOLVER.MAX_ITER = 100  # 0.029
#cfg.SOLVER.MAX_ITER = 200  #  0.055
#cfg.SOLVER.MAX_ITER = 500  # 0.125
#cfg.SOLVER.MAX_ITER = 1000 # 0.195
#cfg.SOLVER.MAX_ITER = 1200 # 0.210
#cfg.SOLVER.MAX_ITER = 1300 # 0.205
#cfg.SOLVER.MAX_ITER = 1400 # 0.219
#cfg.SOLVER.MAX_ITER = 1500 # 0.219
#cfg.SOLVER.MAX_ITER = 1600 # 0.221
#cfg.SOLVER.MAX_ITER = 2000 # 0.242
#cfg.SOLVER.MAX_ITER = 2500 # 0.248
#cfg.SOLVER.MAX_ITER = 3000 # 0.255
#cfg.SOLVER.MAX_ITER = 4000 # 0.263
#cfg.SOLVER.MAX_ITER = 5000 # 0.275
#cfg.SOLVER.MAX_ITER = 6000 # 0.281
cfg.SOLVER.MAX_ITER = 9000

cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 64
cfg.MODEL.ROI_HEADS.NUM_CLASSES          = 4
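The commented-out MAX_ITER lines above record the score reached at each iteration budget. Plotting them (values transcribed from those comments) shows diminishing but still positive returns, which is what motivates the final 9000-iteration budget:

import matplotlib.pyplot as plt

iters  = [100, 200, 500, 1000, 1200, 1300, 1400, 1500, 1600, 2000, 2500, 3000, 4000, 5000, 6000]
scores = [0.029, 0.055, 0.125, 0.195, 0.210, 0.205, 0.219, 0.219, 0.221, 0.242, 0.248, 0.255, 0.263, 0.275, 0.281]

plt.plot(iters, scores, marker = "o")
plt.xlabel("SOLVER.MAX_ITER")
plt.ylabel("score")
plt.title("Score vs. iteration budget (from the comments above)")
plt.show()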
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.data import build_detection_test_loader

evaluator  = COCOEvaluator("valid", cfg, False, output_dir = "./output/")
val_loader = build_detection_test_loader(cfg, "valid", mapper = None)
inference_on_dataset(trainer.model, val_loader, evaluator)
In [6]:
os.makedirs(cfg.OUTPUT_DIR, exist_ok = True)
trainer = DefaultTrainer(cfg)

# resume = False loads the weights from cfg.MODEL.WEIGHTS and starts at iteration 0;
# resume = True (used later) also restores the optimizer, scheduler and iteration
# counter from the last checkpoint in cfg.OUTPUT_DIR.
trainer.resume_or_load(resume = False)
#trainer.train()
[09/16 18:49:37 d2.engine.defaults]: Model:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
      )
      (res2): Sequential(
        (0): BottleneckBlock(
          (shortcut): Conv2d(
            64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
          )
          (conv1): Conv2d(
            64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
          )
          (conv2): Conv2d(
            64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
          )
          (conv3): Conv2d(
            64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
          )
        )
        (1): BottleneckBlock(
          (conv1): Conv2d(
            256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
          )
          (conv2): Conv2d(
            64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
          )
          (conv3): Conv2d(
            64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
          )
        )
        (2): BottleneckBlock(
          (conv1): Conv2d(
            256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
          )
          (conv2): Conv2d(
            64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
          )
          (conv3): Conv2d(
            64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
          )
        )
      )
      (res3): Sequential(
        (0): BottleneckBlock(
          (shortcut): Conv2d(
            256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False
            (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05)
          )
          (conv1): Conv2d(
            256, 128, kernel_size=(1, 1), stride=(2, 2), bias=False
            (norm): FrozenBatchNorm2d(num_features=128, eps=1e-05)
          )
          (conv2): Conv2d(
            128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=128, eps=1e-05)
          )
          (conv3): Conv2d(
            128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05)
          )
        )
        (1): BottleneckBlock(
          (conv1): Conv2d(
            512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=128, eps=1e-05)
          )
          (conv2): Conv2d(
            128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=128, eps=1e-05)
          )
          (conv3): Conv2d(
            128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05)
          )
        )
        (2): BottleneckBlock(
          (conv1): Conv2d(
            512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=128, eps=1e-05)
          )
          (conv2): Conv2d(
            128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=128, eps=1e-05)
          )
          (conv3): Conv2d(
            128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05)
          )
        )
        (3): BottleneckBlock(
          (conv1): Conv2d(
            512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=128, eps=1e-05)
          )
          (conv2): Conv2d(
            128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=128, eps=1e-05)
          )
          (conv3): Conv2d(
            128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05)
          )
        )
      )
      (res4): Sequential(
        (0): BottleneckBlock(
          (shortcut): Conv2d(
            512, 1024, kernel_size=(1, 1), stride=(2, 2), bias=False
            (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05)
          )
          (conv1): Conv2d(
            512, 256, kernel_size=(1, 1), stride=(2, 2), bias=False
            (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
          )
          (conv2): Conv2d(
            256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
          )
          (conv3): Conv2d(
            256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05)
          )
        )
        (1): BottleneckBlock(
          (conv1): Conv2d(
            1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
          )
          (conv2): Conv2d(
            256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
          )
          (conv3): Conv2d(
            256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05)
          )
        )
        (2): BottleneckBlock(
          (conv1): Conv2d(
            1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
          )
          (conv2): Conv2d(
            256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
          )
          (conv3): Conv2d(
            256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05)
          )
        )
        (3): BottleneckBlock(
          (conv1): Conv2d(
            1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
          )
          (conv2): Conv2d(
            256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
          )
          (conv3): Conv2d(
            256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05)
          )
        )
        (4): BottleneckBlock(
          (conv1): Conv2d(
            1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
          )
          (conv2): Conv2d(
            256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
          )
          (conv3): Conv2d(
            256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05)
          )
        )
        (5): BottleneckBlock(
          (conv1): Conv2d(
            1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
          )
          (conv2): Conv2d(
            256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
          )
          (conv3): Conv2d(
            256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05)
          )
        )
      )
      (res5): Sequential(
        (0): BottleneckBlock(
          (shortcut): Conv2d(
            1024, 2048, kernel_size=(1, 1), stride=(2, 2), bias=False
            (norm): FrozenBatchNorm2d(num_features=2048, eps=1e-05)
          )
          (conv1): Conv2d(
            1024, 512, kernel_size=(1, 1), stride=(2, 2), bias=False
            (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05)
          )
          (conv2): Conv2d(
            512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05)
          )
          (conv3): Conv2d(
            512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=2048, eps=1e-05)
          )
        )
        (1): BottleneckBlock(
          (conv1): Conv2d(
            2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05)
          )
          (conv2): Conv2d(
            512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05)
          )
          (conv3): Conv2d(
            512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=2048, eps=1e-05)
          )
        )
        (2): BottleneckBlock(
          (conv1): Conv2d(
            2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05)
          )
          (conv2): Conv2d(
            512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05)
          )
          (conv3): Conv2d(
            512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=2048, eps=1e-05)
          )
        )
      )
    )
  )
  (proposal_generator): RPN(
    (rpn_head): StandardRPNHead(
      (conv): Conv2d(
        256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)
        (activation): ReLU()
      )
      (objectness_logits): Conv2d(256, 3, kernel_size=(1, 1), stride=(1, 1))
      (anchor_deltas): Conv2d(256, 12, kernel_size=(1, 1), stride=(1, 1))
    )
    (anchor_generator): DefaultAnchorGenerator(
      (cell_anchors): BufferList()
    )
  )
  (roi_heads): StandardROIHeads(
    (box_pooler): ROIPooler(
      (level_poolers): ModuleList(
        (0): ROIAlign(output_size=(7, 7), spatial_scale=0.25, sampling_ratio=0, aligned=True)
        (1): ROIAlign(output_size=(7, 7), spatial_scale=0.125, sampling_ratio=0, aligned=True)
        (2): ROIAlign(output_size=(7, 7), spatial_scale=0.0625, sampling_ratio=0, aligned=True)
        (3): ROIAlign(output_size=(7, 7), spatial_scale=0.03125, sampling_ratio=0, aligned=True)
      )
    )
    (box_head): FastRCNNConvFCHead(
      (flatten): Flatten(start_dim=1, end_dim=-1)
      (fc1): Linear(in_features=12544, out_features=1024, bias=True)
      (fc_relu1): ReLU()
      (fc2): Linear(in_features=1024, out_features=1024, bias=True)
      (fc_relu2): ReLU()
    )
    (box_predictor): FastRCNNOutputLayers(
      (cls_score): Linear(in_features=1024, out_features=5, bias=True)
      (bbox_pred): Linear(in_features=1024, out_features=16, bias=True)
    )
  )
)
WARNING [09/16 18:49:37 d2.data.datasets.coco]: 
Category ids in annotations are not in [1, #categories]! We'll apply a mapping for you.

[09/16 18:49:37 d2.data.datasets.coco]: Loaded 3000 images in COCO format from data/train.json
[09/16 18:49:37 d2.data.build]: Removed 0 images with no usable annotations. 3000 images left.
[09/16 18:49:37 d2.data.build]: Distribution of instances among all 4 categories:
|  category  | #instances   |  category  | #instances   |   category    | #instances   |
|:----------:|:-------------|:----------:|:-------------|:-------------:|:-------------|
|  bicycle   | 1792         | motorcycle | 2133         | passenger_car | 3074         |
|   person   | 1769         |            |              |               |              |
|   total    | 8768         |            |              |               |              |
[09/16 18:49:37 d2.data.dataset_mapper]: [DatasetMapper] Augmentations used in training: [ResizeShortestEdge(short_edge_length=(640, 672, 704, 736, 768, 800), max_size=1333, sample_style='choice'), RandomFlip()]
[09/16 18:49:37 d2.data.build]: Using training sampler TrainingSampler
[09/16 18:49:37 d2.data.common]: Serializing 3000 elements to byte tensors and concatenating them all ...
[09/16 18:49:37 d2.data.common]: Serialized dataset takes 0.96 MiB
WARNING [09/16 18:49:37 d2.solver.build]: SOLVER.STEPS contains values larger than SOLVER.MAX_ITER. These values will be ignored.
Skip loading parameter 'roi_heads.box_predictor.cls_score.weight' to the model due to incompatible shapes: (81, 1024) in the checkpoint but (5, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.cls_score.bias' to the model due to incompatible shapes: (81,) in the checkpoint but (5,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.weight' to the model due to incompatible shapes: (320, 1024) in the checkpoint but (16, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.bias' to the model due to incompatible shapes: (320,) in the checkpoint but (16,) in the model! You might want to double check if this is expected.
Some model parameters or buffers are not found in the checkpoint:
roi_heads.box_predictor.bbox_pred.{bias, weight}
roi_heads.box_predictor.cls_score.{bias, weight}
The checkpoint state_dict contains keys that are not used by the model:
  proposal_generator.anchor_generator.cell_anchors.{0, 1, 2, 3, 4}
In [7]:
from detectron2.engine import DefaultTrainer
from detectron2.evaluation import COCOEvaluator

class CocoTrainer(DefaultTrainer):

  @classmethod
  def build_evaluator(cls, cfg, dataset_name, output_folder = None):

    if output_folder is None:
        os.makedirs("coco_eval", exist_ok = True)
        output_folder = "coco_eval"

    return COCOEvaluator(dataset_name, cfg, False, output_folder)
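With the "valid" dataset registered (see the earlier register_coco_instances cell), this trainer can also evaluate periodically during training. A minimal sketch, assuming "valid" is registered; the 1000-iteration period is an arbitrary choice:

cfg.DATASETS.TEST    = ("valid",)  # dataset(s) the evaluator runs on
cfg.TEST.EVAL_PERIOD = 1000        # run COCO evaluation every 1000 iterations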
In [8]:
# Loading the previously trained weights
cfg.MODEL.WEIGHTS = "model_14.pth"

# Creating the trainer and resuming from the last checkpoint
trainer = CocoTrainer(cfg)
trainer.resume_or_load(resume = True)
[09/16 18:49:39 d2.engine.defaults]: Model:
GeneralizedRCNN(...)   (architecture printout identical to the one shown above; omitted)
WARNING [09/16 18:49:40 d2.data.datasets.coco]: 
Category ids in annotations are not in [1, #categories]! We'll apply a mapping for you.

[09/16 18:49:40 d2.data.datasets.coco]: Loaded 3000 images in COCO format from data/train.json
[09/16 18:49:40 d2.data.build]: Removed 0 images with no usable annotations. 3000 images left.
[09/16 18:49:40 d2.data.dataset_mapper]: [DatasetMapper] Augmentations used in training: [ResizeShortestEdge(short_edge_length=(640, 672, 704, 736, 768, 800), max_size=1333, sample_style='choice'), RandomFlip()]
[09/16 18:49:40 d2.data.build]: Using training sampler TrainingSampler
[09/16 18:49:40 d2.data.common]: Serializing 3000 elements to byte tensors and concatenating them all ...
[09/16 18:49:40 d2.data.common]: Serialized dataset takes 0.96 MiB
WARNING [09/16 18:49:40 d2.solver.build]: SOLVER.STEPS contains values larger than SOLVER.MAX_ITER. These values will be ignored.
[09/16 18:49:40 d2.engine.hooks]: Loading scheduler from state_dict ...
In [9]:
trainer.train()
[09/16 18:49:40 d2.engine.train_loop]: Starting training from iteration 8000
/home/vlad/anaconda3/lib/python3.8/site-packages/torch/_tensor.py:575: UserWarning: floor_divide is deprecated, and will be removed in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values.
To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor'). (Triggered internally at  /pytorch/aten/src/ATen/native/BinaryOps.cpp:467.)
  return torch.floor_divide(self, other)
/home/vlad/anaconda3/lib/python3.8/site-packages/torch/nn/functional.py:718: UserWarning: Named tensors and all their associated APIs are an experimental feature and subject to change. Please do not use them for anything important until they are released as stable. (Triggered internally at  /pytorch/c10/core/TensorImpl.h:1156.)
  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)
[09/16 18:49:58 d2.utils.events]:  eta: 0:14:55  iter: 8019  total_loss: 0.5735  loss_cls: 0.1582  loss_box_reg: 0.3534  loss_rpn_cls: 0.014  loss_rpn_loc: 0.06436  time: 0.8747  data_time: 0.0149  lr: 0.00025  max_mem: 4011M
[09/16 18:50:15 d2.utils.events]:  eta: 0:14:06  iter: 8039  total_loss: 0.6572  loss_cls: 0.174  loss_box_reg: 0.3814  loss_rpn_cls: 0.01367  loss_rpn_loc: 0.1232  time: 0.8670  data_time: 0.0082  lr: 0.00025  max_mem: 4011M
[09/16 18:50:33 d2.utils.events]:  eta: 0:13:51  iter: 8059  total_loss: 0.6901  loss_cls: 0.1747  loss_box_reg: 0.3802  loss_rpn_cls: 0.007031  loss_rpn_loc: 0.08188  time: 0.8804  data_time: 0.0078  lr: 0.00025  max_mem: 4011M
[09/16 18:50:52 d2.utils.events]:  eta: 0:13:52  iter: 8079  total_loss: 0.6234  loss_cls: 0.1621  loss_box_reg: 0.3597  loss_rpn_cls: 0.01061  loss_rpn_loc: 0.08448  time: 0.8952  data_time: 0.0084  lr: 0.00025  max_mem: 4011M
[09/16 18:51:09 d2.utils.events]:  eta: 0:13:37  iter: 8099  total_loss: 0.6245  loss_cls: 0.1455  loss_box_reg: 0.373  loss_rpn_cls: 0.009781  loss_rpn_loc: 0.06534  time: 0.8941  data_time: 0.0086  lr: 0.00025  max_mem: 4011M
[09/16 18:51:26 d2.utils.events]:  eta: 0:13:16  iter: 8119  total_loss: 0.6716  loss_cls: 0.1554  loss_box_reg: 0.3901  loss_rpn_cls: 0.01095  loss_rpn_loc: 0.08645  time: 0.8860  data_time: 0.0081  lr: 0.00025  max_mem: 4011M
[09/16 18:51:44 d2.utils.events]:  eta: 0:12:55  iter: 8139  total_loss: 0.6793  loss_cls: 0.1692  loss_box_reg: 0.3884  loss_rpn_cls: 0.01804  loss_rpn_loc: 0.08894  time: 0.8855  data_time: 0.0091  lr: 0.00025  max_mem: 4011M
[09/16 18:52:02 d2.utils.events]:  eta: 0:12:40  iter: 8159  total_loss: 0.7627  loss_cls: 0.1836  loss_box_reg: 0.3988  loss_rpn_cls: 0.01342  loss_rpn_loc: 0.12  time: 0.8884  data_time: 0.0078  lr: 0.00025  max_mem: 4011M
[09/16 18:52:21 d2.utils.events]:  eta: 0:12:29  iter: 8179  total_loss: 0.6563  loss_cls: 0.1704  loss_box_reg: 0.3872  loss_rpn_cls: 0.01648  loss_rpn_loc: 0.0735  time: 0.8924  data_time: 0.0077  lr: 0.00025  max_mem: 4011M
[09/16 18:52:39 d2.utils.events]:  eta: 0:12:04  iter: 8199  total_loss: 0.6156  loss_cls: 0.1343  loss_box_reg: 0.373  loss_rpn_cls: 0.01072  loss_rpn_loc: 0.08581  time: 0.8935  data_time: 0.0095  lr: 0.00025  max_mem: 4011M
[09/16 18:52:58 d2.utils.events]:  eta: 0:11:54  iter: 8219  total_loss: 0.6017  loss_cls: 0.1433  loss_box_reg: 0.3784  loss_rpn_cls: 0.01113  loss_rpn_loc: 0.05294  time: 0.8993  data_time: 0.0084  lr: 0.00025  max_mem: 4011M
[09/16 18:53:17 d2.utils.events]:  eta: 0:11:40  iter: 8239  total_loss: 0.6673  loss_cls: 0.1864  loss_box_reg: 0.3835  loss_rpn_cls: 0.01176  loss_rpn_loc: 0.08887  time: 0.9053  data_time: 0.0087  lr: 0.00025  max_mem: 4011M
[09/16 18:53:36 d2.utils.events]:  eta: 0:11:22  iter: 8259  total_loss: 0.6379  loss_cls: 0.129  loss_box_reg: 0.3737  loss_rpn_cls: 0.01215  loss_rpn_loc: 0.08359  time: 0.9065  data_time: 0.0079  lr: 0.00025  max_mem: 4011M
[09/16 18:53:54 d2.utils.events]:  eta: 0:11:03  iter: 8279  total_loss: 0.6479  loss_cls: 0.1696  loss_box_reg: 0.3582  loss_rpn_cls: 0.01653  loss_rpn_loc: 0.07848  time: 0.9064  data_time: 0.0080  lr: 0.00025  max_mem: 4011M
[09/16 18:54:13 d2.utils.events]:  eta: 0:10:47  iter: 8299  total_loss: 0.5761  loss_cls: 0.146  loss_box_reg: 0.353  loss_rpn_cls: 0.01674  loss_rpn_loc: 0.07485  time: 0.9109  data_time: 0.0086  lr: 0.00025  max_mem: 4011M
[09/16 18:54:31 d2.utils.events]:  eta: 0:10:27  iter: 8319  total_loss: 0.6723  loss_cls: 0.1684  loss_box_reg: 0.3902  loss_rpn_cls: 0.01073  loss_rpn_loc: 0.08398  time: 0.9093  data_time: 0.0075  lr: 0.00025  max_mem: 4011M
[09/16 18:54:50 d2.utils.events]:  eta: 0:10:10  iter: 8339  total_loss: 0.6195  loss_cls: 0.1618  loss_box_reg: 0.3489  loss_rpn_cls: 0.0113  loss_rpn_loc: 0.08211  time: 0.9106  data_time: 0.0077  lr: 0.00025  max_mem: 4011M
[09/16 18:55:08 d2.utils.events]:  eta: 0:09:54  iter: 8359  total_loss: 0.6803  loss_cls: 0.1874  loss_box_reg: 0.3771  loss_rpn_cls: 0.01479  loss_rpn_loc: 0.09496  time: 0.9120  data_time: 0.0077  lr: 0.00025  max_mem: 4011M
[09/16 18:55:28 d2.utils.events]:  eta: 0:09:38  iter: 8379  total_loss: 0.5783  loss_cls: 0.1455  loss_box_reg: 0.3738  loss_rpn_cls: 0.0132  loss_rpn_loc: 0.07668  time: 0.9142  data_time: 0.0082  lr: 0.00025  max_mem: 4011M
[09/16 18:55:46 d2.utils.events]:  eta: 0:09:19  iter: 8399  total_loss: 0.6001  loss_cls: 0.1503  loss_box_reg: 0.3447  loss_rpn_cls: 0.01371  loss_rpn_loc: 0.09153  time: 0.9146  data_time: 0.0078  lr: 0.00025  max_mem: 4011M
[09/16 18:56:05 d2.utils.events]:  eta: 0:09:01  iter: 8419  total_loss: 0.5184  loss_cls: 0.1259  loss_box_reg: 0.326  loss_rpn_cls: 0.009497  loss_rpn_loc: 0.0822  time: 0.9153  data_time: 0.0073  lr: 0.00025  max_mem: 4011M
[09/16 18:56:23 d2.utils.events]:  eta: 0:08:42  iter: 8439  total_loss: 0.6416  loss_cls: 0.153  loss_box_reg: 0.3853  loss_rpn_cls: 0.01391  loss_rpn_loc: 0.06916  time: 0.9153  data_time: 0.0075  lr: 0.00025  max_mem: 4011M
[09/16 18:56:42 d2.utils.events]:  eta: 0:08:25  iter: 8459  total_loss: 0.6085  loss_cls: 0.1718  loss_box_reg: 0.3725  loss_rpn_cls: 0.01591  loss_rpn_loc: 0.06607  time: 0.9171  data_time: 0.0082  lr: 0.00025  max_mem: 4011M
[09/16 18:57:00 d2.utils.events]:  eta: 0:08:05  iter: 8479  total_loss: 0.6595  loss_cls: 0.184  loss_box_reg: 0.3735  loss_rpn_cls: 0.01201  loss_rpn_loc: 0.0773  time: 0.9169  data_time: 0.0077  lr: 0.00025  max_mem: 4011M
[09/16 18:57:19 d2.utils.events]:  eta: 0:07:47  iter: 8499  total_loss: 0.5941  loss_cls: 0.1414  loss_box_reg: 0.3479  loss_rpn_cls: 0.0163  loss_rpn_loc: 0.08229  time: 0.9178  data_time: 0.0077  lr: 0.00025  max_mem: 4011M
[09/16 18:57:38 d2.utils.events]:  eta: 0:07:27  iter: 8519  total_loss: 0.5753  loss_cls: 0.1394  loss_box_reg: 0.3556  loss_rpn_cls: 0.01136  loss_rpn_loc: 0.072  time: 0.9182  data_time: 0.0079  lr: 0.00025  max_mem: 4011M
[09/16 18:57:56 d2.utils.events]:  eta: 0:07:08  iter: 8539  total_loss: 0.6796  loss_cls: 0.1822  loss_box_reg: 0.4275  loss_rpn_cls: 0.01235  loss_rpn_loc: 0.08427  time: 0.9184  data_time: 0.0076  lr: 0.00025  max_mem: 4011M
[09/16 18:58:15 d2.utils.events]:  eta: 0:06:50  iter: 8559  total_loss: 0.6638  loss_cls: 0.1701  loss_box_reg: 0.3839  loss_rpn_cls: 0.01402  loss_rpn_loc: 0.0756  time: 0.9196  data_time: 0.0078  lr: 0.00025  max_mem: 4011M
[09/16 18:58:34 d2.utils.events]:  eta: 0:06:30  iter: 8579  total_loss: 0.6729  loss_cls: 0.1678  loss_box_reg: 0.3762  loss_rpn_cls: 0.01434  loss_rpn_loc: 0.08968  time: 0.9198  data_time: 0.0075  lr: 0.00025  max_mem: 4011M
[09/16 18:58:52 d2.utils.events]:  eta: 0:06:12  iter: 8599  total_loss: 0.7232  loss_cls: 0.1774  loss_box_reg: 0.42  loss_rpn_cls: 0.01711  loss_rpn_loc: 0.09728  time: 0.9202  data_time: 0.0072  lr: 0.00025  max_mem: 4011M
[09/16 18:59:11 d2.utils.events]:  eta: 0:05:54  iter: 8619  total_loss: 0.5755  loss_cls: 0.1349  loss_box_reg: 0.3596  loss_rpn_cls: 0.01406  loss_rpn_loc: 0.06122  time: 0.9214  data_time: 0.0078  lr: 0.00025  max_mem: 4011M
[09/16 18:59:30 d2.utils.events]:  eta: 0:05:36  iter: 8639  total_loss: 0.6211  loss_cls: 0.165  loss_box_reg: 0.3753  loss_rpn_cls: 0.01355  loss_rpn_loc: 0.07956  time: 0.9222  data_time: 0.0077  lr: 0.00025  max_mem: 4011M
[09/16 18:59:49 d2.utils.events]:  eta: 0:05:17  iter: 8659  total_loss: 0.6029  loss_cls: 0.1477  loss_box_reg: 0.343  loss_rpn_cls: 0.01201  loss_rpn_loc: 0.09203  time: 0.9223  data_time: 0.0080  lr: 0.00025  max_mem: 4011M
[09/16 19:00:08 d2.utils.events]:  eta: 0:04:59  iter: 8679  total_loss: 0.5684  loss_cls: 0.1597  loss_box_reg: 0.3394  loss_rpn_cls: 0.01001  loss_rpn_loc: 0.06237  time: 0.9232  data_time: 0.0073  lr: 0.00025  max_mem: 4011M
[09/16 19:00:28 d2.utils.events]:  eta: 0:04:41  iter: 8699  total_loss: 0.7257  loss_cls: 0.1679  loss_box_reg: 0.4184  loss_rpn_cls: 0.01067  loss_rpn_loc: 0.0805  time: 0.9248  data_time: 0.0079  lr: 0.00025  max_mem: 4011M
[09/16 19:00:47 d2.utils.events]:  eta: 0:04:22  iter: 8719  total_loss: 0.69  loss_cls: 0.1901  loss_box_reg: 0.3841  loss_rpn_cls: 0.01128  loss_rpn_loc: 0.0958  time: 0.9254  data_time: 0.0075  lr: 0.00025  max_mem: 4011M
[09/16 19:01:05 d2.utils.events]:  eta: 0:04:03  iter: 8739  total_loss: 0.5872  loss_cls: 0.1306  loss_box_reg: 0.393  loss_rpn_cls: 0.01204  loss_rpn_loc: 0.07538  time: 0.9256  data_time: 0.0076  lr: 0.00025  max_mem: 4011M
[09/16 19:01:24 d2.utils.events]:  eta: 0:03:44  iter: 8759  total_loss: 0.6928  loss_cls: 0.1982  loss_box_reg: 0.388  loss_rpn_cls: 0.01472  loss_rpn_loc: 0.09344  time: 0.9260  data_time: 0.0075  lr: 0.00025  max_mem: 4011M
[09/16 19:01:44 d2.utils.events]:  eta: 0:03:26  iter: 8779  total_loss: 0.5942  loss_cls: 0.1375  loss_box_reg: 0.3618  loss_rpn_cls: 0.01279  loss_rpn_loc: 0.08231  time: 0.9273  data_time: 0.0073  lr: 0.00025  max_mem: 4011M
[09/16 19:02:03 d2.utils.events]:  eta: 0:03:07  iter: 8799  total_loss: 0.614  loss_cls: 0.1302  loss_box_reg: 0.3741  loss_rpn_cls: 0.01181  loss_rpn_loc: 0.08537  time: 0.9279  data_time: 0.0072  lr: 0.00025  max_mem: 4011M
[09/16 19:02:21 d2.utils.events]:  eta: 0:02:48  iter: 8819  total_loss: 0.6232  loss_cls: 0.1761  loss_box_reg: 0.3612  loss_rpn_cls: 0.01276  loss_rpn_loc: 0.07701  time: 0.9274  data_time: 0.0076  lr: 0.00025  max_mem: 4011M
[09/16 19:02:40 d2.utils.events]:  eta: 0:02:29  iter: 8839  total_loss: 0.6631  loss_cls: 0.1638  loss_box_reg: 0.3521  loss_rpn_cls: 0.01459  loss_rpn_loc: 0.08924  time: 0.9286  data_time: 0.0075  lr: 0.00025  max_mem: 4011M
[09/16 19:03:00 d2.utils.events]:  eta: 0:02:11  iter: 8859  total_loss: 0.5966  loss_cls: 0.1388  loss_box_reg: 0.3558  loss_rpn_cls: 0.01085  loss_rpn_loc: 0.06575  time: 0.9301  data_time: 0.0071  lr: 0.00025  max_mem: 4011M
[09/16 19:03:20 d2.utils.events]:  eta: 0:01:53  iter: 8879  total_loss: 0.5906  loss_cls: 0.1462  loss_box_reg: 0.3391  loss_rpn_cls: 0.009059  loss_rpn_loc: 0.06902  time: 0.9313  data_time: 0.0076  lr: 0.00025  max_mem: 4011M
[09/16 19:03:38 d2.utils.events]:  eta: 0:01:34  iter: 8899  total_loss: 0.6797  loss_cls: 0.1928  loss_box_reg: 0.3888  loss_rpn_cls: 0.01128  loss_rpn_loc: 0.08032  time: 0.9309  data_time: 0.0085  lr: 0.00025  max_mem: 4011M
[09/16 19:03:57 d2.utils.events]:  eta: 0:01:15  iter: 8919  total_loss: 0.6612  loss_cls: 0.1597  loss_box_reg: 0.3883  loss_rpn_cls: 0.01453  loss_rpn_loc: 0.1  time: 0.9313  data_time: 0.0069  lr: 0.00025  max_mem: 4011M
[09/16 19:04:16 d2.utils.events]:  eta: 0:00:56  iter: 8939  total_loss: 0.6114  loss_cls: 0.1627  loss_box_reg: 0.3557  loss_rpn_cls: 0.01082  loss_rpn_loc: 0.07861  time: 0.9319  data_time: 0.0076  lr: 0.00025  max_mem: 4011M
[09/16 19:04:36 d2.utils.events]:  eta: 0:00:37  iter: 8959  total_loss: 0.6809  loss_cls: 0.1741  loss_box_reg: 0.3816  loss_rpn_cls: 0.01404  loss_rpn_loc: 0.08337  time: 0.9325  data_time: 0.0077  lr: 0.00025  max_mem: 4011M
[09/16 19:04:54 d2.utils.events]:  eta: 0:00:18  iter: 8979  total_loss: 0.5338  loss_cls: 0.1293  loss_box_reg: 0.3124  loss_rpn_cls: 0.01238  loss_rpn_loc: 0.0628  time: 0.9326  data_time: 0.0071  lr: 0.00025  max_mem: 4011M
[09/16 19:05:14 d2.utils.events]:  eta: 0:00:00  iter: 8999  total_loss: 0.6318  loss_cls: 0.1478  loss_box_reg: 0.3886  loss_rpn_cls: 0.01246  loss_rpn_loc: 0.0824  time: 0.9336  data_time: 0.0076  lr: 0.00025  max_mem: 4011M
[09/16 19:05:14 d2.engine.hooks]: Overall training speed: 998 iterations in 0:15:31 (0.9336 s / it)
[09/16 19:05:14 d2.engine.hooks]: Total training time: 0:15:32 (0:00:00 on hooks)
stop  # intentionally undefined name; raises NameError to halt "Run All" here
In [26]:
# Loading the final trained weights from the output directory
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")

# Setting up threshold to filter out some low score predictions
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.1

predictor = DefaultPredictor(cfg)
In [27]:
# Using a sample training dataset
img = Image.open(glob("data/train/*")[500])
#img
In [28]:
# Generating the predictions

img = np.array(img)  # note: PIL loads RGB while DefaultPredictor assumes BGR (cfg.INPUT.FORMAT); good enough for a quick check
outputs = predictor(img)
outputs
Out[28]:
{'instances': Instances(num_instances=12, image_height=600, image_width=800, fields=[pred_boxes: Boxes(tensor([[ 59.2951, 365.4498, 651.9376, 595.3845],
         [299.9325, 302.1376, 305.9844, 315.0922],
         [330.3462, 299.8665, 335.6840, 311.0701],
         [329.3240, 304.1943, 337.4326, 310.9443],
         [299.3181, 307.0965, 306.8748, 314.0390],
         [  0.0000, 416.3310, 129.5694, 600.0000],
         [  0.0000, 416.5103, 143.6540, 600.0000],
         [332.3047, 300.0963, 337.1781, 311.5527],
         [437.6117, 305.5850, 447.4410, 312.8269],
         [327.0793, 304.5764, 335.1206, 310.3562],
         [328.7664, 299.1503, 333.7773, 309.8309],
         [329.2623, 306.0944, 339.0211, 312.9205]], device='cuda:0')), scores: tensor([0.9990, 0.8759, 0.8388, 0.5653, 0.4837, 0.4252, 0.3860, 0.2002, 0.1944,
         0.1516, 0.1455, 0.1435], device='cuda:0'), pred_classes: tensor([2, 3, 3, 0, 0, 1, 0, 3, 0, 0, 3, 0], device='cuda:0')])}
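The raw Instances printout is hard to read; the numeric pred_classes can be mapped back to names through the metadata registered earlier. A small illustrative snippet (not part of the original submission):

instances = outputs["instances"].to("cpu")
for box, score, cls in zip(instances.pred_boxes.tensor.tolist(),
                           instances.scores.tolist(),
                           instances.pred_classes.tolist()):
    # thing_classes was populated when "train" was registered
    print(f"{vehicle_metadata.thing_classes[cls]:>14s}  {score:.3f}  {box}")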
In [29]:
test_images_list = natsorted(glob("data/test/*"))
test_images_list[0]
Out[29]:
'data/test/0.jpg'
In [30]:
from detectron2.utils.visualizer import ColorMode
import matplotlib.pyplot as plt

im = cv2.imread('data/test/999.jpg')
#im = Image.open(glob("data/train/*")[500])

outputs = predictor(im)
v = Visualizer(
    im[:, :, ::-1],
    #metadata = vehicle_metadata, 
    scale = 0.5, 
    instance_mode=ColorMode.IMAGE_BW   # remove the colors of unsegmented pixels. Only available for segmentation models
)
out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
plt.figure(figsize=(15,7))
plt.imshow(out.get_image())
Out[30]:
<matplotlib.image.AxesImage at 0x7ff85aeedd00>
In [31]:
# Generating the predictions

pred = []

# Looping through each image
for file_path in tqdm(test_images_list):

  # Reading the image
  img = cv2.imread(file_path)

  # Generating the predictions
  outputs = predictor(img)


  image_path, image_file_name = os.path.split(file_path)
  
  # Getting the image_id of the predictions 
  # ( The image_id in the predictions is the file_id  + 1 )
  image_id = int(image_file_name.split(".")[0])+1


  # Adding the predictions
  for n, boxes in enumerate(outputs['instances'].pred_boxes.tensor.cpu().numpy().tolist()):

    # Converting the bounding boxes from (x1, y1, x2, y2) to (x, y, w, h)
    preprocessed_box = [boxes[0], boxes[1], abs(boxes[0] - boxes[2]), abs(boxes[1] - boxes[3])]

    pred.append({
        "image_id": image_id,
        "category_id": outputs['instances'].pred_classes[n].cpu().numpy().tolist(),
        "bbox": preprocessed_box,
        "score":outputs['instances'].scores[n].cpu().numpy().tolist()
    })
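Note that pred_classes holds detectron2's contiguous ids. Here thing_dataset_id_to_contiguous_id is the identity mapping (see Out[4] above), so the ids can be submitted directly; for a dataset where the mapping is not the identity, it would have to be inverted first, e.g.:

# Map contiguous ids back to the original dataset category ids
# (a no-op for this dataset, where the mapping is {0: 0, 1: 1, 2: 2, 3: 3})
reverse_id_map = {v: k for k, v in vehicle_metadata.thing_dataset_id_to_contiguous_id.items()}
for p in pred:
    p["category_id"] = reverse_id_map[p["category_id"]]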
In [32]:
# Saving the predictions
!rm -rf assets
!mkdir assets

with open('assets/predictions.json', 'w') as f:
    json.dump(pred, f)
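Before submitting, a quick sanity check of the saved file can catch empty or malformed output (an illustrative check, not part of the original submission):

with open('assets/predictions.json') as f:
    preds = json.load(f)

print(f"{len(preds)} predictions over {len({p['image_id'] for p in preds})} images")
assert all({"image_id", "category_id", "bbox", "score"} <= set(p) for p in preds)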
stop  # intentionally undefined; halts "Run All" here as well
In [ ]:
%load_ext aicrowd.magic

%aicrowd login
The aicrowd.magic extension is already loaded. To reload it, use:
  %reload_ext aicrowd.magic
Please login here: https://api.aicrowd.com/auth/qihuQFuyvYY9OwfvBfEYzt-D--pUgGfwX1aJ45WGrIg