Authors: Rajat Kumar Singh, Himani Shrotriya, Shivshankar Reddy, Himanshu Bhatt
Affiliation: American Express
Email: rajatks@outlook.com
Description: We trained MViTv2 for Rumsey Map and Mask DINO for IGN map. To further improve the performance, we crop the image into 4 equal parts, we predict on original image and all 4 cropped images and combine the output.
@misc{li2022mask,
title={Mask DINO: Towards A Unified Transformer-based Framework for Object Detection and Segmentation},
author={Feng Li and Hao Zhang and Huaizhe xu and Shilong Liu and Lei Zhang and Lionel M. Ni and Heung-Yeung Shum},
year={2022},
eprint={2206.02777},
archivePrefix={arXiv},
primaryClass={cs.CV}
}
@inproceedings{li2021improved,
title={MViTv2: Improved multiscale vision transformers for classification and detection},
author={Li, Yanghao and Wu, Chao-Yuan and Fan, Haoqi and Mangalam, Karttikeya and Xiong, Bo and Malik, Jitendra and Feichtenhofer, Christoph},
booktitle={CVPR},
year={2022}
}