% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
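%
% A minimal usage sketch for this file (assuming a biblatex setup with the
% biber backend; the file name "references.bib" is a placeholder):
%
%   \documentclass{article}
%   \usepackage[backend=biber]{biblatex}
%   \addbibresource{references.bib}
%   \begin{document}
%   Workflow recognition in pituitary surgery is benchmarked by
%   PitVis-2023 \autocite{Das:303404}.
%   \printbibliography
%   \end{document}
%
% Compile with pdflatex, then biber, then pdflatex twice.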
@ARTICLE{Das:303404,
author = {A. Das and D. Z. Khan and D. Psychogyios and Y. Zhang and
J. G. Hanrahan and F. Vasconcelos and Y. Pang and Z. Chen
and J. Wu and X. Zou and G. Zheng and A. Qayyum and M.
Mazher and I. Razzak and T. Li and J. Ye and J. He and S.
Płotka and J. Kaleta and A. Yamlahi$^*$ and A. Jund$^*$ and
P. Godau$^*$ and S. Kondo and S. Kasai and K. Hirasawa and
D. Rivoir and S. Speidel and A. Pérez and S. Rodriguez and
P. Arbeláez and D. Stoyanov and H. J. Marcus and S. Bano},
title = {{P}it{V}is-2023 challenge: {W}orkflow recognition in videos
of endoscopic pituitary surgery},
journal = {Medical Image Analysis},
volume = {106},
issn = {1361-8415},
address = {Amsterdam [et al.]},
publisher = {Elsevier Science},
reportid = {DKFZ-2025-01640},
pages = {103716},
year = {2025},
abstract = {The field of computer vision applied to videos of minimally
invasive surgery is ever-growing. Workflow recognition
pertains to the automated recognition of various aspects of
a surgery, including which surgical steps are performed and
which surgical instruments are used. This information can
later be used to assist clinicians when learning the
surgery or during live surgery. The Pituitary Vision
(PitVis) 2023 Challenge tasks the community with step and
instrument recognition in videos of endoscopic pituitary
surgery. This is a particularly challenging task compared
to other minimally invasive surgeries due to the smaller
working space, which limits and distorts vision, and the
higher frequency of instrument and step switching, which
requires more precise model predictions. Participants were
provided with 25 videos, with results presented at the
MICCAI-2023 conference as part of the Endoscopic Vision
2023 Challenge in Vancouver, Canada, on 08-Oct-2023. There
were 18 submissions from 9 teams across 6 countries, using
a variety of deep learning models. The top-performing model
for step recognition utilised a transformer-based
architecture, uniquely using an autoregressive decoder with
a positional-encoding input. The top-performing model for
instrument recognition utilised a spatial encoder followed
by a temporal encoder, uniquely using a 2-layer temporal
architecture. In both cases, these models outperformed
purely spatial models, illustrating the importance of
sequential and temporal information. PitVis-2023 therefore
demonstrates that state-of-the-art computer vision models
for minimally invasive surgery are transferable to a new
dataset. Benchmark results are provided in the paper, and
the dataset is publicly available at:
https://doi.org/10.5522/04/26531686.},
keywords = {Endoscopic vision / Instrument recognition / Step
recognition / Surgical AI / Surgical vision / Workflow
analysis},
cin = {E130},
ddc = {610},
cid = {I:(DE-He78)E130-20160331},
pnm = {315 - Bildgebung und Radioonkologie (POF4-315)},
pid = {G:(DE-HGF)POF4-315},
typ = {PUB:(DE-HGF)16},
pubmed = {pmid:40769094},
doi = {10.1016/j.media.2025.103716},
url = {https://inrepo02.dkfz.de/record/303404},
}