Coretex
pascal_2012_converter.py
1 # Copyright (C) 2023 Coretex LLC
2 
3 # This file is part of Coretex.ai
4 
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Affero General Public License as
7 # published by the Free Software Foundation, either version 3 of the
8 # License, or (at your option) any later version.
9 
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU Affero General Public License for more details.
14 
15 # You should have received a copy of the GNU Affero General Public License
16 # along with this program. If not, see <https://www.gnu.org/licenses/>.
17 
18 from typing import List, Set
19 
20 import os
21 import glob
22 import xml.etree.ElementTree as ET
23 
24 from .shared import getTag, toInt
25 from .instance_extractor import InstanceExtractor
26 from ...base_converter import BaseConverter
27 from .....entities import CoretexImageAnnotation
28 
29 
31 
32  """
33  Represents the Converter from Pascal VOC 2012 Format to Coretex Format
34  """
35 
def __init__(self, datasetName: str, projectId: int, datasetPath: str) -> None:
    """
        Set up the converter for a Pascal VOC 2012 dataset directory

        Parameters
        ----------
        datasetName : str
            forwarded unchanged to the base class constructor
        projectId : int
            forwarded unchanged to the base class constructor
        datasetPath : str
            root directory of the dataset; the "JPEGImages",
            "SegmentationObject" and "Annotations" sub-directories
            are resolved relative to it
    """

    super().__init__(datasetName, projectId, datasetPath)

    # NOTE(review): the doubled attribute names look like residue from the
    # source listing (probably "__imagesPath", "__segmentationPath" and
    # "__fileNames"). They are kept verbatim because the other methods of
    # this class read exactly these names.
    self.__imagesPath__imagesPath = os.path.join(datasetPath, "JPEGImages")
    self.__segmentationPath__segmentationPath = os.path.join(datasetPath, "SegmentationObject")

    # One XML annotation file per image; only direct children of
    # "Annotations" are matched (the glob is not recursive)
    annotations = os.path.join(datasetPath, "Annotations")
    self.__fileNames__fileNames = glob.glob(os.path.join(annotations, "*.xml"))
44 
def _dataSource(self) -> List[str]:
    """
        Returns
        -------
        List[str] -> paths of the annotation XML files stored on
        the instance (the same list object, not a copy)
    """

    return self.__fileNames__fileNames
47 
def _extractLabels(self) -> Set[str]:
    """
        Collects the distinct object labels from all annotation files

        Every annotation XML file is parsed, and the text of the
        "name" child of each top-level "object" element is recorded.

        Returns
        -------
        Set[str] -> unique label names; empty if there are no
        annotation files or no named objects

        Raises
        ------
        xml.etree.ElementTree.ParseError -> if a file is not well-formed XML
    """

    labels: Set[str] = set()

    for filename in self.__fileNames__fileNames:
        root = ET.parse(filename).getroot()

        # Only direct "object" children of the root are inspected
        for obj in root.findall("object"):
            labelElement = obj.find("name")

            # Skip objects with no "name" tag or with an empty one
            if labelElement is None or labelElement.text is None:
                continue

            labels.add(labelElement.text)

    return labels
68 
def __extractImageAnnotation(self, root: ET.Element) -> None:
    """
        Builds and saves the annotation for the image described by root

        Nothing is saved (the method returns early) when:
          - getTag yields None for the "filename" tag
          - the referenced image does not exist in the images directory
          - root has no "size" child
          - toInt yields None for "width" or "height"

        Parameters
        ----------
        root : ET.Element
            root element of a parsed annotation XML file
    """

    fileName = getTag(root, "filename")
    if fileName is None:
        return

    # The segmentation mask shares the image's base name but is a PNG
    baseFileName = os.path.splitext(fileName)[0]
    filenamePNG = f"{baseFileName}.png"

    # Computed once; used for both the existence check and the save call
    imagePath = os.path.join(self.__imagesPath__imagesPath, fileName)
    if not os.path.exists(imagePath):
        return

    instanceExtractor = InstanceExtractor(self._dataset)
    instances = instanceExtractor.extractInstances(root, filenamePNG, self.__segmentationPath__segmentationPath)

    size = root.find('size')
    if size is None:
        return

    width, height = toInt(size, "width", "height")
    if width is None or height is None:
        return

    coretexAnnotation = CoretexImageAnnotation.create(fileName, width, height, instances)

    # The listing had the method name doubled here; the base-class
    # method is _saveImageAnnotationPair(imagePath, annotation)
    self._saveImageAnnotationPair(imagePath, coretexAnnotation)
93 
def _extractSingleAnnotation(self, fileName: str) -> None:
    """
        Parses one annotation XML file and hands its root element
        to __extractImageAnnotation

        Parameters
        ----------
        fileName : str
            path to the annotation XML file

        Raises
        ------
        xml.etree.ElementTree.ParseError -> if the file is not well-formed XML
    """

    root = ET.parse(fileName).getroot()

    # The listing had the method name doubled here; the method
    # defined in this class is __extractImageAnnotation
    self.__extractImageAnnotation(root)
None _saveImageAnnotationPair(self, str imagePath, CoretexImageAnnotation annotation)