Coretex
coco_converter.py
1 # Copyright (C) 2023 Coretex LLC
2 
3 # This file is part of Coretex.ai
4 
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Affero General Public License as
7 # published by the Free Software Foundation, either version 3 of the
8 # License, or (at your option) any later version.
9 
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU Affero General Public License for more details.
14 
15 # You should have received a copy of the GNU Affero General Public License
16 # along with this program. If not, see <https://www.gnu.org/licenses/>.
17 
18 from typing import Any, Final, Optional, List, Dict, Set
19 
20 import glob
21 import os
22 import json
23 import logging
24 
25 from ..base_converter import BaseConverter
26 from ...annotation import CoretexImageAnnotation, CoretexSegmentationInstance, BBox
27 
28 
class _CocoImageAnnotationData:
    """Bundle one COCO image entry with the annotation dictionary it belongs to.

    Attributes are plain references to the objects passed in; nothing is
    copied or validated here.
    """

    def __init__(self, data: Dict[str, Any], imageInfo: Dict[str, Any]) -> None:
        # data: COCO annotation dictionary; the converter reads its
        #       "annotations" and "categories" entries.
        # imageInfo: a single image entry; the converter reads its
        #            "id", "file_name", "width" and "height" entries.
        self.imageInfo, self.data = imageInfo, data
34 
35 
class COCOConverter(BaseConverter):
    """Converter for datasets stored in the COCO annotation format.

    Images are looked up under ``<datasetPath>/images`` and every ``*.json``
    file found directly under ``<datasetPath>/annotations`` is read as a COCO
    annotation file.
    """

    def __init__(self, datasetName: str, projectId: int, datasetPath: str) -> None:
        super().__init__(datasetName, projectId, datasetPath)

        self.__imagesPath: Final = os.path.join(datasetPath, "images")

        annotationsPath = os.path.join(datasetPath, "annotations")
        self.__fileNames: Final = glob.glob(os.path.join(annotationsPath, "*.json"))

    def __loadAnnotationFile(self, fileName: str) -> Dict[str, Any]:
        """Parse a single annotation file and return its content."""

        # Read as UTF-8 explicitly so parsing does not depend on the
        # platform's default text encoding.
        with open(fileName, encoding="utf-8") as jsonFile:
            data: Dict[str, Any] = json.load(jsonFile)

        return data

    def _dataSource(self) -> List[_CocoImageAnnotationData]:
        """Build one ``_CocoImageAnnotationData`` per image of every annotation file.

        Each item carries a shallow copy of the file's content whose
        "annotations" entry holds only the annotations of that image. Grouping
        once per file keeps the later per-image extraction from rescanning
        every annotation in the file. A file without an "annotations" entry
        yields images with no annotations.
        """

        fullAnnotationData: List[_CocoImageAnnotationData] = []

        for fileName in self.__fileNames:
            data = self.__loadAnnotationFile(fileName)

            # image id -> annotations which reference that image
            annotationsByImage: Dict[Any, List[Dict[str, Any]]] = {}
            for annotation in data.get("annotations", []):
                annotationsByImage.setdefault(annotation["image_id"], []).append(annotation)

            for imageInfo in data["images"]:
                # Shallow copy: all other top-level entries (e.g. "categories")
                # remain shared with the parsed file
                imageData = dict(data)
                imageData["annotations"] = annotationsByImage.get(imageInfo["id"], [])

                fullAnnotationData.append(_CocoImageAnnotationData(imageData, imageInfo))

        return fullAnnotationData

    def _extractLabels(self) -> Set[str]:
        """Return the names of all categories declared in the annotation files."""

        labels: Set[str] = set()

        for fileName in self.__fileNames:
            data = self.__loadAnnotationFile(fileName)
            labels.update(category["name"] for category in data["categories"])

        return labels

    def __extractInstance(
        self,
        categories: List[Dict[str, Any]],
        annotation: Dict[str, Any]
    ) -> Optional[CoretexSegmentationInstance]:
        """Convert a single COCO annotation into a Coretex instance.

        Returns None (after logging the reason) when the annotation's
        category id matches no entry of ``categories``, or when the dataset
        has no class with the matching name.
        """

        categoryId = annotation["category_id"]
        label: Optional[str] = None

        for category in categories:
            if category["id"] == categoryId:
                label = category["name"]

        if label is None:
            # Report the id which failed to resolve - label is always None here
            logging.getLogger("coretexpylib").info(f">> [Coretex] Invalid class: {categoryId}")
            return None

        coretexClass = self._dataset.classByName(label)
        if coretexClass is None:
            logging.getLogger("coretexpylib").info(f">> [Coretex] Class: ({label}) is not a part of dataset")
            return None

        bbox = BBox(*(annotation["bbox"]))

        # Only a list is a polygon list. COCO may also store a segmentation as
        # an RLE dictionary (crowd regions) - that, as well as a missing
        # segmentation, falls back to the polygon of the bounding box.
        # NOTE(review): assumes CoretexSegmentationInstance only accepts
        # polygon lists - confirm
        segmentation = annotation.get("segmentation")
        if not isinstance(segmentation, list):
            segmentation = [
                bbox.polygon
            ]

        return CoretexSegmentationInstance.create(
            coretexClass.classIds[0],
            bbox,
            segmentation
        )

    def _extractSingleAnnotation(self, annotationData: _CocoImageAnnotationData) -> None:
        """Create the annotation of one image and save it together with the image.

        Images which do not exist in the images directory are skipped.
        Annotations which cannot be converted to an instance are left out.
        """

        imageInfo = annotationData.imageInfo
        imageName = imageInfo["file_name"]

        imagePath = os.path.join(self.__imagesPath, imageName)
        if not os.path.exists(imagePath):
            logging.getLogger("coretexpylib").info(f">> [Coretex] Image not found, skipping: {imagePath}")
            return

        coretexAnnotation = CoretexImageAnnotation.create(
            imageName,
            imageInfo["width"],
            imageInfo["height"],
            []
        )

        categories = annotationData.data["categories"]

        for annotation in annotationData.data["annotations"]:
            # Kept so that data which still holds annotations of
            # other images is handled correctly
            if annotation["image_id"] != imageInfo["id"]:
                continue

            instance = self.__extractInstance(categories, annotation)
            if instance is not None:
                coretexAnnotation.instances.append(instance)

        self._saveImageAnnotationPair(imagePath, coretexAnnotation)