Coretex
create_ml_converter.py
1 # Copyright (C) 2023 Coretex LLC
2 
3 # This file is part of Coretex.ai
4 
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Affero General Public License as
7 # published by the Free Software Foundation, either version 3 of the
8 # License, or (at your option) any later version.
9 
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU Affero General Public License for more details.
14 
15 # You should have received a copy of the GNU Affero General Public License
16 # along with this program. If not, see <https://www.gnu.org/licenses/>.
17 
18 from typing import Any, Optional, List, Set, Dict
19 
20 import os
21 import json
22 import glob
23 import logging
24 
25 from PIL import Image
26 
27 from ..base_converter import BaseConverter
28 from ...annotation import CoretexSegmentationInstance, CoretexImageAnnotation, BBox
29 
30 
class CreateMLConverter(BaseConverter):

    """
        Converts a dataset stored in the CreateML object detection
        layout into Coretex image annotations.

        The converter reads every "*.json" file from the "annotations"
        directory of the dataset and looks up the referenced images
        in the "images" directory of the dataset.

        Only the first element of each JSON file is read; that element
        must contain the "image" and "annotations" keys.
        NOTE(review): entries after the first one in a JSON file are
        ignored - confirm that every file holds exactly one image entry.
    """

    def __init__(self, datasetName: str, projectId: int, datasetPath: str) -> None:
        """
            Parameters
            ----------
            datasetName : str
                forwarded unchanged to BaseConverter
            projectId : int
                forwarded unchanged to BaseConverter
            datasetPath : str
                directory which contains the "images" and
                "annotations" sub-directories
        """

        super().__init__(datasetName, projectId, datasetPath)

        self.__imagesPath = os.path.join(datasetPath, "images")

        annotations = os.path.join(datasetPath, "annotations")
        self.__fileNames = glob.glob(os.path.join(annotations, "*.json"))

    def __loadImageEntry(self, fileName: str) -> Dict[str, Any]:
        """
            Loads a single annotation file

            Parameters
            ----------
            fileName : str
                path to the annotation JSON file

            Returns
            -------
            Dict[str, Any] -> first element of the JSON document
        """

        # JSON is UTF-8 by specification, do not rely on the platform default encoding
        with open(fileName, encoding = "utf-8") as jsonFile:
            entry: Dict[str, Any] = json.load(jsonFile)[0]

        return entry

    def _dataSource(self) -> List[str]:
        """
            Returns
            -------
            List[str] -> paths of the annotation files found during initialization
        """

        return self.__fileNames

    def _extractLabels(self) -> Set[str]:
        """
            Collects the value of the "label" key from every
            annotation of every annotation file

            Returns
            -------
            Set[str] -> unique labels
        """

        labels: Set[str] = set()

        for fileName in self.__fileNames:
            data = self.__loadImageEntry(fileName)
            labels.update(annotation["label"] for annotation in data["annotations"])

        return labels

    def __extractBBox(self, bbox: Dict[str, int]) -> BBox:
        """
            Builds a BBox out of the "coordinates" dictionary

            Half of the width/height is subtracted from "x"/"y", i.e. the
            input point is treated as the center of the box.
            NOTE(review): presumably BBox expects (minX, minY, width, height) - confirm

            Parameters
            ----------
            bbox : Dict[str, int]
                dictionary with "x", "y", "width" and "height" keys

            Returns
            -------
            BBox -> converted bounding box
        """

        return BBox(
            bbox["x"] - bbox["width"] // 2,
            bbox["y"] - bbox["height"] // 2,
            bbox["width"],
            bbox["height"]
        )

    def __extractInstance(self, annotation: Dict[str, Any]) -> Optional[CoretexSegmentationInstance]:
        """
            Converts a single annotation into a Coretex instance

            Parameters
            ----------
            annotation : Dict[str, Any]
                dictionary with "label" and "coordinates" keys

            Returns
            -------
            Optional[CoretexSegmentationInstance] -> created instance, or None
            if the dataset has no class named like the label
        """

        label = annotation["label"]

        coretexClass = self._dataset.classByName(label)
        if coretexClass is None:
            logging.getLogger("coretexpylib").info(f">> [Coretex] Class: ({label}) is not a part of dataset")
            return None

        bbox = self.__extractBBox(annotation["coordinates"])

        # The annotation carries only a box, so the polygon of the box is used as the only segmentation
        return CoretexSegmentationInstance.create(coretexClass.classIds[0], bbox, [bbox.polygon])

    def __extractImageAnnotation(self, imageAnnotation: Dict[str, Any]) -> None:
        """
            Converts all annotations of a single image and saves
            the image together with the created annotation

            Parameters
            ----------
            imageAnnotation : Dict[str, Any]
                dictionary with "image" (file name inside of the images
                directory) and "annotations" keys
        """

        imageName = imageAnnotation["image"]

        # Same path is used for reading the image size and for saving the pair
        imagePath = os.path.join(self.__imagesPath, imageName)

        # Image is opened only to read its size, close the file handle right after
        with Image.open(imagePath) as image:
            width, height = image.width, image.height

        coretexAnnotation = CoretexImageAnnotation.create(imageName, width, height, [])

        for annotation in imageAnnotation["annotations"]:
            instance = self.__extractInstance(annotation)
            if instance is None:
                # Label is not a part of the dataset, skip the annotation
                continue

            coretexAnnotation.instances.append(instance)

        self._saveImageAnnotationPair(imagePath, coretexAnnotation)

    def _extractSingleAnnotation(self, fileName: str) -> None:
        """
            Converts one annotation file

            Parameters
            ----------
            fileName : str
                path to the annotation JSON file
        """

        self.__extractImageAnnotation(self.__loadImageEntry(fileName))