Coretex
yolo_converter.py
1 # Copyright (C) 2023 Coretex LLC
2 
3 # This file is part of Coretex.ai
4 
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Affero General Public License as
7 # published by the Free Software Foundation, either version 3 of the
8 # License, or (at your option) any later version.
9 
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU Affero General Public License for more details.
14 
15 # You should have received a copy of the GNU Affero General Public License
16 # along with this program. If not, see <https://www.gnu.org/licenses/>.
17 
from typing import Optional, List, Set

import os
import re
import logging
import math

from PIL import Image

from ..base_converter import BaseConverter
from ...annotation import CoretexImageAnnotation, CoretexSegmentationInstance, BBox
28 
29 
class Helper:
    """
    Namespace for small string validation helpers.
    """

    @staticmethod
    def isFloat(value: str) -> bool:
        """
        Checks whether a string can be parsed by the built-in float()

        Parameters
        ----------
        value : str
            text to check

        Returns
        -------
        bool -> True if float(value) succeeds,
        False if it raises ValueError
        """

        try:
            float(value)
        except ValueError:
            return False
        else:
            return True
39 
40 
class YoloConverter(BaseConverter):
    """
    Converts a dataset stored in YOLO format into Coretex image annotations.

    The dataset directory is expected to contain an "images" directory and
    an "annotations" directory. The "annotations" directory holds one txt
    file per image plus a "classes.txt" file with one label per line, where
    the zero-based line position of a label is its id.

    Every line of an annotation file holds five whitespace separated values:
    label id, x center, y center, width and height of the bounding box.
    The four box values are fractions of the image width/height.
    """

    def __init__(self, datasetName: str, projectId: int, datasetPath: str) -> None:
        """
        Parameters
        ----------
        datasetName : str
            forwarded to BaseConverter
        projectId : int
            forwarded to BaseConverter
        datasetPath : str
            path to the directory which contains the "images"
            and "annotations" directories

        Raises
        ------
        FileNotFoundError -> if "annotations/classes.txt" does not exist
        """

        super().__init__(datasetName, projectId, datasetPath)

        self.__imagesPath = os.path.join(datasetPath, "images")
        self.__annotations = os.path.join(datasetPath, "annotations")

        classesPath = os.path.join(self.__annotations, "classes.txt")
        if not os.path.exists(classesPath):
            raise FileNotFoundError(">> [Coretex] classes.txt file not found")

        with open(classesPath, 'r') as classesFile:
            # Surrounding whitespace is not part of a label, and blank
            # lines do not define one
            strippedLabels = (line.strip() for line in classesFile)
            self.__rawLabels: List[str] = [label for label in strippedLabels if label]

    def _dataSource(self) -> List[str]:
        """
        Returns
        -------
        List[str] -> names of all entries inside the annotations directory
        """

        return os.listdir(self.__annotations)

    def _extractLabels(self) -> Set[str]:
        """
        Returns
        -------
        Set[str] -> unique labels read from classes.txt
        """

        return set(self.__rawLabels)

    def __extractBBox(self, rawInstance: List[str], width: int, height: int) -> BBox:
        """
        Converts a YOLO bounding box (center based, relative to the image
        size) into a bounding box expressed in pixels

        Parameters
        ----------
        rawInstance : List[str]
            values of a single annotation line, the box values
            are read from positions 1 to 4
        width : int
            width of the image in pixels
        height : int
            height of the image in pixels

        Returns
        -------
        BBox -> created from min x, min y, width and height in pixels
        """

        xYolo = float(rawInstance[1])
        yYolo = float(rawInstance[2])
        wYolo = float(rawInstance[3])
        hYolo = float(rawInstance[4])

        boxWidth = int(wYolo * width)
        boxHeight = int(hYolo * height)

        # YOLO stores the center of the box, BBox is created from its min corner
        xMin = int(xYolo * width - (boxWidth / 2))
        yMin = int(yYolo * height - (boxHeight / 2))

        return BBox(xMin, yMin, boxWidth, boxHeight)

    def __extractInstance(self, rawInstance: List[str], width: int, height: int) -> Optional[CoretexSegmentationInstance]:
        """
        Creates a Coretex instance out of a single annotation line

        Parameters
        ----------
        rawInstance : List[str]
            values of a single annotation line which passed formatCheck
        width : int
            width of the image in pixels
        height : int
            height of the image in pixels

        Returns
        -------
        Optional[CoretexSegmentationInstance] -> the created instance, None if
        the label id is not defined in classes.txt or if the label is not
        a class of the dataset
        """

        logger = logging.getLogger("coretexpylib")

        # The id comes from a file, do not trust it to be a valid index.
        # Negative values would otherwise silently select a wrong label
        labelId = int(rawInstance[0])
        if not 0 <= labelId < len(self.__rawLabels):
            logger.warning(f">> [Coretex] Label id: ({labelId}) is not defined in classes.txt")
            return None

        label = self.__rawLabels[labelId]

        coretexClass = self._dataset.classByName(label)
        if coretexClass is None:
            logger.info(f">> [Coretex] Class: ({label}) is not a part of dataset")
            return None

        bbox = self.__extractBBox(rawInstance, width, height)
        return CoretexSegmentationInstance.create(coretexClass.classIds[0], bbox, [bbox.polygon])

    def _extractSingleAnnotation(self, yoloFilePath: str) -> None:
        """
        Converts a single YOLO annotation file and saves the result
        together with its image. Entries which are not txt files,
        and classes.txt itself, are ignored

        Parameters
        ----------
        yoloFilePath : str
            name of an entry from the annotations directory

        Raises
        ------
        RuntimeError -> if there is no image for the annotation file
        """

        if not yoloFilePath.endswith(".txt"):
            return

        if os.path.splitext(yoloFilePath)[0] == "classes":
            return

        yoloFilePath = os.path.join(self.__annotations, yoloFilePath)
        yoloName = os.path.basename(yoloFilePath)

        imagePath = self.imageCheck(os.path.join(self.__imagesPath, yoloName))
        if imagePath is None:
            raise RuntimeError(f"Image for annotation file {yoloFilePath} doesn't exist.")

        imageName = os.path.basename(imagePath)

        with open(yoloFilePath, 'r') as file:
            allLines = file.readlines()

        # Only the dimensions are needed, release the file handle right away
        with Image.open(imagePath) as image:
            imageWidth = image.width
            imageHeight = image.height

        coretexAnnotation = CoretexImageAnnotation.create(imageName, imageWidth, imageHeight, [])

        # Get bounding boxes and classes from yolo txt
        for line in allLines:
            # split() without arguments tolerates tabs, repeated
            # separators and leading/trailing whitespace
            yoloArray = line.split()
            if not YoloConverter.formatCheck(yoloArray):
                continue

            instance = self.__extractInstance(yoloArray, imageWidth, imageHeight)
            if instance is None:
                continue

            coretexAnnotation.instances.append(instance)

        self._saveImageAnnotationPair(imagePath, coretexAnnotation)

    @staticmethod
    def imageCheck(yoloFilePath: str) -> Optional[str]:
        """
        Looks for an image which has the same path as the provided
        file, but with a jpeg, jpg or png extension (checked in that order)

        Parameters
        ----------
        yoloFilePath : str
            path whose extension gets replaced by the image extensions

        Returns
        -------
        Optional[str] -> path to the first image which exists,
        None if there is no such image
        """

        # Replace only the extension, "txt" may also appear in
        # directory names or in the name of the file itself
        basePath = os.path.splitext(yoloFilePath)[0]

        for extension in (".jpeg", ".jpg", ".png"):
            candidatePath = basePath + extension
            if os.path.exists(candidatePath):
                return candidatePath

        return None

    @staticmethod
    def formatCheck(yoloArray: List[str]) -> bool:
        """
        Checks format of a single line of yolo annotation file

        Parameters
        ----------
        yoloArray : List[str]
            list with label id and bounding boxes

        Returns
        -------
        bool -> True if format is correct, False if format is not correct.
        Correct format has exactly five values: a non-negative integer
        label id followed by four finite numbers
        """

        if len(yoloArray) != 5:
            return False

        # The label id is used as a list index, so it must be an integer
        try:
            if int(yoloArray[0]) < 0:
                return False
        except ValueError:
            return False

        for value in yoloArray[1:]:
            if not Helper.isFloat(value):
                return False

            # "nan" and "inf" parse as floats but can not be converted to pixels
            if not math.isfinite(float(value)):
                return False

        return True