Coretex
text.py
1 # Copyright (C) 2023 Coretex LLC
2 
3 # This file is part of Coretex.ai
4 
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Affero General Public License as
7 # published by the Free Software Foundation, either version 3 of the
8 # License, or (at your option) any later version.
9 
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU Affero General Public License for more details.
14 
15 # You should have received a copy of the GNU Affero General Public License
16 # along with this program. If not, see <https://www.gnu.org/licenses/>.
17 
18 # from .token import Token
19 # from .transcription import Transcription
20 # from .utils import getTxtFilePath
21 # from ..coretex import CustomSample
22 
23 
24 # def loadTxtSample(sample: CustomSample) -> Transcription:
25 # """
26 # Tokenizes text sample
27 
28 # Parameters
29 # ----------
30 # sample : CustomSample
31 # sample to be tokenized
32 
33 # Returns
34 # -------
35 # Transcription -> text and a list of tokens contained in the text
36 
37 # Raises
38 # ------
39 # ValueError -> if provided sample is not a valid text sample
40 # """
41 
42 # path = getTxtFilePath(sample)
43 # if path is None:
44 # raise ValueError(f">> [Coretex] {sample.name} does not contain a valid txt file")
45 
46 # with path.open("r") as txtFile:
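# NOTE(review): readlines() keeps each line's trailing newline, so joining with "\n" doubles every line break in the resulting text; if that is unintended, txtFile.read().strip() would preserve the original line breaks. Confirm before re-enabling this code.
47 # text = "\n".join(txtFile.readlines()).strip()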
48 
49 # return Transcription.create(text, Token.fromText(text))