Coretex
text.py
1
# Copyright (C) 2023 Coretex LLC
2
3
# This file is part of Coretex.ai
4
5
# This program is free software: you can redistribute it and/or modify
6
# it under the terms of the GNU Affero General Public License as
7
# published by the Free Software Foundation, either version 3 of the
8
# License, or (at your option) any later version.
9
10
# This program is distributed in the hope that it will be useful,
11
# but WITHOUT ANY WARRANTY; without even the implied warranty of
12
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
# GNU Affero General Public License for more details.
14
15
# You should have received a copy of the GNU Affero General Public License
16
# along with this program. If not, see <https://www.gnu.org/licenses/>.
17
18
# from .token import Token
19
# from .transcription import Transcription
20
# from .utils import getTxtFilePath
21
# from ..coretex import CustomSample
22
23
24
# def loadTxtSample(sample: CustomSample) -> Transcription:
25
# """
26
# Reads the text from the sample's txt file and tokenizes it
27
28
# Parameters
29
# ----------
30
# sample : CustomSample
31
# sample to be tokenized
32
33
# Returns
34
# -------
35
# Transcription -> text and a list of tokens contained in the text
36
37
# Raises
38
# ------
39
# ValueError -> if the provided sample does not contain a valid txt file
40
# """
41
42
# path = getTxtFilePath(sample)
43
# if path is None:
44
# raise ValueError(f">> [Coretex] {sample.name} does not contain a valid txt file")
45
46
# with path.open("r") as txtFile:
47
# text = "\n".join(txtFile.readlines()).strip()
48
49
# return Transcription.create(text, Token.fromText(text))
coretex
nlp
text.py
Generated by
1.9.1