Coretex
metrics.py
1 # Copyright (C) 2023 Coretex LLC
2 
3 # This file is part of Coretex.ai
4 
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Affero General Public License as
7 # published by the Free Software Foundation, either version 3 of the
8 # License, or (at your option) any later version.
9 
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU Affero General Public License for more details.
14 
15 # You should have received a copy of the GNU Affero General Public License
16 # along with this program. If not, see <https://www.gnu.org/licenses/>.
17 
18 from typing import Tuple, Dict, List
19 
20 import time
21 import logging
22 
23 from ...entities import MetricType, TaskRun, Metric
24 from ...entities.task_run.metrics import metric_factory
25 
26 
def _getMetrics() -> List[Metric]:
    """
        Builds the list of metrics which are tracked for a TaskRun.

        CPU, RAM, swap, network and disk metrics are always created.
        GPU metrics are appended only if "py3nvml" can be imported
        and NVML initializes successfully; otherwise they are skipped
        and a debug message is logged.

        Returns
        -------
        List[Metric] -> the created metrics
    """

    metrics = [
        metric_factory.createMetric("cpu_usage", "time (s)", MetricType.interval, "usage (%)", MetricType.percent, None, [0, 100]),
        metric_factory.createMetric("ram_usage", "time (s)", MetricType.interval, "usage (%)", MetricType.percent, None, [0, 100]),
        metric_factory.createMetric("swap_usage", "time (s)", MetricType.interval, "usage (%)", MetricType.percent, None, [0, 100]),
        metric_factory.createMetric("download_speed", "time (s)", MetricType.interval, "bytes", MetricType.bytes),
        metric_factory.createMetric("upload_speed", "time (s)", MetricType.interval, "bytes", MetricType.bytes),
        metric_factory.createMetric("disk_read", "time (s)", MetricType.interval, "bytes", MetricType.bytes),
        metric_factory.createMetric("disk_write", "time (s)", MetricType.interval, "bytes", MetricType.bytes)
    ]

    # If GPU exists add GPU related metrics to the list
    try:
        # Imported lazily so that machines without py3nvml still get the base metrics
        from py3nvml import py3nvml

        # Do not shutdown otherwise when extracting gpu metrics it will throw error
        py3nvml.nvmlInit()

        # NOTE(review): gpu_temperature is declared with a "usage (%)" label and
        # percent type, unlike the other GPU metrics it has no range - confirm this is intended
        metrics.extend([
            metric_factory.createMetric("gpu_usage", "time (s)", MetricType.interval, "usage (%)", MetricType.percent, None, [0, 100]),
            metric_factory.createMetric("gpu_temperature", "time (s)", MetricType.interval, "usage (%)", MetricType.percent),
            metric_factory.createMetric("gpu_memory_usage", "time (s)", MetricType.interval, "usage (%)", MetricType.percent, None, [0, 100])
        ])

        logging.getLogger("coretexpylib").debug(">> [Coretex] Initialized GPU metrics")
    except Exception:
        # GPU metrics are best-effort: any import or initialization error is tolerated.
        # "Exception" is used instead of a bare "except" so that KeyboardInterrupt
        # and SystemExit are not swallowed here.
        logging.getLogger("coretexpylib").debug(">> [Coretex] Failed to initialize GPU metrics")

    return metrics
56 
57 
def create(taskRun: TaskRun) -> None:
    """
        Creates the metrics returned by "_getMetrics" for the provided TaskRun

        Parameters
        ----------
        taskRun : TaskRun
            object on which "createMetrics" is called with the built metrics
    """

    trackedMetrics = _getMetrics()
    taskRun.createMetrics(trackedMetrics)
60 
61 
def upload(taskRun: TaskRun) -> None:
    """
        Extracts the current value of every metric of the provided TaskRun
        and submits all of them in a single "submitMetrics" call.

        The timestamp is taken once, before extraction, so every value in
        the submission shares the same timestamp. Metrics whose "extract"
        returns None are left out of the submission.

        Parameters
        ----------
        taskRun : TaskRun
            object whose "metrics" are extracted and submitted
    """

    timestamp = time.time()
    sampledValues: Dict[str, Tuple[float, float]] = {}

    for metric in taskRun.metrics:
        extracted = metric.extract()

        # Nothing to report for this metric at the moment
        if extracted is None:
            continue

        sampledValues[metric.name] = (timestamp, extracted)

    taskRun.submitMetrics(sampledValues)